santit96 committed on
Commit fa84113 · 0 Parent(s)

Create the streamlit app that classifies the trash in an image into classes

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitattributes +4 -0
  2. .github/workflows/main.yml +20 -0
  3. .gitignore +5 -0
  4. README.md +14 -0
  5. app.py +82 -0
  6. constants.py +8 -0
  7. efficientdet/__init__.py +0 -0
  8. efficientdet/effdet/__init__.py +7 -0
  9. efficientdet/effdet/anchors.py +421 -0
  10. efficientdet/effdet/bench.py +143 -0
  11. efficientdet/effdet/config/__init__.py +4 -0
  12. efficientdet/effdet/config/config_utils.py +9 -0
  13. efficientdet/effdet/config/fpn_config.py +184 -0
  14. efficientdet/effdet/config/model_config.py +538 -0
  15. efficientdet/effdet/config/train_config.py +34 -0
  16. efficientdet/effdet/data/__init__.py +6 -0
  17. efficientdet/effdet/data/dataset.py +145 -0
  18. efficientdet/effdet/data/dataset_config.py +194 -0
  19. efficientdet/effdet/data/dataset_factory.py +85 -0
  20. efficientdet/effdet/data/input_config.py +60 -0
  21. efficientdet/effdet/data/loader.py +226 -0
  22. efficientdet/effdet/data/parsers/__init__.py +2 -0
  23. efficientdet/effdet/data/parsers/parser.py +82 -0
  24. efficientdet/effdet/data/parsers/parser_coco.py +93 -0
  25. efficientdet/effdet/data/parsers/parser_config.py +49 -0
  26. efficientdet/effdet/data/parsers/parser_factory.py +19 -0
  27. efficientdet/effdet/data/parsers/parser_open_images.py +211 -0
  28. efficientdet/effdet/data/parsers/parser_voc.py +148 -0
  29. efficientdet/effdet/data/random_erasing.py +94 -0
  30. efficientdet/effdet/data/transforms.py +275 -0
  31. efficientdet/effdet/data/transforms_albumentation.py +23 -0
  32. efficientdet/effdet/distributed.py +308 -0
  33. efficientdet/effdet/efficientdet.py +557 -0
  34. efficientdet/effdet/evaluation/README.md +7 -0
  35. efficientdet/effdet/evaluation/__init__.py +0 -0
  36. efficientdet/effdet/evaluation/detection_evaluator.py +590 -0
  37. efficientdet/effdet/evaluation/fields.py +105 -0
  38. efficientdet/effdet/evaluation/metrics.py +148 -0
  39. efficientdet/effdet/evaluation/np_box_list.py +696 -0
  40. efficientdet/effdet/evaluation/np_mask_list.py +478 -0
  41. efficientdet/effdet/evaluation/object_detection_evaluation.py +273 -0
  42. efficientdet/effdet/evaluation/per_image_evaluation.py +538 -0
  43. efficientdet/effdet/evaluator.py +195 -0
  44. efficientdet/effdet/factory.py +54 -0
  45. efficientdet/effdet/helpers.py +22 -0
  46. efficientdet/effdet/loss.py +259 -0
  47. efficientdet/effdet/object_detection/README.md +3 -0
  48. efficientdet/effdet/object_detection/__init__.py +22 -0
  49. efficientdet/effdet/object_detection/argmax_matcher.py +174 -0
  50. efficientdet/effdet/object_detection/box_coder.py +172 -0
.gitattributes ADDED
@@ -0,0 +1,4 @@
+ *.psd filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
.github/workflows/main.yml ADDED
@@ -0,0 +1,20 @@
+ name: Sync to Hugging Face hub
+ on:
+   push:
+     branches: [main]
+
+   # to run this workflow manually from the Actions tab
+   workflow_dispatch:
+
+ jobs:
+   sync-to-hub:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v3
+         with:
+           fetch-depth: 0
+           lfs: true
+       - name: Push to hub
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+         run: git push --force https://santit96:[email protected]/spaces/rootstrap-org/waste-classifier main
.gitignore ADDED
@@ -0,0 +1,5 @@
+ __pycache__
+ .DS_Store
+ *.jpg
+ *.png
+ *.jpeg
README.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: Waste Classifier
+ emoji: ♻️
+ colorFrom: green
+ colorTo: gray
+ sdk: streamlit
+ sdk_version: 1.25.0
+ pinned: false
+ ---
+
+ Waste Classifier
+ ==============================
+
+ Waste Detection and Classifier tool
app.py ADDED
@@ -0,0 +1,82 @@
+ """
+ Streamlit app
+ """
+ import sys
+
+ import streamlit as st
+
+ from constants import (CLAS_FILEPATH, CLAS_THRESHOLD, CLASSES, DET_FILEPATH,
+                        DET_NAME, DET_THRESHOLD, DEVICE, OUTPUT_IMG_FILEPATH)
+
+ sys.path.append("./efficientdet")
+
+ from PIL import Image
+
+ from efficientdet.efficientdet import plot_results
+ from trash_detector import detect_trash
+
+
+ def initial_config():
+     """
+     Initial configuration of streamlit page
+     """
+     st.set_page_config(
+         page_title="Waste Classifier",
+         page_icon="♻️",
+     )
+
+
+ def render():
+     """
+     Render the streamlit app
+     """
+     st.title("Waste classifier")
+     st.markdown("""Classify your waste into different classes""")
+
+     # Image loader and button
+     uploaded_file = st.file_uploader(
+         "Upload image with trash", type=["jpg", "jpeg", "png", "gif", "bmp"]
+     )
+     classify_button = st.button("Classify trash")
+
+     if classify_button:
+         if not uploaded_file:
+             st.error("Upload an image")
+         else:
+             # Create two columns
+             col1, col2 = st.columns(2)
+
+             # Column 1: Uploaded image
+             with col1:
+                 st.write("Uploaded image")
+                 st.image(
+                     uploaded_file, caption="Uploaded Image.", use_column_width=True
+                 )
+
+             # Column 2: Classified image
+             with col2:
+                 with st.spinner(text="Classifying the trash..."):
+                     img = Image.open(uploaded_file).convert("RGB")
+                     cls_prob, bboxes_final = detect_trash(
+                         img,
+                         DET_NAME,
+                         DET_FILEPATH,
+                         CLAS_FILEPATH,
+                         DEVICE,
+                         DET_THRESHOLD,
+                         CLAS_THRESHOLD,
+                     )
+                     # plot and save demo image
+                     plot_results(
+                         img, cls_prob, bboxes_final, CLASSES, OUTPUT_IMG_FILEPATH
+                     )
+                     output_img = Image.open(OUTPUT_IMG_FILEPATH)
+                     st.write("Classified image")
+                     st.image(
+                         output_img, caption="Classified Image.", use_column_width=True
+                     )
+
+
+ if __name__ == "__main__":
+     initial_config()
+     render()
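
For reference, the same detect-and-plot pipeline that render() drives can be exercised outside Streamlit. A minimal sketch mirroring the calls above; the classify_file helper is hypothetical and assumes trash_detector.detect_trash and plot_results keep the signatures used in app.py:

    import sys

    sys.path.append("./efficientdet")

    from PIL import Image

    from constants import (CLAS_FILEPATH, CLAS_THRESHOLD, CLASSES, DET_FILEPATH,
                           DET_NAME, DET_THRESHOLD, DEVICE, OUTPUT_IMG_FILEPATH)
    from efficientdet.efficientdet import plot_results
    from trash_detector import detect_trash


    def classify_file(path: str) -> str:
        """Detect and classify trash in a local image; save and return the annotated copy."""
        img = Image.open(path).convert("RGB")
        cls_prob, bboxes_final = detect_trash(
            img, DET_NAME, DET_FILEPATH, CLAS_FILEPATH, DEVICE, DET_THRESHOLD, CLAS_THRESHOLD
        )
        plot_results(img, cls_prob, bboxes_final, CLASSES, OUTPUT_IMG_FILEPATH)
        return OUTPUT_IMG_FILEPATH
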
constants.py ADDED
@@ -0,0 +1,8 @@
+ CLAS_FILEPATH = "models/resnet50-classifier.pkl"
+ DET_FILEPATH = "models/efficientdet-d2-detector.pth.tar"
+ CLASSES = ["cardboard", "compost", "glass", "metal", "paper", "plastic", "trash"]
+ DET_NAME = "tf_efficientdet_d2"
+ CLAS_THRESHOLD = 0.5
+ DET_THRESHOLD = 0.17
+ DEVICE = "cpu"
+ OUTPUT_IMG_FILEPATH = "classified_image.jpg"
efficientdet/__init__.py ADDED
File without changes
efficientdet/effdet/__init__.py ADDED
@@ -0,0 +1,7 @@
+ from .efficientdet import EfficientDet
+ from .bench import DetBenchPredict, DetBenchTrain, unwrap_bench
+ from .data import create_dataset, create_loader, create_parser, DetectionDatset, SkipSubset
+ from .evaluator import CocoEvaluator, PascalEvaluator, OpenImagesEvaluator, create_evaluator
+ from .config import get_efficientdet_config, default_detection_model_configs
+ from .factory import create_model, create_model_from_config
+ from .helpers import load_checkpoint, load_pretrained
efficientdet/effdet/anchors.py ADDED
@@ -0,0 +1,421 @@
1
+ """ RetinaNet / EfficientDet Anchor Gen
2
+
3
+ Adapted for PyTorch from Tensorflow impl at
4
+ https://github.com/google/automl/blob/6f6694cec1a48cdb33d5d1551a2d5db8ad227798/efficientdet/anchors.py
5
+
6
+ Hacked together by Ross Wightman, original copyright below
7
+ """
8
+ # Copyright 2020 Google Research. All Rights Reserved.
9
+ #
10
+ # Licensed under the Apache License, Version 2.0 (the "License");
11
+ # you may not use this file except in compliance with the License.
12
+ # You may obtain a copy of the License at
13
+ #
14
+ # http://www.apache.org/licenses/LICENSE-2.0
15
+ #
16
+ # Unless required by applicable law or agreed to in writing, software
17
+ # distributed under the License is distributed on an "AS IS" BASIS,
18
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ # See the License for the specific language governing permissions and
20
+ # limitations under the License.
21
+ # ==============================================================================
22
+ """Anchor definition.
23
+
24
+ This module is borrowed from TPU RetinaNet implementation:
25
+ https://github.com/tensorflow/tpu/blob/master/models/official/retinanet/anchors.py
26
+ """
27
+ from typing import Optional, Tuple, Sequence
28
+
29
+ import numpy as np
30
+ import torch
31
+ import torch.nn as nn
32
+ #import torchvision.ops.boxes as tvb
33
+ from torchvision.ops.boxes import batched_nms, remove_small_boxes
34
+ from typing import List
35
+
36
+ from effdet.object_detection import ArgMaxMatcher, FasterRcnnBoxCoder, BoxList, IouSimilarity, TargetAssigner
37
+ from .soft_nms import batched_soft_nms
38
+
39
+
40
+ # The minimum score to consider a logit for identifying detections.
41
+ MIN_CLASS_SCORE = -5.0
42
+
43
+ # The score for a dummy detection
44
+ _DUMMY_DETECTION_SCORE = -1e5
45
+
46
+ # The maximum number of (anchor,class) pairs to keep for non-max suppression.
47
+ MAX_DETECTION_POINTS = 5000
48
+
49
+ # The maximum number of detections per image.
50
+ MAX_DETECTIONS_PER_IMAGE = 100
51
+
52
+
53
+ def decode_box_outputs(rel_codes, anchors, output_xyxy: bool=False):
54
+ """Transforms relative regression coordinates to absolute positions.
55
+
56
+ Network predictions are normalized and relative to a given anchor; this
57
+ reverses the transformation and outputs absolute coordinates for the input image.
58
+
59
+ Args:
60
+ rel_codes: box regression targets.
61
+
62
+ anchors: anchors on all feature levels.
63
+
64
+ Returns:
65
+ outputs: bounding boxes.
66
+
67
+ """
68
+ ycenter_a = (anchors[:, 0] + anchors[:, 2]) / 2
69
+ xcenter_a = (anchors[:, 1] + anchors[:, 3]) / 2
70
+ ha = anchors[:, 2] - anchors[:, 0]
71
+ wa = anchors[:, 3] - anchors[:, 1]
72
+
73
+ ty, tx, th, tw = rel_codes.unbind(dim=1)
74
+
75
+ w = torch.exp(tw) * wa
76
+ h = torch.exp(th) * ha
77
+ ycenter = ty * ha + ycenter_a
78
+ xcenter = tx * wa + xcenter_a
79
+ ymin = ycenter - h / 2.
80
+ xmin = xcenter - w / 2.
81
+ ymax = ycenter + h / 2.
82
+ xmax = xcenter + w / 2.
83
+ if output_xyxy:
84
+ out = torch.stack([xmin, ymin, xmax, ymax], dim=1)
85
+ else:
86
+ out = torch.stack([ymin, xmin, ymax, xmax], dim=1)
87
+ return out
88
+
89
+
90
+ def clip_boxes_xyxy(boxes: torch.Tensor, size: torch.Tensor):
91
+ boxes = boxes.clamp(min=0)
92
+ size = torch.cat([size, size], dim=0)
93
+ boxes = boxes.min(size)
94
+ return boxes
95
+
96
+
97
+ def generate_detections(
98
+ cls_outputs, box_outputs, anchor_boxes, indices, classes,
99
+ img_scale: Optional[torch.Tensor], img_size: Optional[torch.Tensor],
100
+ max_det_per_image: int = MAX_DETECTIONS_PER_IMAGE, soft_nms: bool = False):
101
+ """Generates detections with RetinaNet model outputs and anchors.
102
+
103
+ Args:
104
+ cls_outputs: a torch tensor with shape [N, 1], which has the highest class
105
+ scores on all feature levels. The N is the number of selected
106
+ top-K total anchors on all levels. (k being MAX_DETECTION_POINTS)
107
+
108
+ box_outputs: a torch tensor with shape [N, 4], which stacks box regression
109
+ outputs on all feature levels. The N is the number of selected top-k
110
+ total anchors on all levels. (k being MAX_DETECTION_POINTS)
111
+
112
+ anchor_boxes: a torch tensor with shape [N, 4], which stacks anchors on all
113
+ feature levels. The N is the number of selected top-k total anchors on all levels.
114
+
115
+ indices: a torch tensor with shape [N], which is the indices from top-k selection.
116
+
117
+ classes: a torch tensor with shape [N], which represents the class
118
+ prediction on all selected anchors from top-k selection.
119
+
120
+ img_scale: a float tensor representing the scale between original image
121
+ and input image for the detector. It is used to rescale detections for
122
+ evaluating with the original groundtruth annotations.
123
+
124
+ max_det_per_image: an int constant, added as argument to make torchscript happy
125
+
126
+ Returns:
127
+ detections: detection results in a tensor with shape [MAX_DETECTION_POINTS, 6],
128
+ each row representing [x_min, y_min, x_max, y_max, score, class]
129
+ """
130
+ assert box_outputs.shape[-1] == 4
131
+ assert anchor_boxes.shape[-1] == 4
132
+ assert cls_outputs.shape[-1] == 1
133
+
134
+ anchor_boxes = anchor_boxes[indices, :]
135
+
136
+ # Appply bounding box regression to anchors, boxes are converted to xyxy
137
+ # here since PyTorch NMS expects them in that form.
138
+ boxes = decode_box_outputs(box_outputs.float(), anchor_boxes, output_xyxy=True)
139
+ if img_scale is not None and img_size is not None:
140
+ boxes = clip_boxes_xyxy(boxes, img_size / img_scale) # clip before NMS better?
141
+
142
+ scores = cls_outputs.sigmoid().squeeze(1).float()
143
+ if soft_nms:
144
+ top_detection_idx, soft_scores = batched_soft_nms(
145
+ boxes, scores, classes, method_gaussian=True, iou_threshold=0.3, score_threshold=.001)
146
+ scores[top_detection_idx] = soft_scores
147
+ else:
148
+ top_detection_idx = batched_nms(boxes, scores, classes, iou_threshold=0.5)
149
+
150
+ # keep only topk scoring predictions
151
+ top_detection_idx = top_detection_idx[:max_det_per_image]
152
+ boxes = boxes[top_detection_idx]
153
+ scores = scores[top_detection_idx, None]
154
+ classes = classes[top_detection_idx, None] + 1 # back to class idx with background class = 0
155
+
156
+ if img_scale is not None:
157
+ boxes = boxes * img_scale
158
+
159
+ # FIXME add option to convert boxes back to yxyx? Otherwise must be handled downstream if
160
+ # that is the preferred output format.
161
+
162
+ # stack em and pad out to MAX_DETECTIONS_PER_IMAGE if necessary
163
+ num_det = len(top_detection_idx)
164
+ detections = torch.cat([boxes, scores, classes.float()], dim=1)
165
+ if num_det < max_det_per_image:
166
+ detections = torch.cat([
167
+ detections,
168
+ torch.zeros((max_det_per_image - num_det, 6), device=detections.device, dtype=detections.dtype)
169
+ ], dim=0)
170
+ return detections
171
+
172
+
173
+ def get_feat_sizes(image_size: Tuple[int, int], max_level: int):
174
+ """Get feat widths and heights for all levels.
175
+ Args:
176
+ image_size: a tuple (H, W)
177
+ max_level: maximum feature level.
178
+ Returns:
179
+ feat_sizes: a list of tuples (height, width) for each level.
180
+ """
181
+ feat_size = image_size
182
+ feat_sizes = [feat_size]
183
+ for _ in range(1, max_level + 1):
184
+ feat_size = ((feat_size[0] - 1) // 2 + 1, (feat_size[1] - 1) // 2 + 1)
185
+ feat_sizes.append(feat_size)
186
+ return feat_sizes
187
+
188
+
189
+ class Anchors(nn.Module):
190
+ """RetinaNet Anchors class."""
191
+
192
+ def __init__(self, min_level, max_level, num_scales, aspect_ratios, anchor_scale, image_size: Tuple[int, int]):
193
+ """Constructs multiscale RetinaNet anchors.
194
+
195
+ Args:
196
+ min_level: integer number of minimum level of the output feature pyramid.
197
+
198
+ max_level: integer number of maximum level of the output feature pyramid.
199
+
200
+ num_scales: integer number representing intermediate scales added
201
+ on each level. For instance, num_scales=2 adds two additional
202
+ anchor scales [2^0, 2^0.5] on each level.
203
+
204
+ aspect_ratios: list of tuples representing the aspect ratio anchors added
205
+ on each level. For instance, aspect_ratios =
206
+ [(1, 1), (1.4, 0.7), (0.7, 1.4)] adds three anchors on each level.
207
+
208
+ anchor_scale: float number representing the scale of size of the base
209
+ anchor to the feature stride 2^level.
210
+
211
+ image_size: Sequence specifying input image size of model (H, W).
212
+ The image_size should be divided by the largest feature stride 2^max_level.
213
+ """
214
+ super(Anchors, self).__init__()
215
+ self.min_level = min_level
216
+ self.max_level = max_level
217
+ self.num_scales = num_scales
218
+ self.aspect_ratios = aspect_ratios
219
+ if isinstance(anchor_scale, Sequence):
220
+ assert len(anchor_scale) == max_level - min_level + 1
221
+ self.anchor_scales = anchor_scale
222
+ else:
223
+ self.anchor_scales = [anchor_scale] * (max_level - min_level + 1)
224
+
225
+ assert isinstance(image_size, Sequence) and len(image_size) == 2
226
+ # FIXME this restriction can likely be relaxed with some additional changes
227
+ assert image_size[0] % 2 ** max_level == 0, 'Image size must be divisible by 2 ** max_level (128)'
228
+ assert image_size[1] % 2 ** max_level == 0, 'Image size must be divisible by 2 ** max_level (128)'
229
+ self.image_size = tuple(image_size)
230
+ self.feat_sizes = get_feat_sizes(image_size, max_level)
231
+ self.config = self._generate_configs()
232
+ self.register_buffer('boxes', self._generate_boxes())
233
+
234
+ @classmethod
235
+ def from_config(cls, config):
236
+ return cls(
237
+ config.min_level, config.max_level,
238
+ config.num_scales, config.aspect_ratios,
239
+ config.anchor_scale, config.image_size)
240
+
241
+ def _generate_configs(self):
242
+ """Generate configurations of anchor boxes."""
243
+ anchor_configs = {}
244
+ feat_sizes = self.feat_sizes
245
+ for level in range(self.min_level, self.max_level + 1):
246
+ anchor_configs[level] = []
247
+ for scale_octave in range(self.num_scales):
248
+ for aspect in self.aspect_ratios:
249
+ anchor_configs[level].append(
250
+ ((feat_sizes[0][0] // feat_sizes[level][0],
251
+ feat_sizes[0][1] // feat_sizes[level][1]),
252
+ scale_octave / float(self.num_scales), aspect,
253
+ self.anchor_scales[level - self.min_level]))
254
+ return anchor_configs
255
+
256
+ def _generate_boxes(self):
257
+ """Generates multiscale anchor boxes."""
258
+ boxes_all = []
259
+ for _, configs in self.config.items():
260
+ boxes_level = []
261
+ for config in configs:
262
+ stride, octave_scale, aspect, anchor_scale = config
263
+ base_anchor_size_x = anchor_scale * stride[1] * 2 ** octave_scale
264
+ base_anchor_size_y = anchor_scale * stride[0] * 2 ** octave_scale
265
+ if isinstance(aspect, Sequence):
266
+ aspect_x = aspect[0]
267
+ aspect_y = aspect[1]
268
+ else:
269
+ aspect_x = np.sqrt(aspect)
270
+ aspect_y = 1.0 / aspect_x
271
+ anchor_size_x_2 = base_anchor_size_x * aspect_x / 2.0
272
+ anchor_size_y_2 = base_anchor_size_y * aspect_y / 2.0
273
+
274
+ x = np.arange(stride[1] / 2, self.image_size[1], stride[1])
275
+ y = np.arange(stride[0] / 2, self.image_size[0], stride[0])
276
+ xv, yv = np.meshgrid(x, y)
277
+ xv = xv.reshape(-1)
278
+ yv = yv.reshape(-1)
279
+
280
+ boxes = np.vstack((yv - anchor_size_y_2, xv - anchor_size_x_2,
281
+ yv + anchor_size_y_2, xv + anchor_size_x_2))
282
+ boxes = np.swapaxes(boxes, 0, 1)
283
+ boxes_level.append(np.expand_dims(boxes, axis=1))
284
+
285
+ # concat anchors on the same level into shape NxAx4
286
+ boxes_level = np.concatenate(boxes_level, axis=1)
287
+ boxes_all.append(boxes_level.reshape([-1, 4]))
288
+
289
+ anchor_boxes = np.vstack(boxes_all)
290
+ anchor_boxes = torch.from_numpy(anchor_boxes).float()
291
+ return anchor_boxes
292
+
293
+ def get_anchors_per_location(self):
294
+ return self.num_scales * len(self.aspect_ratios)
295
+
296
+
297
+ class AnchorLabeler(object):
298
+ """Labeler for multiscale anchor boxes.
299
+ """
300
+
301
+ def __init__(self, anchors, num_classes: int, match_threshold: float = 0.5):
302
+ """Constructs anchor labeler to assign labels to anchors.
303
+
304
+ Args:
305
+ anchors: an instance of class Anchors.
306
+
307
+ num_classes: integer number representing number of classes in the dataset.
308
+
309
+ match_threshold: float number between 0 and 1 representing the threshold
310
+ to assign positive labels for anchors.
311
+ """
312
+ similarity_calc = IouSimilarity()
313
+ matcher = ArgMaxMatcher(
314
+ match_threshold,
315
+ unmatched_threshold=match_threshold,
316
+ negatives_lower_than_unmatched=True,
317
+ force_match_for_each_row=True)
318
+ box_coder = FasterRcnnBoxCoder()
319
+
320
+ self.target_assigner = TargetAssigner(similarity_calc, matcher, box_coder)
321
+ self.anchors = anchors
322
+ self.match_threshold = match_threshold
323
+ self.num_classes = num_classes
324
+ self.indices_cache = {}
325
+
326
+ def label_anchors(self, gt_boxes, gt_classes, filter_valid=True):
327
+ """Labels anchors with ground truth inputs.
328
+
329
+ Args:
330
+ gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
331
+ For each row, it stores [y0, x0, y1, x1] for four corners of a box.
332
+
333
+ gt_classes: A integer tensor with shape [N, 1] representing groundtruth classes.
334
+
335
+ filter_valid: Filter out any boxes w/ gt class <= -1 before assigning
336
+
337
+ Returns:
338
+ cls_targets_dict: ordered dictionary with keys [min_level, min_level+1, ..., max_level].
339
+ The values are tensor with shape [height_l, width_l, num_anchors]. The height_l and width_l
340
+ represent the dimension of class logits at l-th level.
341
+
342
+ box_targets_dict: ordered dictionary with keys [min_level, min_level+1, ..., max_level].
343
+ The values are tensor with shape [height_l, width_l, num_anchors * 4]. The height_l and
344
+ width_l represent the dimension of bounding box regression output at l-th level.
345
+
346
+ num_positives: scalar tensor storing number of positives in an image.
347
+ """
348
+ cls_targets_out = []
349
+ box_targets_out = []
350
+
351
+ if filter_valid:
352
+ valid_idx = gt_classes > -1 # filter gt targets w/ label <= -1
353
+ gt_boxes = gt_boxes[valid_idx]
354
+ gt_classes = gt_classes[valid_idx]
355
+
356
+ cls_targets, box_targets, matches = self.target_assigner.assign(
357
+ BoxList(self.anchors.boxes), BoxList(gt_boxes), gt_classes)
358
+
359
+ # class labels start from 1 and the background class = -1
360
+ cls_targets = (cls_targets - 1).long()
361
+
362
+ # Unpack labels.
363
+ """Unpacks an array of cls/box into multiple scales."""
364
+ count = 0
365
+ for level in range(self.anchors.min_level, self.anchors.max_level + 1):
366
+ feat_size = self.anchors.feat_sizes[level]
367
+ steps = feat_size[0] * feat_size[1] * self.anchors.get_anchors_per_location()
368
+ cls_targets_out.append(cls_targets[count:count + steps].view([feat_size[0], feat_size[1], -1]))
369
+ box_targets_out.append(box_targets[count:count + steps].view([feat_size[0], feat_size[1], -1]))
370
+ count += steps
371
+
372
+ num_positives = (matches.match_results > -1).float().sum()
373
+
374
+ return cls_targets_out, box_targets_out, num_positives
375
+
376
+ def batch_label_anchors(self, gt_boxes, gt_classes, filter_valid=True):
377
+ batch_size = len(gt_boxes)
378
+ assert batch_size == len(gt_classes)
379
+ num_levels = self.anchors.max_level - self.anchors.min_level + 1
380
+ cls_targets_out = [[] for _ in range(num_levels)]
381
+ box_targets_out = [[] for _ in range(num_levels)]
382
+ num_positives_out = []
383
+
384
+ anchor_box_list = BoxList(self.anchors.boxes)
385
+ for i in range(batch_size):
386
+ last_sample = i == batch_size - 1
387
+
388
+ if filter_valid:
389
+ valid_idx = gt_classes[i] > -1 # filter gt targets w/ label <= -1
390
+ gt_box_list = BoxList(gt_boxes[i][valid_idx])
391
+ gt_class_i = gt_classes[i][valid_idx]
392
+ else:
393
+ gt_box_list = BoxList(gt_boxes[i])
394
+ gt_class_i = gt_classes[i]
395
+ cls_targets, box_targets, matches = self.target_assigner.assign(anchor_box_list, gt_box_list, gt_class_i)
396
+
397
+ # class labels start from 1 and the background class = -1
398
+ cls_targets = (cls_targets - 1).long()
399
+
400
+ # Unpack labels.
401
+ """Unpacks an array of cls/box into multiple scales."""
402
+ count = 0
403
+ for level in range(self.anchors.min_level, self.anchors.max_level + 1):
404
+ level_idx = level - self.anchors.min_level
405
+ feat_size = self.anchors.feat_sizes[level]
406
+ steps = feat_size[0] * feat_size[1] * self.anchors.get_anchors_per_location()
407
+ cls_targets_out[level_idx].append(
408
+ cls_targets[count:count + steps].view([feat_size[0], feat_size[1], -1]))
409
+ box_targets_out[level_idx].append(
410
+ box_targets[count:count + steps].view([feat_size[0], feat_size[1], -1]))
411
+ count += steps
412
+ if last_sample:
413
+ cls_targets_out[level_idx] = torch.stack(cls_targets_out[level_idx])
414
+ box_targets_out[level_idx] = torch.stack(box_targets_out[level_idx])
415
+
416
+ num_positives_out.append((matches.match_results > -1).float().sum())
417
+ if last_sample:
418
+ num_positives_out = torch.stack(num_positives_out)
419
+
420
+ return cls_targets_out, box_targets_out, num_positives_out
421
+
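
As a quick orientation to the anchor generation above, a minimal sketch that builds the anchor grid for the tf_efficientdet_d2 config used by this Space and decodes a dummy set of offsets; it assumes ./efficientdet is on sys.path, as app.py arranges:

    import sys

    sys.path.append("./efficientdet")

    import torch

    from effdet import get_efficientdet_config
    from effdet.anchors import Anchors, decode_box_outputs

    config = get_efficientdet_config("tf_efficientdet_d2")  # 768x768 input, levels 3..7
    anchors = Anchors.from_config(config)

    # num_scales (3) * len(aspect_ratios) (3) anchors per cell, summed over the 5 pyramid levels
    print(anchors.boxes.shape)  # torch.Size([110484, 4])

    # zero offsets decode back to the anchors themselves, returned here in xyxy order
    boxes = decode_box_outputs(torch.zeros_like(anchors.boxes), anchors.boxes, output_xyxy=True)
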
efficientdet/effdet/bench.py ADDED
@@ -0,0 +1,143 @@
1
+ """ PyTorch EfficientDet support benches
2
+
3
+ Hacked together by Ross Wightman
4
+ """
5
+ from typing import Optional, Dict, List
6
+ import torch
7
+ import torch.nn as nn
8
+ from timm.utils import ModelEma
9
+ from .anchors import Anchors, AnchorLabeler, generate_detections, MAX_DETECTION_POINTS
10
+ from .loss import DetectionLoss
11
+
12
+
13
+ def _post_process(
14
+ cls_outputs: List[torch.Tensor],
15
+ box_outputs: List[torch.Tensor],
16
+ num_levels: int,
17
+ num_classes: int,
18
+ max_detection_points: int = MAX_DETECTION_POINTS,
19
+ ):
20
+ """Selects top-k predictions.
21
+
22
+ Post-proc code adapted from Tensorflow version at: https://github.com/google/automl/tree/master/efficientdet
23
+ and optimized for PyTorch.
24
+
25
+ Args:
26
+ cls_outputs: a list of tensors ordered by level, with values
27
+ representing logits in [batch_size, height, width, num_anchors].
28
+
29
+ box_outputs: a list of tensors ordered by level, with values
30
+ representing box regression targets in [batch_size, height, width, num_anchors * 4].
31
+
32
+ num_levels (int): number of feature levels
33
+
34
+ num_classes (int): number of output classes
35
+ """
36
+ batch_size = cls_outputs[0].shape[0]
37
+ cls_outputs_all = torch.cat([
38
+ cls_outputs[level].permute(0, 2, 3, 1).reshape([batch_size, -1, num_classes])
39
+ for level in range(num_levels)], 1)
40
+
41
+ box_outputs_all = torch.cat([
42
+ box_outputs[level].permute(0, 2, 3, 1).reshape([batch_size, -1, 4])
43
+ for level in range(num_levels)], 1)
44
+
45
+ _, cls_topk_indices_all = torch.topk(cls_outputs_all.reshape(batch_size, -1), dim=1, k=max_detection_points)
46
+ indices_all = cls_topk_indices_all // num_classes
47
+ classes_all = cls_topk_indices_all % num_classes
48
+
49
+ box_outputs_all_after_topk = torch.gather(
50
+ box_outputs_all, 1, indices_all.unsqueeze(2).expand(-1, -1, 4))
51
+
52
+ cls_outputs_all_after_topk = torch.gather(
53
+ cls_outputs_all, 1, indices_all.unsqueeze(2).expand(-1, -1, num_classes))
54
+ cls_outputs_all_after_topk = torch.gather(
55
+ cls_outputs_all_after_topk, 2, classes_all.unsqueeze(2))
56
+
57
+ return cls_outputs_all_after_topk, box_outputs_all_after_topk, indices_all, classes_all
58
+
59
+
60
+ @torch.jit.script
61
+ def _batch_detection(
62
+ batch_size: int, class_out, box_out, anchor_boxes, indices, classes,
63
+ img_scale: Optional[torch.Tensor] = None, img_size: Optional[torch.Tensor] = None):
64
+ batch_detections = []
65
+ # FIXME we may be able to do this as a batch with some tensor reshaping/indexing, PR welcome
66
+ for i in range(batch_size):
67
+ img_scale_i = None if img_scale is None else img_scale[i]
68
+ img_size_i = None if img_size is None else img_size[i]
69
+ detections = generate_detections(
70
+ class_out[i], box_out[i], anchor_boxes, indices[i], classes[i], img_scale_i, img_size_i)
71
+ batch_detections.append(detections)
72
+ return torch.stack(batch_detections, dim=0)
73
+
74
+
75
+ class DetBenchPredict(nn.Module):
76
+ def __init__(self, model):
77
+ super(DetBenchPredict, self).__init__()
78
+ self.model = model
79
+ self.config = model.config # FIXME remove this when we can use @property (torchscript limitation)
80
+ self.num_levels = model.config.num_levels
81
+ self.num_classes = model.config.num_classes
82
+ self.anchors = Anchors.from_config(model.config)
83
+
84
+ def forward(self, x, img_info: Optional[Dict[str, torch.Tensor]] = None):
85
+ class_out, box_out = self.model(x)
86
+ class_out, box_out, indices, classes = _post_process(
87
+ class_out, box_out, num_levels=self.num_levels, num_classes=self.num_classes)
88
+ if img_info is None:
89
+ img_scale, img_size = None, None
90
+ else:
91
+ img_scale, img_size = img_info['img_scale'], img_info['img_size']
92
+ return _batch_detection(
93
+ x.shape[0], class_out, box_out, self.anchors.boxes, indices, classes, img_scale, img_size)
94
+
95
+
96
+ class DetBenchTrain(nn.Module):
97
+ def __init__(self, model, create_labeler=True):
98
+ super(DetBenchTrain, self).__init__()
99
+ self.model = model
100
+ self.config = model.config # FIXME remove this when we can use @property (torchscript limitation)
101
+ self.num_levels = model.config.num_levels
102
+ self.num_classes = model.config.num_classes
103
+ self.anchors = Anchors.from_config(model.config)
104
+ self.anchor_labeler = None
105
+ if create_labeler:
106
+ self.anchor_labeler = AnchorLabeler(self.anchors, self.num_classes, match_threshold=0.5)
107
+ self.loss_fn = DetectionLoss(model.config)
108
+
109
+ def forward(self, x, target: Dict[str, torch.Tensor]):
110
+ class_out, box_out = self.model(x)
111
+ if self.anchor_labeler is None:
112
+ # target should contain pre-computed anchor labels if labeler not present in bench
113
+ assert 'label_num_positives' in target
114
+ cls_targets = [target[f'label_cls_{l}'] for l in range(self.num_levels)]
115
+ box_targets = [target[f'label_bbox_{l}'] for l in range(self.num_levels)]
116
+ num_positives = target['label_num_positives']
117
+ else:
118
+ cls_targets, box_targets, num_positives = self.anchor_labeler.batch_label_anchors(
119
+ target['bbox'], target['cls'])
120
+
121
+ loss, class_loss, box_loss = self.loss_fn(class_out, box_out, cls_targets, box_targets, num_positives)
122
+ output = {'loss': loss, 'class_loss': class_loss, 'box_loss': box_loss}
123
+ if not self.training:
124
+ # if eval mode, output detections for evaluation
125
+ class_out_pp, box_out_pp, indices, classes = _post_process(
126
+ class_out, box_out, num_levels=self.num_levels, num_classes=self.num_classes)
127
+ output['detections'] = _batch_detection(
128
+ x.shape[0], class_out_pp, box_out_pp, self.anchors.boxes, indices, classes,
129
+ target['img_scale'], target['img_size'])
130
+ return output
131
+
132
+
133
+ def unwrap_bench(model):
134
+ # Unwrap a model in support bench so that various other fns can access the weights and attribs of the
135
+ # underlying model directly
136
+ if isinstance(model, ModelEma): # unwrap ModelEma
137
+ return unwrap_bench(model.ema)
138
+ elif hasattr(model, 'module'): # unwrap DDP
139
+ return unwrap_bench(model.module)
140
+ elif hasattr(model, 'model'): # unwrap Bench -> model
141
+ return unwrap_bench(model.model)
142
+ else:
143
+ return model
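
To show how the prediction bench above is meant to be used, a minimal inference sketch. It assumes ./efficientdet is on sys.path and that the EfficientDet constructor in efficientdet.py (not shown in this truncated view) accepts the config plus a pretrained_backbone flag, as in the upstream effdet library:

    import sys

    sys.path.append("./efficientdet")

    import torch

    from effdet import DetBenchPredict, EfficientDet, get_efficientdet_config

    config = get_efficientdet_config("tf_efficientdet_d2")
    model = EfficientDet(config, pretrained_backbone=False)  # assumed constructor signature
    bench = DetBenchPredict(model).eval()

    x = torch.randn(1, 3, *config.image_size)
    with torch.no_grad():
        detections = bench(x)

    # [batch, MAX_DETECTIONS_PER_IMAGE, 6], rows of [x_min, y_min, x_max, y_max, score, class]
    print(detections.shape)  # torch.Size([1, 100, 6])
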
efficientdet/effdet/config/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .config_utils import set_config_readonly, set_config_writeable
+ from .fpn_config import get_fpn_config
+ from .model_config import get_efficientdet_config, default_detection_model_configs
+ from .train_config import default_detection_train_config
efficientdet/effdet/config/config_utils.py ADDED
@@ -0,0 +1,9 @@
+ from omegaconf import OmegaConf
+
+
+ def set_config_readonly(conf):
+     OmegaConf.set_readonly(conf, True)
+
+
+ def set_config_writeable(conf):
+     OmegaConf.set_readonly(conf, False)
efficientdet/effdet/config/fpn_config.py ADDED
@@ -0,0 +1,184 @@
1
+ import itertools
2
+
3
+ from omegaconf import OmegaConf
4
+
5
+
6
+ def bifpn_config(min_level, max_level, weight_method=None):
7
+ """BiFPN config.
8
+ Adapted from https://github.com/google/automl/blob/56815c9986ffd4b508fe1d68508e268d129715c1/efficientdet/keras/fpn_configs.py
9
+ """
10
+ p = OmegaConf.create()
11
+ weight_method = weight_method or 'fastattn'
12
+
13
+ num_levels = max_level - min_level + 1
14
+ node_ids = {min_level + i: [i] for i in range(num_levels)}
15
+
16
+ level_last_id = lambda level: node_ids[level][-1]
17
+ level_all_ids = lambda level: node_ids[level]
18
+ id_cnt = itertools.count(num_levels)
19
+
20
+ p.nodes = []
21
+ for i in range(max_level - 1, min_level - 1, -1):
22
+ # top-down path.
23
+ p.nodes.append({
24
+ 'reduction': 1 << i,
25
+ 'inputs_offsets': [level_last_id(i), level_last_id(i + 1)],
26
+ 'weight_method': weight_method,
27
+ })
28
+ node_ids[i].append(next(id_cnt))
29
+
30
+ for i in range(min_level + 1, max_level + 1):
31
+ # bottom-up path.
32
+ p.nodes.append({
33
+ 'reduction': 1 << i,
34
+ 'inputs_offsets': level_all_ids(i) + [level_last_id(i - 1)],
35
+ 'weight_method': weight_method,
36
+ })
37
+ node_ids[i].append(next(id_cnt))
38
+ return p
39
+
40
+
41
+ def panfpn_config(min_level, max_level, weight_method=None):
42
+ """PAN FPN config.
43
+
44
+ This defines FPN layout from Path Aggregation Networks as an alternate to
45
+ BiFPN, it does not implement the full PAN spec.
46
+
47
+ Paper: https://arxiv.org/abs/1803.01534
48
+ """
49
+ p = OmegaConf.create()
50
+ weight_method = weight_method or 'fastattn'
51
+
52
+ num_levels = max_level - min_level + 1
53
+ node_ids = {min_level + i: [i] for i in range(num_levels)}
54
+ level_last_id = lambda level: node_ids[level][-1]
55
+ id_cnt = itertools.count(num_levels)
56
+
57
+ p.nodes = []
58
+ for i in range(max_level, min_level - 1, -1):
59
+ # top-down path.
60
+ offsets = [level_last_id(i), level_last_id(i + 1)] if i != max_level else [level_last_id(i)]
61
+ p.nodes.append({
62
+ 'reduction': 1 << i,
63
+ 'inputs_offsets': offsets,
64
+ 'weight_method': weight_method,
65
+ })
66
+ node_ids[i].append(next(id_cnt))
67
+
68
+ for i in range(min_level, max_level + 1):
69
+ # bottom-up path.
70
+ offsets = [level_last_id(i), level_last_id(i - 1)] if i != min_level else [level_last_id(i)]
71
+ p.nodes.append({
72
+ 'reduction': 1 << i,
73
+ 'inputs_offsets': offsets,
74
+ 'weight_method': weight_method,
75
+ })
76
+ node_ids[i].append(next(id_cnt))
77
+
78
+ return p
79
+
80
+
81
+ def qufpn_config(min_level, max_level, weight_method=None):
82
+ """A dynamic quad fpn config that can adapt to different min/max levels.
83
+
84
+ It extends the idea of BiFPN, and has four paths:
85
+ (up_down -> bottom_up) + (bottom_up -> up_down).
86
+
87
+ Paper: https://ieeexplore.ieee.org/document/9225379
88
+ Ref code: From contribution to TF EfficientDet
89
+ https://github.com/google/automl/blob/eb74c6739382e9444817d2ad97c4582dbe9a9020/efficientdet/keras/fpn_configs.py
90
+ """
91
+ p = OmegaConf.create()
92
+ weight_method = weight_method or 'fastattn'
93
+ quad_method = 'fastattn'
94
+ num_levels = max_level - min_level + 1
95
+ node_ids = {min_level + i: [i] for i in range(num_levels)}
96
+ level_last_id = lambda level: node_ids[level][-1]
97
+ level_all_ids = lambda level: node_ids[level]
98
+ level_first_id = lambda level: node_ids[level][0]
99
+ id_cnt = itertools.count(num_levels)
100
+
101
+ p.nodes = []
102
+ for i in range(max_level - 1, min_level - 1, -1):
103
+ # top-down path 1.
104
+ p.nodes.append({
105
+ 'reduction': 1 << i,
106
+ 'inputs_offsets': [level_last_id(i), level_last_id(i + 1)],
107
+ 'weight_method': weight_method
108
+ })
109
+ node_ids[i].append(next(id_cnt))
110
+ node_ids[max_level].append(node_ids[max_level][-1])
111
+
112
+ for i in range(min_level + 1, max_level):
113
+ # bottom-up path 2.
114
+ p.nodes.append({
115
+ 'reduction': 1 << i,
116
+ 'inputs_offsets': level_all_ids(i) + [level_last_id(i - 1)],
117
+ 'weight_method': weight_method
118
+ })
119
+ node_ids[i].append(next(id_cnt))
120
+
121
+ i = max_level
122
+ p.nodes.append({
123
+ 'reduction': 1 << i,
124
+ 'inputs_offsets': [level_first_id(i)] + [level_last_id(i - 1)],
125
+ 'weight_method': weight_method
126
+ })
127
+ node_ids[i].append(next(id_cnt))
128
+ node_ids[min_level].append(node_ids[min_level][-1])
129
+
130
+ for i in range(min_level + 1, max_level + 1, 1):
131
+ # bottom-up path 3.
132
+ p.nodes.append({
133
+ 'reduction': 1 << i,
134
+ 'inputs_offsets': [
135
+ level_first_id(i), level_last_id(i - 1) if i != min_level + 1 else level_first_id(i - 1)],
136
+ 'weight_method': weight_method
137
+ })
138
+ node_ids[i].append(next(id_cnt))
139
+ node_ids[min_level].append(node_ids[min_level][-1])
140
+
141
+ for i in range(max_level - 1, min_level, -1):
142
+ # top-down path 4.
143
+ p.nodes.append({
144
+ 'reduction': 1 << i,
145
+ 'inputs_offsets': [node_ids[i][0]] + [node_ids[i][-1]] + [level_last_id(i + 1)],
146
+ 'weight_method': weight_method
147
+ })
148
+ node_ids[i].append(next(id_cnt))
149
+ i = min_level
150
+ p.nodes.append({
151
+ 'reduction': 1 << i,
152
+ 'inputs_offsets': [node_ids[i][0]] + [level_last_id(i + 1)],
153
+ 'weight_method': weight_method
154
+ })
155
+ node_ids[i].append(next(id_cnt))
156
+ node_ids[max_level].append(node_ids[max_level][-1])
157
+
158
+ # NOTE: the order of the quad path is reversed from the original, my code expects the output of
159
+ # each FPN repeat to be same as input from backbone, in order of increasing reductions
160
+ for i in range(min_level, max_level + 1):
161
+ # quad-add path.
162
+ p.nodes.append({
163
+ 'reduction': 1 << i,
164
+ 'inputs_offsets': [node_ids[i][2], node_ids[i][4]],
165
+ 'weight_method': quad_method
166
+ })
167
+ node_ids[i].append(next(id_cnt))
168
+
169
+ return p
170
+
171
+
172
+ def get_fpn_config(fpn_name, min_level=3, max_level=7):
173
+ if not fpn_name:
174
+ fpn_name = 'bifpn_fa'
175
+ name_to_config = {
176
+ 'bifpn_sum': bifpn_config(min_level=min_level, max_level=max_level, weight_method='sum'),
177
+ 'bifpn_attn': bifpn_config(min_level=min_level, max_level=max_level, weight_method='attn'),
178
+ 'bifpn_fa': bifpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'),
179
+ 'pan_sum': panfpn_config(min_level=min_level, max_level=max_level, weight_method='sum'),
180
+ 'pan_fa': panfpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'),
181
+ 'qufpn_sum': qufpn_config(min_level=min_level, max_level=max_level, weight_method='sum'),
182
+ 'qufpn_fa': qufpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'),
183
+ }
184
+ return name_to_config[fpn_name]
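
The FPN configs above are plain node lists that the BiFPN builder later consumes; a small sketch for inspecting the topology they describe (same sys.path assumption as app.py):

    import sys

    sys.path.append("./efficientdet")

    from effdet.config import get_fpn_config

    p = get_fpn_config("bifpn_fa", min_level=3, max_level=7)
    print(len(p.nodes))  # 8 fusion nodes: top-down P6..P3, then bottom-up P4..P7
    for node in p.nodes:
        # reduction is the feature stride (1 << level); inputs_offsets index earlier nodes
        print(node.reduction, list(node.inputs_offsets), node.weight_method)
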
efficientdet/effdet/config/model_config.py ADDED
@@ -0,0 +1,538 @@
1
+ """EfficientDet Configurations
2
+
3
+ Adapted from official impl at https://github.com/google/automl/tree/master/efficientdet
4
+
5
+ TODO use a different config system (OmegaConfig -> Hydra?), separate model from train specific hparams
6
+ """
7
+
8
+ from omegaconf import OmegaConf
9
+ from copy import deepcopy
10
+
11
+
12
+ def default_detection_model_configs():
13
+ """Returns a default detection configs."""
14
+ h = OmegaConf.create()
15
+
16
+ # model name.
17
+ h.name = 'tf_efficientdet_d1'
18
+
19
+ h.backbone_name = 'tf_efficientnet_b1'
20
+ h.backbone_args = None # FIXME sort out kwargs vs config for backbone creation
21
+
22
+ # model specific, input preprocessing parameters
23
+ h.image_size = (640, 640)
24
+
25
+ # dataset specific head parameters
26
+ h.num_classes = 90
27
+
28
+ # feature + anchor config
29
+ h.min_level = 3
30
+ h.max_level = 7
31
+ h.num_levels = h.max_level - h.min_level + 1
32
+ h.num_scales = 3
33
+ h.aspect_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
34
+ # ratio w/h: 2.0 means w=1.4, h=0.7. Can be computed with k-mean per dataset.
35
+ #h.aspect_ratios = [1.0, 2.0, 0.5]
36
+ h.anchor_scale = 4.0
37
+
38
+ # FPN and head config
39
+ h.pad_type = 'same' # original TF models require an equivalent of Tensorflow 'SAME' padding
40
+ h.act_type = 'swish'
41
+ h.norm_layer = None # defaults to batch norm when None
42
+ h.norm_kwargs = dict(eps=.001, momentum=.01)
43
+ h.box_class_repeats = 3
44
+ h.fpn_cell_repeats = 3
45
+ h.fpn_channels = 88
46
+ h.separable_conv = True
47
+ h.apply_bn_for_resampling = True
48
+ h.conv_after_downsample = False
49
+ h.conv_bn_relu_pattern = False
50
+ h.use_native_resize_op = False
51
+ h.pooling_type = None
52
+ h.redundant_bias = True # original TF models have back to back bias + BN layers, not necessary!
53
+ h.head_bn_level_first = False # change order of BN in head repeat list of lists, True for torchscript compat
54
+
55
+ h.fpn_name = None
56
+ h.fpn_config = None
57
+ h.fpn_drop_path_rate = 0. # No stochastic depth in default. NOTE not currently used, unstable training
58
+
59
+ # classification loss (used by train bench)
60
+ h.alpha = 0.25
61
+ h.gamma = 1.5
62
+ h.label_smoothing = 0. # only supported if new_focal == True
63
+ h.new_focal = False # use new focal loss (supports label smoothing but uses more mem, less optimal w/ jit script)
64
+ h.jit_loss = False # torchscript jit for loss fn speed improvement, can impact stability and/or increase mem usage
65
+
66
+ # localization loss (used by train bench)
67
+ h.delta = 0.1
68
+ h.box_loss_weight = 50.0
69
+
70
+ return h
71
+
72
+
73
+ efficientdet_model_param_dict = dict(
74
+ # Models with PyTorch friendly padding and my PyTorch pretrained backbones, training TBD
75
+ efficientdet_d0=dict(
76
+ name='efficientdet_d0',
77
+ backbone_name='efficientnet_b0',
78
+ image_size=(512, 512),
79
+ fpn_channels=64,
80
+ fpn_cell_repeats=3,
81
+ box_class_repeats=3,
82
+ pad_type='',
83
+ redundant_bias=False,
84
+ backbone_args=dict(drop_path_rate=0.1),
85
+ url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/efficientdet_d0-f3276ba8.pth',
86
+ ),
87
+ efficientdet_d1=dict(
88
+ name='efficientdet_d1',
89
+ backbone_name='efficientnet_b1',
90
+ image_size=(640, 640),
91
+ fpn_channels=88,
92
+ fpn_cell_repeats=4,
93
+ box_class_repeats=3,
94
+ pad_type='',
95
+ redundant_bias=False,
96
+ backbone_args=dict(drop_path_rate=0.2),
97
+ url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/efficientdet_d1-bb7e98fe.pth',
98
+ ),
99
+ efficientdet_d2=dict(
100
+ name='efficientdet_d2',
101
+ backbone_name='efficientnet_b2',
102
+ image_size=(768, 768),
103
+ fpn_channels=112,
104
+ fpn_cell_repeats=5,
105
+ box_class_repeats=3,
106
+ pad_type='',
107
+ redundant_bias=False,
108
+ backbone_args=dict(drop_path_rate=0.2),
109
+ url='', # no pretrained weights yet
110
+ ),
111
+ efficientdet_d3=dict(
112
+ name='efficientdet_d3',
113
+ backbone_name='efficientnet_b3',
114
+ image_size=(896, 896),
115
+ fpn_channels=160,
116
+ fpn_cell_repeats=6,
117
+ box_class_repeats=4,
118
+ pad_type='',
119
+ redundant_bias=False,
120
+ backbone_args=dict(drop_path_rate=0.2),
121
+ url='', # no pretrained weights yet
122
+ ),
123
+ efficientdet_d4=dict(
124
+ name='efficientdet_d4',
125
+ backbone_name='efficientnet_b4',
126
+ image_size=(1024, 1024),
127
+ fpn_channels=224,
128
+ fpn_cell_repeats=7,
129
+ box_class_repeats=4,
130
+ backbone_args=dict(drop_path_rate=0.2),
131
+ ),
132
+ efficientdet_d5=dict(
133
+ name='efficientdet_d5',
134
+ backbone_name='efficientnet_b5',
135
+ image_size=(1280, 1280),
136
+ fpn_channels=288,
137
+ fpn_cell_repeats=7,
138
+ box_class_repeats=4,
139
+ backbone_args=dict(drop_path_rate=0.2),
140
+ url='',
141
+ ),
142
+
143
+ # My own experimental configs with alternate models, training TBD
144
+ # Note: any 'timm' model in the EfficientDet family can be used as a backbone here.
145
+ resdet50=dict(
146
+ name='resdet50',
147
+ backbone_name='resnet50',
148
+ image_size=(640, 640),
149
+ fpn_channels=88,
150
+ fpn_cell_repeats=4,
151
+ box_class_repeats=3,
152
+ pad_type='',
153
+ act_type='relu',
154
+ redundant_bias=False,
155
+ separable_conv=False,
156
+ backbone_args=dict(drop_path_rate=0.2),
157
+ url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/resdet50_416-08676892.pth',
158
+ ),
159
+ cspresdet50=dict(
160
+ name='cspresdet50',
161
+ backbone_name='cspresnet50',
162
+ image_size=(640, 640),
163
+ aspect_ratios=[1.0, 2.0, 0.5],
164
+ fpn_channels=88,
165
+ fpn_cell_repeats=4,
166
+ box_class_repeats=3,
167
+ pad_type='',
168
+ act_type='leaky_relu',
169
+ redundant_bias=False,
170
+ separable_conv=False,
171
+ head_bn_level_first=True,
172
+ backbone_args=dict(drop_path_rate=0.2),
173
+ url='',
174
+ ),
175
+ cspresdext50=dict(
176
+ name='cspresdext50',
177
+ backbone_name='cspresnext50',
178
+ image_size=(640, 640),
179
+ aspect_ratios=[1.0, 2.0, 0.5],
180
+ fpn_channels=88,
181
+ fpn_cell_repeats=4,
182
+ box_class_repeats=3,
183
+ pad_type='',
184
+ act_type='leaky_relu',
185
+ redundant_bias=False,
186
+ separable_conv=False,
187
+ head_bn_level_first=True,
188
+ backbone_args=dict(drop_path_rate=0.2),
189
+ url='',
190
+ ),
191
+ cspresdext50pan=dict(
192
+ name='cspresdext50pan',
193
+ backbone_name='cspresnext50',
194
+ image_size=(640, 640),
195
+ aspect_ratios=[1.0, 2.0, 0.5],
196
+ fpn_channels=88,
197
+ fpn_cell_repeats=3,
198
+ box_class_repeats=3,
199
+ pad_type='',
200
+ act_type='leaky_relu',
201
+ fpn_name='pan_fa', # PAN FPN experiment
202
+ redundant_bias=False,
203
+ separable_conv=False,
204
+ head_bn_level_first=True,
205
+ backbone_args=dict(drop_path_rate=0.2),
206
+ url='',
207
+ ),
208
+ cspdarkdet53=dict(
209
+ name='cspdarkdet53',
210
+ backbone_name='cspdarknet53',
211
+ image_size=(640, 640),
212
+ aspect_ratios=[1.0, 2.0, 0.5],
213
+ fpn_channels=88,
214
+ fpn_cell_repeats=4,
215
+ box_class_repeats=3,
216
+ pad_type='',
217
+ act_type='leaky_relu',
218
+ redundant_bias=False,
219
+ separable_conv=False,
220
+ head_bn_level_first=True,
221
+ backbone_args=dict(drop_path_rate=0.2),
222
+ url='',
223
+ ),
224
+ mixdet_m=dict(
225
+ name='mixdet_m',
226
+ backbone_name='mixnet_m',
227
+ image_size=(512, 512),
228
+ aspect_ratios=[1.0, 2.0, 0.5],
229
+ fpn_channels=64,
230
+ fpn_cell_repeats=3,
231
+ box_class_repeats=3,
232
+ pad_type='',
233
+ redundant_bias=False,
234
+ head_bn_level_first=True,
235
+ backbone_args=dict(drop_path_rate=0.1),
236
+ url='', # no pretrained weights yet
237
+ ),
238
+ mixdet_l=dict(
239
+ name='mixdet_l',
240
+ backbone_name='mixnet_l',
241
+ image_size=(640, 640),
242
+ aspect_ratios=[1.0, 2.0, 0.5],
243
+ fpn_channels=88,
244
+ fpn_cell_repeats=4,
245
+ box_class_repeats=3,
246
+ pad_type='',
247
+ redundant_bias=False,
248
+ head_bn_level_first=True,
249
+ backbone_args=dict(drop_path_rate=0.2),
250
+ url='', # no pretrained weights yet
251
+ ),
252
+ mobiledetv2_110d=dict(
253
+ name='mobiledetv2_110d',
254
+ backbone_name='mobilenetv2_110d',
255
+ image_size=(384, 384),
256
+ aspect_ratios=[1.0, 2.0, 0.5],
257
+ fpn_channels=48,
258
+ fpn_cell_repeats=3,
259
+ box_class_repeats=3,
260
+ pad_type='',
261
+ act_type='relu6',
262
+ redundant_bias=False,
263
+ head_bn_level_first=True,
264
+ backbone_args=dict(drop_path_rate=0.05),
265
+ url='', # no pretrained weights yet
266
+ ),
267
+ mobiledetv2_120d=dict(
268
+ name='mobiledetv2_120d',
269
+ backbone_name='mobilenetv2_120d',
270
+ image_size=(512, 512),
271
+ aspect_ratios=[1.0, 2.0, 0.5],
272
+ fpn_channels=56,
273
+ fpn_cell_repeats=3,
274
+ box_class_repeats=3,
275
+ pad_type='',
276
+ act_type='relu6',
277
+ redundant_bias=False,
278
+ head_bn_level_first=True,
279
+ backbone_args=dict(drop_path_rate=0.1),
280
+ url='', # no pretrained weights yet
281
+ ),
282
+ mobiledetv3_large=dict(
283
+ name='mobiledetv3_large',
284
+ backbone_name='mobilenetv3_large_100',
285
+ image_size=(512, 512),
286
+ aspect_ratios=[1.0, 2.0, 0.5],
287
+ fpn_channels=64,
288
+ fpn_cell_repeats=3,
289
+ box_class_repeats=3,
290
+ pad_type='',
291
+ act_type='hard_swish',
292
+ redundant_bias=False,
293
+ head_bn_level_first=True,
294
+ backbone_args=dict(drop_path_rate=0.1),
295
+ url='', # no pretrained weights yet
296
+ ),
297
+ efficientdet_q0=dict(
298
+ name='efficientdet_q0',
299
+ backbone_name='efficientnet_b0',
300
+ image_size=(512, 512),
301
+ fpn_channels=64,
302
+ fpn_cell_repeats=3,
303
+ box_class_repeats=3,
304
+ pad_type='',
305
+ fpn_name='qufpn_fa', # quad-fpn + fast attn experiment
306
+ redundant_bias=False,
307
+ head_bn_level_first=True,
308
+ backbone_args=dict(drop_path_rate=0.1),
309
+ url='',
310
+ ),
311
+ efficientdet_w0=dict(
312
+ name='efficientdet_w0', # 'wide'
313
+ backbone_name='efficientnet_b0',
314
+ image_size=(512, 512),
315
+ aspect_ratios=[1.0, 2.0, 0.5],
316
+ fpn_channels=80,
317
+ fpn_cell_repeats=3,
318
+ box_class_repeats=3,
319
+ pad_type='',
320
+ redundant_bias=False,
321
+ head_bn_level_first=True,
322
+ backbone_args=dict(
323
+ drop_path_rate=0.1,
324
+ feature_location='depthwise'), # features from after DW/SE in IR block
325
+ url='', # no pretrained weights yet
326
+ ),
327
+ efficientdet_es=dict(
328
+ name='efficientdet_es', #EdgeTPU-Small
329
+ backbone_name='efficientnet_es',
330
+ image_size=(512, 512),
331
+ aspect_ratios=[1.0, 2.0, 0.5],
332
+ fpn_channels=72,
333
+ fpn_cell_repeats=3,
334
+ box_class_repeats=3,
335
+ pad_type='',
336
+ act_type='relu',
337
+ redundant_bias=False,
338
+ head_bn_level_first=True,
339
+ separable_conv=False,
340
+ backbone_args=dict(drop_path_rate=0.1),
341
+ url='',
342
+ ),
343
+ efficientdet_em=dict(
344
+ name='efficientdet_em', # Edge-TPU Medium
345
+ backbone_name='efficientnet_em',
346
+ image_size=(640, 640),
347
+ aspect_ratios=[1.0, 2.0, 0.5],
348
+ fpn_channels=96,
349
+ fpn_cell_repeats=4,
350
+ box_class_repeats=3,
351
+ pad_type='',
352
+ act_type='relu',
353
+ redundant_bias=False,
354
+ head_bn_level_first=True,
355
+ separable_conv=False,
356
+ backbone_args=dict(drop_path_rate=0.2),
357
+ url='', # no pretrained weights yet
358
+ ),
359
+ efficientdet_lite0=dict(
360
+ name='efficientdet_lite0',
361
+ backbone_name='efficientnet_lite0',
362
+ image_size=(512, 512),
363
+ fpn_channels=64,
364
+ fpn_cell_repeats=3,
365
+ box_class_repeats=3,
366
+ act_type='relu',
367
+ redundant_bias=False,
368
+ head_bn_level_first=True,
369
+ backbone_args=dict(drop_path_rate=0.1),
370
+ url='',
371
+ ),
372
+
373
+ # Models ported from Tensorflow with pretrained backbones ported from Tensorflow
374
+ tf_efficientdet_d0=dict(
375
+ name='tf_efficientdet_d0',
376
+ backbone_name='tf_efficientnet_b0',
377
+ image_size=(512, 512),
378
+ fpn_channels=64,
379
+ fpn_cell_repeats=3,
380
+ box_class_repeats=3,
381
+ backbone_args=dict(drop_path_rate=0.2),
382
+ url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d0_34-f153e0cf.pth',
383
+ ),
384
+ tf_efficientdet_d1=dict(
385
+ name='tf_efficientdet_d1',
386
+ backbone_name='tf_efficientnet_b1',
387
+ image_size=(640, 640),
388
+ fpn_channels=88,
389
+ fpn_cell_repeats=4,
390
+ box_class_repeats=3,
391
+ backbone_args=dict(drop_path_rate=0.2),
392
+ url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d1_40-a30f94af.pth'
393
+ ),
394
+ tf_efficientdet_d2=dict(
395
+ name='tf_efficientdet_d2',
396
+ backbone_name='tf_efficientnet_b2',
397
+ image_size=(768, 768),
398
+ fpn_channels=112,
399
+ fpn_cell_repeats=5,
400
+ box_class_repeats=3,
401
+ backbone_args=dict(drop_path_rate=0.2),
402
+ url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d2_43-8107aa99.pth',
403
+ ),
404
+ tf_efficientdet_d3=dict(
405
+ name='tf_efficientdet_d3',
406
+ backbone_name='tf_efficientnet_b3',
407
+ image_size=(896, 896),
408
+ fpn_channels=160,
409
+ fpn_cell_repeats=6,
410
+ box_class_repeats=4,
411
+ backbone_args=dict(drop_path_rate=0.2),
412
+ url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d3_47-0b525f35.pth',
413
+ ),
414
+ tf_efficientdet_d4=dict(
415
+ name='tf_efficientdet_d4',
416
+ backbone_name='tf_efficientnet_b4',
417
+ image_size=(1024, 1024),
418
+ fpn_channels=224,
419
+ fpn_cell_repeats=7,
420
+ box_class_repeats=4,
421
+ backbone_args=dict(drop_path_rate=0.2),
422
+ url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d4_49-f56376d9.pth',
423
+ ),
424
+ tf_efficientdet_d5=dict(
425
+ name='tf_efficientdet_d5',
426
+ backbone_name='tf_efficientnet_b5',
427
+ image_size=(1280, 1280),
428
+ fpn_channels=288,
429
+ fpn_cell_repeats=7,
430
+ box_class_repeats=4,
431
+ backbone_args=dict(drop_path_rate=0.2),
432
+ url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d5_51-c79f9be6.pth',
433
+ ),
434
+ tf_efficientdet_d6=dict(
435
+ name='tf_efficientdet_d6',
436
+ backbone_name='tf_efficientnet_b6',
437
+ image_size=(1280, 1280),
438
+ fpn_channels=384,
439
+ fpn_cell_repeats=8,
440
+ box_class_repeats=5,
441
+ fpn_name='bifpn_sum', # Use unweighted sum for training stability.
442
+ backbone_args=dict(drop_path_rate=0.2),
443
+ url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d6_52-4eda3773.pth'
444
+ ),
445
+ tf_efficientdet_d7=dict(
446
+ name='tf_efficientdet_d7',
447
+ backbone_name='tf_efficientnet_b6',
448
+ image_size=(1536, 1536),
449
+ fpn_channels=384,
450
+ fpn_cell_repeats=8,
451
+ box_class_repeats=5,
452
+ anchor_scale=5.0,
453
+ fpn_name='bifpn_sum', # Use unweighted sum for training stability.
454
+ backbone_args=dict(drop_path_rate=0.2),
455
+ url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d7_53-6d1d7a95.pth'
456
+ ),
457
+ tf_efficientdet_d7x=dict(
458
+ name='tf_efficientdet_d7x',
459
+ backbone_name='tf_efficientnet_b7',
460
+ image_size=(1536, 1536),
461
+ fpn_channels=384,
462
+ fpn_cell_repeats=8,
463
+ box_class_repeats=5,
464
+ anchor_scale=4.0,
465
+ max_level=8,
466
+ fpn_name='bifpn_sum', # Use unweighted sum for training stability.
467
+ backbone_args=dict(drop_path_rate=0.2),
468
+ url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d7x-f390b87c.pth'
469
+ ),
470
+
471
+ # The lite configs are in TF automl repository but no weights yet and listed as 'not final'
472
+ tf_efficientdet_lite0=dict(
473
+ name='tf_efficientdet_lite0',
474
+ backbone_name='tf_efficientnet_lite0',
475
+ image_size=(512, 512),
476
+ fpn_channels=64,
477
+ fpn_cell_repeats=3,
478
+ box_class_repeats=3,
479
+ act_type='relu',
480
+ redundant_bias=False,
481
+ backbone_args=dict(drop_path_rate=0.1),
482
+ # unlike other tf_ models, this was not ported from the tf automl impl, but trained from tf-pretrained efficientnet-lite
483
+ # weights using this code; it will likely be replaced if/when official det-lite weights are released
484
+ url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_lite0-f5f303a9.pth',
485
+ ),
486
+ tf_efficientdet_lite1=dict(
487
+ name='tf_efficientdet_lite1',
488
+ backbone_name='tf_efficientnet_lite1',
489
+ image_size=(640, 640),
490
+ fpn_channels=88,
491
+ fpn_cell_repeats=4,
492
+ box_class_repeats=3,
493
+ act_type='relu',
494
+ backbone_args=dict(drop_path_rate=0.2),
495
+ url='', # no pretrained weights yet
496
+ ),
497
+ tf_efficientdet_lite2=dict(
498
+ name='tf_efficientdet_lite2',
499
+ backbone_name='tf_efficientnet_lite2',
500
+ image_size=(768, 768),
501
+ fpn_channels=112,
502
+ fpn_cell_repeats=5,
503
+ box_class_repeats=3,
504
+ act_type='relu',
505
+ backbone_args=dict(drop_path_rate=0.2),
506
+ url='',
507
+ ),
508
+ tf_efficientdet_lite3=dict(
509
+ name='tf_efficientdet_lite3',
510
+ backbone_name='tf_efficientnet_lite3',
511
+ image_size=(896, 896),
512
+ fpn_channels=160,
513
+ fpn_cell_repeats=6,
514
+ box_class_repeats=4,
515
+ act_type='relu',
516
+ backbone_args=dict(drop_path_rate=0.2),
517
+ url='',
518
+ ),
519
+ tf_efficientdet_lite4=dict(
520
+ name='tf_efficientdet_lite4',
521
+ backbone_name='tf_efficientnet_lite4',
522
+ image_size=(1024, 1024),
523
+ fpn_channels=224,
524
+ fpn_cell_repeats=7,
525
+ box_class_repeats=4,
526
+ act_type='relu',
527
+ backbone_args=dict(drop_path_rate=0.2),
528
+ url='',
529
+ ),
530
+ )
531
+
532
+
533
+ def get_efficientdet_config(model_name='tf_efficientdet_d1'):
534
+ """Get the default config for EfficientDet based on model name."""
535
+ h = default_detection_model_configs()
536
+ h.update(efficientdet_model_param_dict[model_name])
537
+ h.num_levels = h.max_level - h.min_level + 1
538
+ return deepcopy(h) # may be unnecessary, ensure no references to param dict values
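
For reference, `get_efficientdet_config` is the entry point for pulling one of the model dictionaries above. A minimal sketch, assuming the `efficientdet/` directory is on the path so that `effdet` is importable (the `num_classes` override is illustrative, not part of this file):

    from effdet.config import get_efficientdet_config

    config = get_efficientdet_config('tf_efficientdet_d2')
    config.num_classes = 7                           # illustrative override, e.g. waste categories
    print(config.image_size, config.fpn_channels)    # 768x768 input, 112 FPN channels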
efficientdet/effdet/config/train_config.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from omegaconf import OmegaConf
2
+
3
+
4
+ def default_detection_train_config():
5
+ # FIXME currently using args for train config, will revisit, perhaps move to Hydra
6
+ h = OmegaConf.create()
7
+
8
+ # dataset
9
+ h.skip_crowd_during_training = True
10
+
11
+ # augmentation
12
+ h.input_rand_hflip = True
13
+ h.train_scale_min = 0.1
14
+ h.train_scale_max = 2.0
15
+ h.autoaugment_policy = None
16
+
17
+ # optimization
18
+ h.momentum = 0.9
19
+ h.learning_rate = 0.08
20
+ h.lr_warmup_init = 0.008
21
+ h.lr_warmup_epoch = 1.0
22
+ h.first_lr_drop_epoch = 200.0
23
+ h.second_lr_drop_epoch = 250.0
24
+ h.clip_gradients_norm = 10.0
25
+ h.num_epochs = 300
26
+
27
+ # regularization l2 loss.
28
+ h.weight_decay = 4e-5
29
+
30
+ h.lr_decay_method = 'cosine'
31
+ h.moving_average_decay = 0.9998
32
+ h.ckpt_var_scope = None
33
+
34
+ return h
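
The returned object is an OmegaConf node, so individual hyperparameters can be overridden attribute-style. A rough sketch with illustrative values:

    from effdet.config.train_config import default_detection_train_config

    train_cfg = default_detection_train_config()
    train_cfg.learning_rate = 0.04   # illustrative: halve the 0.08 default for a smaller batch
    train_cfg.num_epochs = 100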
efficientdet/effdet/data/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from .dataset_factory import create_dataset
2
+ from .dataset import DetectionDatset, SkipSubset
3
+ from .input_config import resolve_input_config
4
+ from .loader import create_loader
5
+ from .parsers import create_parser
6
+ from .transforms import *
efficientdet/effdet/data/dataset.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Detection dataset
2
+
3
+ Hacked together by Ross Wightman
4
+ """
5
+ import torch.utils.data as data
6
+ import numpy as np
7
+ import albumentations as A
8
+ import torch
9
+
10
+ from PIL import Image
11
+ from .parsers import create_parser
12
+
13
+
14
+ class DetectionDatset(data.Dataset):
15
+ """`Object Detection Dataset. Use with parsers for COCO, VOC, and OpenImages.
16
+ Args:
17
+ parser (string, Parser): a parser name (e.g. 'coco') or an already constructed Parser instance
18
+ transform (callable, optional): A function/transform that takes in a PIL image
19
+ and returns a transformed version. E.g., ``transforms.ToTensor``
20
+
21
+ """
22
+
23
+ def __init__(self, data_dir, parser=None, parser_kwargs=None, transform=None, transforms=None):
24
+ super(DetectionDatset, self).__init__()
25
+ parser_kwargs = parser_kwargs or {}
26
+ self.data_dir = data_dir
27
+ if isinstance(parser, str):
28
+ self._parser = create_parser(parser, **parser_kwargs)
29
+ else:
30
+ assert parser is not None and len(parser.img_ids)
31
+ self._parser = parser
32
+ self._transform = transform
33
+ self._transforms = transforms
34
+
35
+ def __getitem__(self, index):
36
+ """
37
+ Args:
38
+ index (int): Index
39
+ Returns:
40
+ tuple: Tuple (image, annotations (target)).
41
+ """
42
+ img_info = self._parser.img_infos[index]
43
+ target = dict(img_idx=index, img_size=(img_info['width'], img_info['height']))
44
+ if self._parser.has_labels:
45
+ ann = self._parser.get_ann_info(index)
46
+ target.update(ann)
47
+ img_path = self.data_dir / img_info['file_name']
48
+ img = Image.open(img_path).convert('RGB')
49
+ if self.transforms is not None:
50
+ img = torch.as_tensor(np.array(img), dtype=torch.uint8)
51
+ voc_boxes = []
52
+ for coord in target['bbox']:
53
+ xmin = coord[1]
54
+ ymin = coord[0]
55
+ xmax = coord[3]
56
+ ymax = coord[2]
57
+ if xmin<1:
58
+ xmin = 1
59
+ if ymin<1:
60
+ ymin = 1
61
+ if xmax>=img.shape[1]-1:
62
+ xmax = img.shape[1]-1
63
+ if ymax>=img.shape[0]-1:
64
+ ymax = img.shape[0]-1
65
+ voc_boxes.append([xmin, ymin, xmax, ymax])
66
+ transformed = self.transforms(image=np.array(img), bbox_classes=target['cls'], bboxes=voc_boxes)
67
+ img = torch.as_tensor(transformed['image'], dtype=torch.uint8)
68
+ target['bbox'] = []
69
+ for coord in transformed['bboxes']:
70
+ ymin = int(coord[1])
71
+ xmin = int(coord[0])
72
+ ymax = int(coord[3])
73
+ xmax = int(coord[2])
74
+ target['bbox'].append([ymin, xmin, ymax, xmax])
75
+ target['bbox'] = np.array(target['bbox'], dtype=np.float32)
76
+ target['cls'] = np.array(transformed['bbox_classes'])
77
+ img = Image.fromarray(np.array(img).astype('uint8'), 'RGB')
78
+ target['img_size'] = img.size
79
+
80
+ if self.transform is not None:
81
+ img, target = self.transform(img, target)
82
+
83
+ return img, target
84
+
85
+ def __len__(self):
86
+ return len(self._parser.img_ids)
87
+
88
+ @property
89
+ def parser(self):
90
+ return self._parser
91
+
92
+ @property
93
+ def transform(self):
94
+ return self._transform
95
+
96
+ @transform.setter
97
+ def transform(self, t):
98
+ self._transform = t
99
+
100
+ @property
101
+ def transforms(self):
102
+ return self._transforms
103
+
104
+ @transforms.setter
105
+ def transforms(self, t):
106
+ self._transforms = t
107
+
108
+ class SkipSubset(data.Dataset):
109
+ r"""
110
+ Subset of a dataset that keeps every nth sample.
111
+
112
+ Arguments:
113
+ dataset (Dataset): The whole Dataset
114
+ n (int): skip rate (select every nth)
115
+ """
116
+ def __init__(self, dataset, n=2):
117
+ self.dataset = dataset
118
+ assert n >= 1
119
+ self.indices = np.arange(len(dataset))[::n]
120
+
121
+ def __getitem__(self, idx):
122
+ return self.dataset[self.indices[idx]]
123
+
124
+ def __len__(self):
125
+ return len(self.indices)
126
+
127
+ @property
128
+ def parser(self):
129
+ return self.dataset.parser
130
+
131
+ @property
132
+ def transform(self):
133
+ return self.dataset.transform
134
+
135
+ @transform.setter
136
+ def transform(self, t):
137
+ self.dataset.transform = t
138
+
139
+ @property
140
+ def transforms(self):
141
+ return self.dataset.transforms
142
+
143
+ @transforms.setter
144
+ def transforms(self, t):
145
+ self.dataset.transforms = t
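
A rough usage sketch for the dataset wrapper, assuming a COCO-style annotation file (paths below are placeholders): the dataset is built around a parser, and `SkipSubset` can thin it for a quick pass.

    from pathlib import Path
    from effdet.data.dataset import DetectionDatset, SkipSubset
    from effdet.data.parsers import CocoParserCfg, create_parser

    parser = create_parser('coco', cfg=CocoParserCfg(ann_filename='annotations/train.json'))
    dataset = DetectionDatset(data_dir=Path('images'), parser=parser)
    subset = SkipSubset(dataset, n=5)   # keep every 5th sample
    img, target = subset[0]             # PIL image and dict with 'bbox' / 'cls' when no transform is set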
efficientdet/effdet/data/dataset_config.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ COCO detect-waste dataset configurations
2
+
3
+ Updated 2021 Wimlds in Detect Waste in Pomerania
4
+ """
5
+ from dataclasses import dataclass
6
+ from typing import Dict
7
+
8
+
9
+ @dataclass
10
+ class CocoCfg:
11
+ variant: str = None
12
+ parser: str = 'coco'
13
+ num_classes: int = 80
14
+ splits: Dict[str, dict] = None
15
+
16
+
17
+ @dataclass
18
+ class TACOCfg(CocoCfg):
19
+ root: str = ""
20
+ ann: str = ""
21
+ variant: str = '2017'
22
+ num_classes: int = 28
23
+
24
+ def add_split(self):
25
+ self.splits = {
26
+ 'train': {'ann_filename': self.ann+'_train.json',
27
+ 'img_dir': self.root,
28
+ 'has_labels': True},
29
+ 'val': {'ann_filename': self.ann+'_test.json',
30
+ 'img_dir': self.root,
31
+ 'has_labels': True}
32
+ }
33
+
34
+
35
+ @dataclass
36
+ class DetectwasteCfg(CocoCfg):
37
+ root: str = ""
38
+ ann: str = ""
39
+ variant: str = '2017'
40
+ num_classes: int = 7
41
+
42
+ def add_split(self):
43
+ self.splits = {
44
+ 'train': {'ann_filename': self.ann+'_train.json',
45
+ 'img_dir': self.root,
46
+ 'has_labels': True},
47
+ 'val': {'ann_filename': self.ann+'_test.json',
48
+ 'img_dir': self.root,
49
+ 'has_labels': True}
50
+ }
51
+
52
+
53
+ @dataclass
54
+ class BinaryCfg(CocoCfg):
55
+ root: str = ""
56
+ ann: str = ""
57
+ variant: str = '2017'
58
+ num_classes: int = 1
59
+
60
+ def add_split(self):
61
+ self.splits = {
62
+ 'train': {'ann_filename': self.ann+'_train.json',
63
+ 'img_dir': self.root,
64
+ 'has_labels': True},
65
+ 'val': {'ann_filename': self.ann+'_test.json',
66
+ 'img_dir': self.root,
67
+ 'has_labels': True}
68
+ }
69
+
70
+
71
+ @dataclass
72
+ class BinaryMultiCfg(CocoCfg):
73
+ root: str = ""
74
+ ann: str = ""
75
+ variant: str = '2017'
76
+ num_classes: int = 1
77
+
78
+ def add_split(self):
79
+ self.splits = {
80
+ 'train': {'ann_filename': self.ann+'_train.json',
81
+ 'img_dir': self.root,
82
+ 'has_labels': True},
83
+ 'val': {'ann_filename': self.ann+'_test.json',
84
+ 'img_dir': self.root,
85
+ 'has_labels': True}
86
+ }
87
+
88
+
89
+ @dataclass
90
+ class TrashCanCfg(CocoCfg):
91
+ root: str = ""
92
+ ann: str = ""
93
+ variant: str = '2017'
94
+ num_classes: int = 8
95
+
96
+ def add_split(self):
97
+ self.splits = {
98
+ 'train': {'ann_filename': self.ann+'_train.json',
99
+ 'img_dir': self.root,
100
+ 'has_labels': True},
101
+ 'val': {'ann_filename': self.ann+'_test.json',
102
+ 'img_dir': self.root,
103
+ 'has_labels': True}
104
+ }
105
+
106
+
107
+ @dataclass
108
+ class UAVVasteCfg(CocoCfg):
109
+ root: str = ""
110
+ ann: str = ""
111
+ variant: str = '2017'
112
+ num_classes: int = 1
113
+
114
+ def add_split(self):
115
+ self.splits = {
116
+ 'train': {'ann_filename': self.ann+'_train.json',
117
+ 'img_dir': self.root,
118
+ 'has_labels': True},
119
+ 'val': {'ann_filename': self.ann+'_test.json',
120
+ 'img_dir': self.root,
121
+ 'has_labels': True}
122
+ }
123
+
124
+
125
+ @dataclass
126
+ class ICRACfg(CocoCfg):
127
+ root: str = ""
128
+ ann: str = ""
129
+ variant: str = '2017'
130
+ num_classes: int = 7
131
+
132
+ def add_split(self):
133
+ self.splits = {
134
+ 'train': {'ann_filename': self.ann+'_train.json',
135
+ 'img_dir': self.root,
136
+ 'has_labels': True},
137
+ 'val': {'ann_filename': self.ann+'_test.json',
138
+ 'img_dir': self.root,
139
+ 'has_labels': True}
140
+ }
141
+
142
+
143
+ @dataclass
144
+ class DrinkWasteCfg(CocoCfg):
145
+ root: str = ""
146
+ ann: str = ""
147
+ variant: str = '2017'
148
+ num_classes: int = 4
149
+
150
+ def add_split(self):
151
+ self.splits = {
152
+ 'train': {'ann_filename': self.ann+'_train.json',
153
+ 'img_dir': self.root,
154
+ 'has_labels': True},
155
+ 'val': {'ann_filename': self.ann+'_test.json',
156
+ 'img_dir': self.root,
157
+ 'has_labels': True}
158
+ }
159
+
160
+
161
+ @dataclass
162
+ class MJU_WasteCfg(CocoCfg):
163
+ root: str = ""
164
+ ann: str = ""
165
+ variant: str = '2017'
166
+ num_classes: int = 1
167
+
168
+ def add_split(self):
169
+ self.splits = {
170
+ 'train': {'ann_filename': self.ann+'_train.json',
171
+ 'img_dir': self.root,
172
+ 'has_labels': True},
173
+ 'val': {'ann_filename': self.ann+'_test.json',
174
+ 'img_dir': self.root,
175
+ 'has_labels': True}
176
+ }
177
+
178
+
179
+ @dataclass
180
+ class WadeCfg(CocoCfg):
181
+ root: str = ""
182
+ ann: str = ""
183
+ variant: str = '2017'
184
+ num_classes: int = 1
185
+
186
+ def add_split(self):
187
+ self.splits = {
188
+ 'train': {'ann_filename': self.ann+'_train.json',
189
+ 'img_dir': self.root,
190
+ 'has_labels': True},
191
+ 'val': {'ann_filename': self.ann+'_test.json',
192
+ 'img_dir': self.root,
193
+ 'has_labels': True}
194
+ }
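
Every config above follows the same pattern: `root` is the shared image directory and the `ann` prefix plus `_train.json` / `_test.json` gives the split annotation files. A minimal sketch with placeholder paths:

    cfg = TACOCfg(root='data/taco/images', ann='data/taco/annotations/taco')
    cfg.add_split()
    # cfg.splits['train']['ann_filename'] -> 'data/taco/annotations/taco_train.json'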
efficientdet/effdet/data/dataset_factory.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Dataset factory
2
+
3
+ Updated 2021 Wimlds in Detect Waste in Pomerania
4
+ """
5
+ from collections import OrderedDict
6
+ from pathlib import Path
7
+
8
+ from .dataset_config import *
9
+ from .parsers import *
10
+ from .dataset import DetectionDatset
11
+ from .parsers import create_parser
12
+
13
+ # list of detect-waste datasets
14
+ waste_datasets_list = ['taco', 'detectwaste', 'binary', 'multi',
15
+ 'uav', 'mju', 'trashcan', 'wade', 'icra',
16
+ 'drinkwaste']
17
+
18
+
19
+ def create_dataset(name, root, ann, splits=('train', 'val')):
20
+ if isinstance(splits, str):
21
+ splits = (splits,)
22
+ name = name.lower()
23
+ root = Path(root)
24
+ dataset_cls = DetectionDatset
25
+ datasets = OrderedDict()
26
+ if name.startswith('coco'):
27
+ if 'coco2014' in name:
28
+ dataset_cfg = Coco2014Cfg()
29
+ else:
30
+ dataset_cfg = Coco2017Cfg()
31
+ for s in splits:
32
+ if s not in dataset_cfg.splits:
33
+ raise RuntimeError(f'{s} split not found in config')
34
+ split_cfg = dataset_cfg.splits[s]
35
+ ann_file = root / split_cfg['ann_filename']
36
+ parser_cfg = CocoParserCfg(
37
+ ann_filename=ann_file,
38
+ has_labels=split_cfg['has_labels']
39
+ )
40
+ datasets[s] = dataset_cls(
41
+ data_dir=root / Path(split_cfg['img_dir']),
42
+ parser=create_parser(dataset_cfg.parser, cfg=parser_cfg),
43
+ )
44
+ datasets = OrderedDict()
45
+ elif name in waste_datasets_list:
46
+ if name.startswith('taco'):
47
+ dataset_cfg = TACOCfg(root=root, ann=ann)
48
+ elif name.startswith('detectwaste'):
49
+ dataset_cfg = DetectwasteCfg(root=root, ann=ann)
50
+ elif name.startswith('binary'):
51
+ dataset_cfg = BinaryCfg(root=root, ann=ann)
52
+ elif name.startswith('multi'):
53
+ dataset_cfg = BinaryMultiCfg(root=root, ann=ann)
54
+ elif name.startswith('uav'):
55
+ dataset_cfg = UAVVasteCfg(root=root, ann=ann)
56
+ elif name.startswith('trashcan'):
57
+ dataset_cfg = TrashCanCfg(root=root, ann=ann)
58
+ elif name.startswith('drinkwaste'):
59
+ dataset_cfg = DrinkWasteCfg(root=root, ann=ann)
60
+ elif name.startswith('mju'):
61
+ dataset_cfg = MJU_WasteCfg(root=root, ann=ann)
62
+ elif name.startswith('wade'):
63
+ dataset_cfg = WadeCfg(root=root, ann=ann)
64
+ elif name.startswith('icra'):
65
+ dataset_cfg = ICRACfg(root=root, ann=ann)
66
+ else:
67
+ assert False, f'Unknown dataset parser ({name})'
68
+ dataset_cfg.add_split()
69
+ for s in splits:
70
+ if s not in dataset_cfg.splits:
71
+ raise RuntimeError(f'{s} split not found in config')
72
+ split_cfg = dataset_cfg.splits[s]
73
+ parser_cfg = CocoParserCfg(
74
+ ann_filename=split_cfg['ann_filename'],
75
+ has_labels=split_cfg['has_labels']
76
+ )
77
+ datasets[s] = dataset_cls(
78
+ data_dir=split_cfg['img_dir'],
79
+ parser=create_parser(dataset_cfg.parser, cfg=parser_cfg),
80
+ )
81
+ else:
82
+ assert False, f'Unknown dataset parser ({name})'
83
+
84
+ datasets = list(datasets.values())
85
+ return datasets if len(datasets) > 1 else datasets[0]
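
A hedged usage sketch for the factory (the detect-waste layout and paths are assumptions):

    from effdet.data import create_dataset

    train_ds, val_ds = create_dataset(
        'taco',
        root='data/taco/images',
        ann='data/taco/annotations/taco',
        splits=('train', 'val'))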
efficientdet/effdet/data/input_config.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .transforms import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
2
+
3
+
4
+ def resolve_input_config(args, model_config=None, model=None):
5
+ if not isinstance(args, dict):
6
+ args = vars(args)
7
+ input_config = {}
8
+ if not model_config and model is not None and hasattr(model, 'config'):
9
+ model_config = model.config
10
+
11
+ # Resolve input/image size
12
+ in_chans = 3
13
+ input_size = (in_chans, 512, 512)
14
+
15
+ if 'input_size' in model_config:
16
+ input_size = tuple(model_config['input_size'])
17
+ elif 'image_size' in model_config:
18
+ input_size = (in_chans,) + tuple(model_config['image_size'])
19
+ assert isinstance(input_size, tuple) and len(input_size) == 3
20
+ input_config['input_size'] = input_size
21
+
22
+ # resolve interpolation method
23
+ input_config['interpolation'] = 'bicubic'
24
+ if 'interpolation' in args and args['interpolation']:
25
+ input_config['interpolation'] = args['interpolation']
26
+ elif 'interpolation' in model_config:
27
+ input_config['interpolation'] = model_config['interpolation']
28
+
29
+ # resolve dataset + model mean for normalization
30
+ input_config['mean'] = IMAGENET_DEFAULT_MEAN
31
+ if 'mean' in args and args['mean'] is not None:
32
+ mean = tuple(args['mean'])
33
+ if len(mean) == 1:
34
+ mean = tuple(list(mean) * in_chans)
35
+ else:
36
+ assert len(mean) == in_chans
37
+ input_config['mean'] = mean
38
+ elif 'mean' in model_config:
39
+ input_config['mean'] = model_config['mean']
40
+
41
+ # resolve dataset + model std deviation for normalization
42
+ input_config['std'] = IMAGENET_DEFAULT_STD
43
+ if 'std' in args and args['std'] is not None:
44
+ std = tuple(args['std'])
45
+ if len(std) == 1:
46
+ std = tuple(list(std) * in_chans)
47
+ else:
48
+ assert len(std) == in_chans
49
+ input_config['std'] = std
50
+ elif 'std' in model_config:
51
+ input_config['std'] = model_config['std']
52
+
53
+ # resolve letterbox fill color
54
+ input_config['fill_color'] = 'mean'
55
+ if 'fill_color' in args and args['fill_color'] is not None:
56
+ input_config['fill_color'] = args['fill_color']
57
+ elif 'fill_color' in model_config:
58
+ input_config['fill_color'] = model_config['fill_color']
59
+
60
+ return input_config
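
A small sketch of how the resolver falls back to the model config and the ImageNet defaults (model name taken from the configs above):

    from effdet.config import get_efficientdet_config
    from effdet.data import resolve_input_config

    model_config = get_efficientdet_config('tf_efficientdet_d2')
    input_config = resolve_input_config({}, model_config=model_config)
    # input_config['input_size'] -> (3, 768, 768); mean/std fall back to the ImageNet constants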
efficientdet/effdet/data/loader.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Object detection loader/collate
2
+
3
+ Hacked together by / Copyright 2020 Ross Wightman
4
+ """
5
+ import torch.utils.data
6
+ from .transforms import *
7
+ from .transforms_albumentation import get_transform
8
+ from .random_erasing import RandomErasing
9
+ from effdet.anchors import AnchorLabeler
10
+ from timm.data.distributed_sampler import OrderedDistributedSampler
11
+ import os
12
+
13
+ MAX_NUM_INSTANCES = 100
14
+
15
+
16
+ class DetectionFastCollate:
17
+ """ A detection specific, optimized collate function w/ a bit of state.
18
+
19
+ Optionally performs anchor labelling. Doing this here offloads some work from the
20
+ GPU and the main training process thread and increases the load on the dataloader
21
+ threads.
22
+
23
+ """
24
+ def __init__(
25
+ self,
26
+ instance_keys=None,
27
+ instance_shapes=None,
28
+ instance_fill=-1,
29
+ max_instances=MAX_NUM_INSTANCES,
30
+ anchor_labeler=None,
31
+ ):
32
+ instance_keys = instance_keys or {'bbox', 'bbox_ignore', 'cls'}
33
+ instance_shapes = instance_shapes or dict(
34
+ bbox=(max_instances, 4), bbox_ignore=(max_instances, 4), cls=(max_instances,))
35
+ self.instance_info = {k: dict(fill=instance_fill, shape=instance_shapes[k]) for k in instance_keys}
36
+ self.max_instances = max_instances
37
+ self.anchor_labeler = anchor_labeler
38
+
39
+ def __call__(self, batch):
40
+ batch_size = len(batch)
41
+ target = dict()
42
+ labeler_outputs = dict()
43
+ img_tensor = torch.zeros((batch_size, *batch[0][0].shape), dtype=torch.uint8)
44
+ for i in range(batch_size):
45
+ img_tensor[i] += torch.from_numpy(batch[i][0])
46
+ labeler_inputs = {}
47
+ for tk, tv in batch[i][1].items():
48
+ instance_info = self.instance_info.get(tk, None)
49
+ if instance_info is not None:
50
+ # target tensor is associated with a detection instance
51
+ tv = torch.from_numpy(tv).to(dtype=torch.float32)
52
+ if self.anchor_labeler is None:
53
+ if i == 0:
54
+ shape = (batch_size,) + instance_info['shape']
55
+ target_tensor = torch.full(shape, instance_info['fill'], dtype=torch.float32)
56
+ target[tk] = target_tensor
57
+ else:
58
+ target_tensor = target[tk]
59
+ num_elem = min(tv.shape[0], self.max_instances)
60
+ target_tensor[i, 0:num_elem] = tv[0:num_elem]
61
+ else:
62
+ # no need to pass gt tensors through when labeler in use
63
+ if tk in ('bbox', 'cls'):
64
+ labeler_inputs[tk] = tv
65
+ else:
66
+ # target tensor is an image-level annotation / metadata
67
+ if i == 0:
68
+ # first batch elem, create destination tensors
69
+ if isinstance(tv, (tuple, list)):
70
+ # per batch elem sequence
71
+ shape = (batch_size, len(tv))
72
+ dtype = torch.float32 if isinstance(tv[0], (float, np.floating)) else torch.int32
73
+ else:
74
+ # per batch elem scalar
75
+ shape = batch_size,
76
+ dtype = torch.float32 if isinstance(tv, (float, np.floating)) else torch.int64
77
+ target_tensor = torch.zeros(shape, dtype=dtype)
78
+ target[tk] = target_tensor
79
+ else:
80
+ target_tensor = target[tk]
81
+ target_tensor[i] = torch.tensor(tv, dtype=target_tensor.dtype)
82
+
83
+ if self.anchor_labeler is not None:
84
+ cls_targets, box_targets, num_positives = self.anchor_labeler.label_anchors(
85
+ labeler_inputs['bbox'], labeler_inputs['cls'], filter_valid=False)
86
+ if i == 0:
87
+ # first batch elem, create destination tensors, separate key per level
88
+ for j, (ct, bt) in enumerate(zip(cls_targets, box_targets)):
89
+ labeler_outputs[f'label_cls_{j}'] = torch.zeros(
90
+ (batch_size,) + ct.shape, dtype=torch.int64)
91
+ labeler_outputs[f'label_bbox_{j}'] = torch.zeros(
92
+ (batch_size,) + bt.shape, dtype=torch.float32)
93
+ labeler_outputs['label_num_positives'] = torch.zeros(batch_size)
94
+ for j, (ct, bt) in enumerate(zip(cls_targets, box_targets)):
95
+ labeler_outputs[f'label_cls_{j}'][i] = ct
96
+ labeler_outputs[f'label_bbox_{j}'][i] = bt
97
+ labeler_outputs['label_num_positives'][i] = num_positives
98
+ if labeler_outputs:
99
+ target.update(labeler_outputs)
100
+
101
+ return img_tensor, target
102
+
103
+
104
+ class PrefetchLoader:
105
+
106
+ def __init__(self,
107
+ loader,
108
+ mean=IMAGENET_DEFAULT_MEAN,
109
+ std=IMAGENET_DEFAULT_STD,
110
+ re_prob=0.,
111
+ re_mode='pixel',
112
+ re_count=1,
113
+ ):
114
+ self.loader = loader
115
+ self.mean = torch.tensor([x * 255 for x in mean]).cuda().view(1, 3, 1, 1)
116
+ self.std = torch.tensor([x * 255 for x in std]).cuda().view(1, 3, 1, 1)
117
+ if re_prob > 0.:
118
+ self.random_erasing = RandomErasing(probability=re_prob, mode=re_mode, max_count=re_count)
119
+ else:
120
+ self.random_erasing = None
121
+
122
+ def __iter__(self):
123
+ stream = torch.cuda.Stream()
124
+ first = True
125
+
126
+ for next_input, next_target in self.loader:
127
+ with torch.cuda.stream(stream):
128
+ next_input = next_input.cuda(non_blocking=True)
129
+ next_input = next_input.float().sub_(self.mean).div_(self.std)
130
+ next_target = {k: v.cuda(non_blocking=True) for k, v in next_target.items()}
131
+ if self.random_erasing is not None:
132
+ next_input = self.random_erasing(next_input, next_target)
133
+
134
+ if not first:
135
+ yield input, target
136
+ else:
137
+ first = False
138
+
139
+ torch.cuda.current_stream().wait_stream(stream)
140
+ input = next_input
141
+ target = next_target
142
+
143
+ yield input, target
144
+
145
+ def __len__(self):
146
+ return len(self.loader)
147
+
148
+ @property
149
+ def sampler(self):
150
+ return self.loader.sampler
151
+
152
+ @property
153
+ def dataset(self):
154
+ return self.loader.dataset
155
+
156
+
157
+ def create_loader(
158
+ dataset,
159
+ input_size,
160
+ batch_size,
161
+ is_training=False,
162
+ use_prefetcher=True,
163
+ re_prob=0.,
164
+ re_mode='pixel',
165
+ re_count=1,
166
+ interpolation='bilinear',
167
+ fill_color='mean',
168
+ mean=IMAGENET_DEFAULT_MEAN,
169
+ std=IMAGENET_DEFAULT_STD,
170
+ num_workers=1,
171
+ distributed=False,
172
+ pin_mem=False,
173
+ anchor_labeler=None,
174
+ ):
175
+ if isinstance(input_size, tuple):
176
+ img_size = input_size[-2:]
177
+ else:
178
+ img_size = input_size
179
+
180
+ if is_training:
181
+ transforms = get_transform()
182
+ transform = transforms_coco_train(
183
+ img_size,
184
+ interpolation=interpolation,
185
+ use_prefetcher=use_prefetcher,
186
+ fill_color=fill_color,
187
+ mean=mean,
188
+ std=std)
189
+ else:
190
+ transforms = None
191
+ transform = transforms_coco_eval(
192
+ img_size,
193
+ interpolation=interpolation,
194
+ use_prefetcher=use_prefetcher,
195
+ fill_color=fill_color,
196
+ mean=mean,
197
+ std=std)
198
+ dataset.transforms = transforms
199
+ dataset.transform = transform
200
+
201
+ sampler = None
202
+ if distributed:
203
+ if is_training:
204
+ sampler = torch.utils.data.distributed.DistributedSampler(dataset)
205
+ else:
206
+ # This will add extra duplicate entries to result in equal num
207
+ # of samples per-process, will slightly alter validation results
208
+ sampler = OrderedDistributedSampler(dataset)
209
+
210
+ collate_fn = DetectionFastCollate(anchor_labeler=anchor_labeler)
211
+ loader = torch.utils.data.DataLoader(
212
+ dataset,
213
+ batch_size=batch_size,
214
+ shuffle=sampler is None and is_training,
215
+ num_workers=num_workers,
216
+ sampler=sampler,
217
+ pin_memory=pin_mem,
218
+ collate_fn=collate_fn,
219
+ )
220
+ if use_prefetcher:
221
+ if is_training:
222
+ loader = PrefetchLoader(loader, mean=mean, std=std, re_prob=re_prob, re_mode=re_mode, re_count=re_count)
223
+ else:
224
+ loader = PrefetchLoader(loader, mean=mean, std=std)
225
+
226
+ return loader
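
A rough end-to-end sketch wiring a dataset into the loader (paths and sizes are assumptions; with the default prefetcher, normalization runs on GPU, so a CUDA device is assumed):

    from effdet.data import create_dataset, create_loader

    train_ds, _ = create_dataset('taco', root='data/taco/images',
                                 ann='data/taco/annotations/taco')
    train_loader = create_loader(
        train_ds, input_size=(3, 768, 768), batch_size=8,
        is_training=True, num_workers=4)
    images, targets = next(iter(train_loader))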
efficientdet/effdet/data/parsers/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .parser_config import OpenImagesParserCfg, CocoParserCfg, VocParserCfg
2
+ from .parser_factory import create_parser
efficientdet/effdet/data/parsers/parser.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from numbers import Integral
2
+ from typing import List, Union, Dict, Any
3
+
4
+
5
+ class Parser:
6
+ """ Parser base class.
7
+
8
+ The attributes listed below make up a public interface common to all parsers. They can be accessed directly
9
+ once the dataset is constructed and annotations are populated.
10
+
11
+ Attributes:
12
+
13
+ cat_names (list[str]):
14
+ list of category (class) names, with background class at position 0.
15
+ cat_ids (list[Union[str, int]]):
16
+ list of dataset specific, unique integer or string category ids, does not include background
17
+ cat_id_to_label (dict):
18
+ map from category id to integer 1-indexed class label
19
+
20
+ img_ids (list):
21
+ list of dataset specific, unique image ids corresponding to valid samples in dataset
22
+ img_ids_invalid (list):
23
+ list of image ids corresponding to invalid images, not used as samples
24
+ img_infos (list[dict]):
25
+ image info, list of info dicts with filename, width, height for each image sample
26
+ """
27
+ def __init__(
28
+ self,
29
+ bbox_yxyx: bool = False,
30
+ has_labels: bool = True,
31
+ include_masks: bool = False,
32
+ include_bboxes_ignore: bool = False,
33
+ ignore_empty_gt: bool = False,
34
+ min_img_size: int = 32,
35
+ ):
36
+ """
37
+ Args:
38
+ bbox_yxyx (bool): output coords in yxyx format, otherwise xyxy
39
+ has_labels (bool): dataset has labels (for training/validation; usually False for test sets)
40
+ include_masks (bool): include segmentation masks in target output (not supported yet for any dataset)
41
+ include_bboxes_ignore (bool): include ignored bbox in target output
42
+ ignore_empty_gt (bool): ignore images with no ground truth (no negative images)
43
+ min_img_size (int): ignore images with width or height smaller than this number
44
+ sub_sample (int): sample every N images from the dataset
45
+ """
46
+ # parser config, determines how dataset parsed and validated
47
+ self.yxyx = bbox_yxyx
48
+ self.has_labels = has_labels
49
+ self.include_masks = include_masks
50
+ self.include_bboxes_ignore = include_bboxes_ignore
51
+ self.ignore_empty_gt = ignore_empty_gt
52
+ self.min_img_size = min_img_size
53
+ self.label_offset = 1
54
+
55
+ # Category (class) metadata. Populated by _load_annotations()
56
+ self.cat_names: List[str] = []
57
+ self.cat_ids: List[Union[str, Integral]] = []
58
+ self.cat_id_to_label: Dict[Union[str, Integral], Integral] = dict()
59
+
60
+ # Image metadata. Populated by _load_annotations()
61
+ self.img_ids: List[Union[str, Integral]] = []
62
+ self.img_ids_invalid: List[Union[str, Integral]] = []
63
+ self.img_infos: List[Dict[str, Any]] = []
64
+
65
+ @property
66
+ def cat_dicts(self):
67
+ """return category names and labels in format compatible with TF Models Evaluator
68
+ list[dict(name=<class name>, id=<class label>)]
69
+ """
70
+ return [
71
+ dict(
72
+ name=name,
73
+ id=cat_id if not self.cat_id_to_label else self.cat_id_to_label[cat_id]
74
+ ) for name, cat_id in zip(self.cat_names, self.cat_ids)]
75
+
76
+ @property
77
+ def max_label(self):
78
+ if self.cat_id_to_label:
79
+ return max(self.cat_id_to_label.values())
80
+ else:
81
+ assert len(self.cat_ids) and isinstance(self.cat_ids[0], Integral)
82
+ return max(self.cat_ids)
efficientdet/effdet/data/parsers/parser_coco.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ COCO dataset parser
2
+
3
+ Copyright 2020 Ross Wightman
4
+ """
5
+ import numpy as np
6
+ from pycocotools.coco import COCO
7
+ from .parser import Parser
8
+ from .parser_config import CocoParserCfg
9
+
10
+
11
+ class CocoParser(Parser):
12
+
13
+ def __init__(self, cfg: CocoParserCfg):
14
+ super().__init__(
15
+ bbox_yxyx=cfg.bbox_yxyx,
16
+ has_labels=cfg.has_labels,
17
+ include_masks=cfg.include_masks,
18
+ include_bboxes_ignore=cfg.include_bboxes_ignore,
19
+ ignore_empty_gt=cfg.has_labels and cfg.ignore_empty_gt,
20
+ min_img_size=cfg.min_img_size
21
+ )
22
+ self.cat_ids_as_labels = True # this is the default for original TF EfficientDet models
23
+ self.coco = None
24
+ self._load_annotations(cfg.ann_filename)
25
+
26
+ def get_ann_info(self, idx):
27
+ img_id = self.img_ids[idx]
28
+ return self._parse_img_ann(img_id)
29
+
30
+ def _load_annotations(self, ann_file):
31
+ assert self.coco is None
32
+ self.coco = COCO(ann_file)
33
+ self.cat_ids = self.coco.getCatIds()
34
+ self.cat_names = [c['name'] for c in self.coco.loadCats(ids=self.cat_ids)]
35
+ if not self.cat_ids_as_labels:
36
+ self.cat_id_to_label = {cat_id: i + self.label_offset for i, cat_id in enumerate(self.cat_ids)}
37
+ img_ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values())
38
+ for img_id in sorted(self.coco.imgs.keys()):
39
+ info = self.coco.loadImgs([img_id])[0]
40
+ if (min(info['width'], info['height']) < self.min_img_size or
41
+ (self.ignore_empty_gt and img_id not in img_ids_with_ann)):
42
+ self.img_ids_invalid.append(img_id)
43
+ continue
44
+ self.img_ids.append(img_id)
45
+ self.img_infos.append(info)
46
+
47
+ def _parse_img_ann(self, img_id):
48
+ ann_ids = self.coco.getAnnIds(imgIds=[img_id])
49
+ ann_info = self.coco.loadAnns(ann_ids)
50
+ bboxes = []
51
+ bboxes_ignore = []
52
+ cls = []
53
+
54
+ for i, ann in enumerate(ann_info):
55
+ if ann.get('ignore', False):
56
+ continue
57
+ x1, y1, w, h = ann['bbox']
58
+ if self.include_masks and ann['area'] <= 0:
59
+ continue
60
+ if w < 1 or h < 1:
61
+ continue
62
+
63
+ if self.yxyx:
64
+ bbox = [y1, x1, y1 + h, x1 + w]
65
+ else:
66
+ bbox = [x1, y1, x1 + w, y1 + h]
67
+
68
+ if ann.get('iscrowd', False):
69
+ if self.include_bboxes_ignore:
70
+ bboxes_ignore.append(bbox)
71
+ else:
72
+ bboxes.append(bbox)
73
+ cls.append(self.cat_id_to_label[ann['category_id']] if self.cat_id_to_label else ann['category_id'])
74
+
75
+ if bboxes:
76
+ bboxes = np.array(bboxes, ndmin=2, dtype=np.float32)
77
+ cls = np.array(cls, dtype=np.int64)
78
+ else:
79
+ bboxes = np.zeros((0, 4), dtype=np.float32)
80
+ cls = np.array([], dtype=np.int64)
81
+
82
+ if self.include_bboxes_ignore:
83
+ if bboxes_ignore:
84
+ bboxes_ignore = np.array(bboxes_ignore, ndmin=2, dtype=np.float32)
85
+ else:
86
+ bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
87
+
88
+ ann = dict(bbox=bboxes, cls=cls)
89
+
90
+ if self.include_bboxes_ignore:
91
+ ann['bbox_ignore'] = bboxes_ignore
92
+
93
+ return ann
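
A small sketch of using the parser directly (the annotation path is a placeholder); boxes come back in yxyx order because `bbox_yxyx` defaults to True in the config:

    from effdet.data.parsers import CocoParserCfg
    from effdet.data.parsers.parser_coco import CocoParser

    cfg = CocoParserCfg(ann_filename='data/taco/annotations/taco_train.json')
    parser = CocoParser(cfg)
    ann = parser.get_ann_info(0)   # dict(bbox=<N x 4 float32, yxyx>, cls=<N int64>)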
efficientdet/effdet/data/parsers/parser_config.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Dataset parser configs
2
+
3
+ Copyright 2020 Ross Wightman
4
+ """
5
+ from dataclasses import dataclass
6
+
7
+ __all__ = ['CocoParserCfg', 'OpenImagesParserCfg', 'VocParserCfg']
8
+
9
+
10
+ @dataclass
11
+ class CocoParserCfg:
12
+ ann_filename: str # absolute path
13
+ include_masks: bool = False
14
+ include_bboxes_ignore: bool = False
15
+ has_labels: bool = True
16
+ bbox_yxyx: bool = True
17
+ min_img_size: int = 32
18
+ ignore_empty_gt: bool = False
19
+
20
+
21
+ @dataclass
22
+ class VocParserCfg:
23
+ split_filename: str
24
+ ann_filename: str
25
+ img_filename: str = '%s.jpg'
26
+ keep_difficult: bool = True
27
+ classes: list = None
28
+ add_background: bool = True
29
+ has_labels: bool = True
30
+ bbox_yxyx: bool = True
31
+ min_img_size: int = 32
32
+ ignore_empty_gt: bool = False
33
+
34
+
35
+ @dataclass
36
+ class OpenImagesParserCfg:
37
+ categories_filename: str
38
+ img_info_filename: str
39
+ bbox_filename: str
40
+ img_label_filename: str = ''
41
+ masks_filename: str = ''
42
+ img_filename: str = '%s.jpg' # relative to dataset img_dir
43
+ task: str = 'obj'
44
+ prefix_levels: int = 1
45
+ add_background: bool = True
46
+ has_labels: bool = True
47
+ bbox_yxyx: bool = True
48
+ min_img_size: int = 32
49
+ ignore_empty_gt: bool = False
efficientdet/effdet/data/parsers/parser_factory.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Parser factory
2
+
3
+ Copyright 2020 Ross Wightman
4
+ """
5
+ from .parser_coco import CocoParser
6
+ from .parser_voc import VocParser
7
+ from .parser_open_images import OpenImagesParser
8
+
9
+
10
+ def create_parser(name, **kwargs):
11
+ if name == 'coco':
12
+ parser = CocoParser(**kwargs)
13
+ elif name == 'voc':
14
+ parser = VocParser(**kwargs)
15
+ elif name == 'openimages':
16
+ parser = OpenImagesParser(**kwargs)
17
+ else:
18
+ assert False, f'Unknown dataset parser ({name})'
19
+ return parser
efficientdet/effdet/data/parsers/parser_open_images.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ OpenImages dataset parser
2
+
3
+ Copyright 2020 Ross Wightman
4
+ """
5
+ import numpy as np
6
+ import os
7
+ import logging
8
+
9
+ from .parser import Parser
10
+ from .parser_config import OpenImagesParserCfg
11
+
12
+ _logger = logging.getLogger(__name__)
13
+
14
+
15
+ class OpenImagesParser(Parser):
16
+
17
+ def __init__(self, cfg: OpenImagesParserCfg):
18
+ super().__init__(
19
+ bbox_yxyx=cfg.bbox_yxyx,
20
+ has_labels=cfg.has_labels,
21
+ include_masks=False, # FIXME to support someday
22
+ include_bboxes_ignore=False,
23
+ ignore_empty_gt=cfg.has_labels and cfg.ignore_empty_gt,
24
+ min_img_size=cfg.min_img_size
25
+ )
26
+ self.img_prefix_levels = cfg.prefix_levels
27
+ self.mask_prefix_levels = 1
28
+ self._anns = None # access via get_ann_info()
29
+ self._img_to_ann = None
30
+ self._load_annotations(
31
+ categories_filename=cfg.categories_filename,
32
+ img_info_filename=cfg.img_info_filename,
33
+ img_filename=cfg.img_filename,
34
+ masks_filename=cfg.masks_filename,
35
+ bbox_filename=cfg.bbox_filename
36
+ )
37
+
38
+ def _load_annotations(
39
+ self,
40
+ categories_filename: str,
41
+ img_info_filename: str,
42
+ img_filename: str,
43
+ masks_filename: str,
44
+ bbox_filename: str,
45
+ ):
46
+ import pandas as pd # For now, blow up on pandas req only when trying to load open images anno
47
+
48
+ _logger.info('Loading categories...')
49
+ classes_df = pd.read_csv(categories_filename, header=None)
50
+ self.cat_ids = classes_df[0].tolist()
51
+ self.cat_names = classes_df[1].tolist()
52
+ self.cat_id_to_label = {c: i + self.label_offset for i, c in enumerate(self.cat_ids)}
53
+
54
+ def _img_filename(img_id):
55
+ # build image filenames that are relative to img_dir
56
+ filename = img_filename % img_id
57
+ if self.img_prefix_levels:
58
+ levels = [c for c in img_id[:self.img_prefix_levels]]
59
+ filename = os.path.join(*levels, filename)
60
+ return filename
61
+
62
+ def _mask_filename(mask_path):
63
+ # FIXME finish
64
+ if self.mask_prefix_levels:
65
+ levels = [c for c in mask_path[:self.mask_prefix_levels]]
66
+ mask_path = os.path.join(*levels, mask_path)
67
+ return mask_path
68
+
69
+ def _load_img_info(csv_file, select_img_ids=None):
70
+ _logger.info('Read img_info csv...')
71
+ img_info_df = pd.read_csv(csv_file, index_col='id')
72
+
73
+ _logger.info('Filter images...')
74
+ if select_img_ids is not None:
75
+ img_info_df = img_info_df.loc[select_img_ids]
76
+ img_info_df = img_info_df[
77
+ (img_info_df['width'] >= self.min_img_size) & (img_info_df['height'] >= self.min_img_size)]
78
+
79
+ _logger.info('Mapping ids...')
80
+ img_info_df['img_id'] = img_info_df.index
81
+ img_info_df['file_name'] = img_info_df.index.map(lambda x: _img_filename(x))
82
+ img_info_df = img_info_df[['img_id', 'file_name', 'width', 'height']]
83
+ img_sizes = img_info_df[['width', 'height']].values
84
+ self.img_infos = img_info_df.to_dict('records')
85
+ self.img_ids = img_info_df.index.values.tolist()
86
+ img_id_to_idx = {img_id: idx for idx, img_id in enumerate(self.img_ids)}
87
+ return img_sizes, img_id_to_idx
88
+
89
+ if self.include_masks and self.has_labels:
90
+ masks_df = pd.read_csv(masks_filename)
91
+
92
+ # NOTE currently using dataset masks anno ImageIDs to form valid img_ids from the dataset
93
+ anno_img_ids = sorted(masks_df['ImageID'].unique())
94
+ img_sizes, img_id_to_idx = _load_img_info(img_info_filename, select_img_ids=anno_img_ids)
95
+
96
+ masks_df['ImageIdx'] = masks_df['ImageID'].map(img_id_to_idx)
97
+ if np.issubdtype(masks_df.ImageIdx.dtype, np.floating):
98
+ masks_df = masks_df.dropna(axis='rows')
99
+ masks_df['ImageIdx'] = masks_df.ImageIdx.astype(np.int32)
100
+ masks_df.sort_values('ImageIdx', inplace=True)
101
+ ann_img_idx = masks_df['ImageIdx'].values
102
+ img_sizes = img_sizes[ann_img_idx]
103
+ masks_df['BoxXMin'] = masks_df['BoxXMin'] * img_sizes[:, 0]
104
+ masks_df['BoxXMax'] = masks_df['BoxXMax'] * img_sizes[:, 0]
105
+ masks_df['BoxYMin'] = masks_df['BoxYMin'] * img_sizes[:, 1]
106
+ masks_df['BoxYMax'] = masks_df['BoxYMax'] * img_sizes[:, 1]
107
+ masks_df['LabelIdx'] = masks_df['LabelName'].map(self.cat_id_to_label)
108
+ # FIXME remap mask filename with _mask_filename
109
+
110
+ self._anns = dict(
111
+ bbox=masks_df[['BoxXMin', 'BoxYMin', 'BoxXMax', 'BoxYMax']].values.astype(np.float32),
112
+ label=masks_df[['LabelIdx']].values.astype(np.int32),
113
+ mask_path=masks_df[['MaskPath']].values
114
+ )
115
+ _, ri, rc = np.unique(ann_img_idx, return_index=True, return_counts=True)
116
+ self._img_to_ann = list(zip(ri, rc)) # index, count tuples
117
+ elif self.has_labels:
118
+ _logger.info('Loading bbox...')
119
+ bbox_df = pd.read_csv(bbox_filename)
120
+
121
+ # NOTE currently using dataset box anno ImageIDs to form valid img_ids from the larger dataset.
122
+ # FIXME use *imagelabels.csv or imagelabels-boxable.csv for negative examples (without box?)
123
+ anno_img_ids = sorted(bbox_df['ImageID'].unique())
124
+ img_sizes, img_id_to_idx = _load_img_info(img_info_filename, select_img_ids=anno_img_ids)
125
+
126
+ _logger.info('Process bbox...')
127
+ bbox_df['ImageIdx'] = bbox_df['ImageID'].map(img_id_to_idx)
128
+ if np.issubdtype(bbox_df.ImageIdx.dtype, np.floating):
129
+ bbox_df = bbox_df.dropna(axis='rows')
130
+ bbox_df['ImageIdx'] = bbox_df.ImageIdx.astype(np.int32)
131
+ bbox_df.sort_values('ImageIdx', inplace=True)
132
+ ann_img_idx = bbox_df['ImageIdx'].values
133
+ img_sizes = img_sizes[ann_img_idx]
134
+ bbox_df['XMin'] = bbox_df['XMin'] * img_sizes[:, 0]
135
+ bbox_df['XMax'] = bbox_df['XMax'] * img_sizes[:, 0]
136
+ bbox_df['YMin'] = bbox_df['YMin'] * img_sizes[:, 1]
137
+ bbox_df['YMax'] = bbox_df['YMax'] * img_sizes[:, 1]
138
+ bbox_df['LabelIdx'] = bbox_df['LabelName'].map(self.cat_id_to_label).astype(np.int32)
139
+
140
+ self._anns = dict(
141
+ bbox=bbox_df[['XMin', 'YMin', 'XMax', 'YMax']].values.astype(np.float32),
142
+ label=bbox_df[['LabelIdx', 'IsGroupOf']].values.astype(np.int32),
143
+ )
144
+ _, ri, rc = np.unique(ann_img_idx, return_index=True, return_counts=True)
145
+ self._img_to_ann = list(zip(ri, rc)) # index, count tuples
146
+ else:
147
+ _load_img_info(img_info_filename)
148
+
149
+ _logger.info('Annotations loaded!')
150
+
151
+ def get_ann_info(self, idx):
152
+ if not self.has_labels:
153
+ return dict()
154
+ start_idx, num_ann = self._img_to_ann[idx]
155
+ ann_keys = tuple(self._anns.keys())
156
+ ann_values = tuple(self._anns[k][start_idx:start_idx + num_ann] for k in ann_keys)
157
+ return self._parse_ann_info(idx, ann_keys, ann_values)
158
+
159
+ def _parse_ann_info(self, img_idx, ann_keys, ann_values):
160
+ """
161
+ """
162
+ gt_bboxes = []
163
+ gt_labels = []
164
+ gt_bboxes_ignore = []
165
+ if self.include_masks:
166
+ assert 'mask_path' in ann_keys
167
+ gt_masks = []
168
+
169
+ for ann in zip(*ann_values):
170
+ ann = dict(zip(ann_keys, ann))
171
+ x1, y1, x2, y2 = ann['bbox']
172
+ if x2 - x1 < 1 or y2 - y1 < 1:
173
+ continue
174
+ label = ann['label'][0]
175
+ iscrowd = False
176
+ if len(ann['label']) > 1:
177
+ iscrowd = ann['label'][1]
178
+ if self.yxyx:
179
+ bbox = np.array([y1, x1, y2, x2], dtype=np.float32)
180
+ else:
181
+ bbox = ann['bbox']
182
+ if iscrowd:
183
+ gt_bboxes_ignore.append(bbox)
184
+ else:
185
+ gt_bboxes.append(bbox)
186
+ gt_labels.append(label)
187
+ # if self.include_masks:
188
+ # img_info = self.img_infos[img_idx]
189
+ # mask_img = SegmentationMask(ann['mask_filename'], img_info['width'], img_info['height'])
190
+ # gt_masks.append(mask_img)
191
+
192
+ if gt_bboxes:
193
+ gt_bboxes = np.array(gt_bboxes, ndmin=2, dtype=np.float32)
194
+ gt_labels = np.array(gt_labels, dtype=np.int64)
195
+ else:
196
+ gt_bboxes = np.zeros((0, 4), dtype=np.float32)
197
+ gt_labels = np.array([], dtype=np.int64)
198
+
199
+ if self.include_bboxes_ignore:
200
+ if gt_bboxes_ignore:
201
+ gt_bboxes_ignore = np.array(gt_bboxes_ignore, ndmin=2, dtype=np.float32)
202
+ else:
203
+ gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
204
+
205
+ ann = dict(bbox=gt_bboxes, cls=gt_labels)
206
+
207
+ if self.include_bboxes_ignore:
208
+ ann.update(dict(bbox_ignore=gt_bboxes_ignore, cls_ignore=np.array([], dtype=np.int64)))
209
+ if self.include_masks:
210
+ ann['masks'] = gt_masks
211
+ return ann
efficientdet/effdet/data/parsers/parser_voc.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Pascal VOC dataset parser
2
+
3
+ Copyright 2020 Ross Wightman
4
+ """
5
+ import os
6
+ import xml.etree.ElementTree as ET
7
+ from collections import defaultdict
8
+ import numpy as np
9
+
10
+ from .parser import Parser
11
+ from .parser_config import VocParserCfg
12
+
13
+
14
+ class VocParser(Parser):
15
+
16
+ DEFAULT_CLASSES = (
17
+ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair',
18
+ 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant',
19
+ 'sheep', 'sofa', 'train', 'tvmonitor')
20
+
21
+ def __init__(self, cfg: VocParserCfg):
22
+ super().__init__(
23
+ bbox_yxyx=cfg.bbox_yxyx,
24
+ has_labels=cfg.has_labels,
25
+ include_masks=False, # FIXME to support someday
26
+ include_bboxes_ignore=False,
27
+ ignore_empty_gt=cfg.has_labels and cfg.ignore_empty_gt,
28
+ min_img_size=cfg.min_img_size
29
+ )
30
+ self.correct_bbox = 1
31
+ self.keep_difficult = cfg.keep_difficult
32
+
33
+ self.anns = None
34
+ self.img_id_to_idx = {}
35
+ self._load_annotations(
36
+ split_filename=cfg.split_filename,
37
+ img_filename=cfg.img_filename,
38
+ ann_filename=cfg.ann_filename,
39
+ classes=cfg.classes,
40
+ )
41
+
42
+ def _load_annotations(
43
+ self,
44
+ split_filename: str,
45
+ img_filename: str,
46
+ ann_filename: str,
47
+ classes=None,
48
+ ):
49
+ classes = classes or self.DEFAULT_CLASSES
50
+ self.cat_names = list(classes)
51
+ self.cat_ids = self.cat_names
52
+ self.cat_id_to_label = {cat: i + self.label_offset for i, cat in enumerate(self.cat_ids)}
53
+
54
+ self.anns = []
55
+
56
+ with open(split_filename) as f:
57
+ ids = f.readlines()
58
+ for img_id in ids:
59
+ img_id = img_id.strip("\n")
60
+ filename = img_filename % img_id
61
+ xml_path = ann_filename % img_id
62
+ tree = ET.parse(xml_path)
63
+ root = tree.getroot()
64
+ size = root.find('size')
65
+ width = int(size.find('width').text)
66
+ height = int(size.find('height').text)
67
+ if min(width, height) < self.min_img_size:
68
+ continue
69
+
70
+ anns = []
71
+ for obj_idx, obj in enumerate(root.findall('object')):
72
+ name = obj.find('name').text
73
+ label = self.cat_id_to_label[name]
74
+ difficult = int(obj.find('difficult').text)
75
+ bnd_box = obj.find('bndbox')
76
+ bbox = [
77
+ int(bnd_box.find('xmin').text),
78
+ int(bnd_box.find('ymin').text),
79
+ int(bnd_box.find('xmax').text),
80
+ int(bnd_box.find('ymax').text)
81
+ ]
82
+ anns.append(dict(label=label, bbox=bbox, difficult=difficult))
83
+
84
+ if not self.ignore_empty_gt or len(anns):
85
+ self.anns.append(anns)
86
+ self.img_infos.append(dict(id=img_id, file_name=filename, width=width, height=height))
87
+ self.img_ids.append(img_id)
88
+ else:
89
+ self.img_ids_invalid.append(img_id)
90
+
91
+ def merge(self, other):
92
+ assert len(self.cat_ids) == len(other.cat_ids)
93
+ self.img_ids.extend(other.img_ids)
94
+ self.img_infos.extend(other.img_infos)
95
+ self.anns.extend(other.anns)
96
+
97
+ def get_ann_info(self, idx):
98
+ return self._parse_ann_info(self.anns[idx])
99
+
100
+ def _parse_ann_info(self, ann_info):
101
+ bboxes = []
102
+ labels = []
103
+ bboxes_ignore = []
104
+ labels_ignore = []
105
+ for ann in ann_info:
106
+ ignore = False
107
+ x1, y1, x2, y2 = ann['bbox']
108
+ label = ann['label']
109
+ w = x2 - x1
110
+ h = y2 - y1
111
+ if w < 1 or h < 1:
112
+ ignore = True
113
+ if self.yxyx:
114
+ bbox = [y1, x1, y2, x2]
115
+ else:
116
+ bbox = ann['bbox']
117
+ if ignore or (ann['difficult'] and not self.keep_difficult):
118
+ bboxes_ignore.append(bbox)
119
+ labels_ignore.append(label)
120
+ else:
121
+ bboxes.append(bbox)
122
+ labels.append(label)
123
+
124
+ if not bboxes:
125
+ bboxes = np.zeros((0, 4), dtype=np.float32)
126
+ labels = np.zeros((0, ), dtype=np.float32)
127
+ else:
128
+ bboxes = np.array(bboxes, ndmin=2, dtype=np.float32) - self.correct_bbox
129
+ labels = np.array(labels, dtype=np.float32)
130
+
131
+ if self.include_bboxes_ignore:
132
+ if not bboxes_ignore:
133
+ bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
134
+ labels_ignore = np.zeros((0, ), dtype=np.float32)
135
+ else:
136
+ bboxes_ignore = np.array(bboxes_ignore, ndmin=2, dtype=np.float32) - self.correct_bbox
137
+ labels_ignore = np.array(labels_ignore, dtype=np.float32)
138
+
139
+ ann = dict(
140
+ bbox=bboxes.astype(np.float32),
141
+ cls=labels.astype(np.int64))
142
+
143
+ if self.include_bboxes_ignore:
144
+ ann.update(dict(
145
+ bbox_ignore=bboxes_ignore.astype(np.float32),
146
+ cls_ignore=labels_ignore.astype(np.int64)))
147
+ return ann
148
+
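
The VOC config takes printf-style templates that are filled with each image id from the split file; a sketch assuming a standard VOCdevkit layout (paths are placeholders):

    from effdet.data.parsers import VocParserCfg
    from effdet.data.parsers.parser_voc import VocParser

    cfg = VocParserCfg(
        split_filename='VOCdevkit/VOC2012/ImageSets/Main/train.txt',
        ann_filename='VOCdevkit/VOC2012/Annotations/%s.xml',
        img_filename='%s.jpg')
    parser = VocParser(cfg)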
efficientdet/effdet/data/random_erasing.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Multi-Scale RandomErasing
2
+
3
+ Copyright 2020 Ross Wightman
4
+ """
5
+ import random
6
+ import math
7
+ import torch
8
+
9
+
10
+ def _get_pixels(per_pixel, rand_color, patch_size, dtype=torch.float32, device='cuda'):
11
+ # NOTE I've seen CUDA illegal memory access errors being caused by the normal_()
12
+ # paths, flip the order so normal is run on CPU if this becomes a problem
13
+ # Issue has been fixed in master https://github.com/pytorch/pytorch/issues/19508
14
+ if per_pixel:
15
+ return torch.empty(patch_size, dtype=dtype, device=device).normal_()
16
+ elif rand_color:
17
+ return torch.empty((patch_size[0], 1, 1), dtype=dtype, device=device).normal_()
18
+ else:
19
+ return torch.zeros((patch_size[0], 1, 1), dtype=dtype, device=device)
20
+
21
+
22
+ class RandomErasing:
23
+ """ Randomly selects a rectangle region in an image and erases its pixels.
24
+ 'Random Erasing Data Augmentation' by Zhong et al.
25
+ See https://arxiv.org/pdf/1708.04896.pdf
26
+
27
+ This variant of RandomErasing is tweaked for multi-scale obj detection training.
28
+ Args:
29
+ probability: Probability that the Random Erasing operation will be performed.
30
+ min_area: Minimum percentage of erased area wrt input image area.
31
+ max_area: Maximum percentage of erased area wrt input image area.
32
+ min_aspect: Minimum aspect ratio of erased area.
33
+ mode: pixel color mode, one of 'const', 'rand', or 'pixel'
34
+ 'const' - erase block is constant color of 0 for all channels
35
+ 'rand' - erase block is same per-channel random (normal) color
36
+ 'pixel' - erase block is per-pixel random (normal) color
37
+ max_count: maximum number of erasing blocks per image, area per box is scaled by count.
38
+ per-image count is randomly chosen between 1 and this value.
39
+ """
40
+
41
+ def __init__(
42
+ self,
43
+ probability=0.5, min_area=0.02, max_area=1/4, min_aspect=0.3, max_aspect=None,
44
+ mode='const', min_count=1, max_count=None, num_splits=0, device='cuda'):
45
+ self.probability = probability
46
+ self.min_area = min_area
47
+ self.max_area = max_area
48
+ max_aspect = max_aspect or 1 / min_aspect
49
+ self.log_aspect_ratio = (math.log(min_aspect), math.log(max_aspect))
50
+ self.min_count = min_count
51
+ self.max_count = max_count or min_count
52
+ self.num_splits = num_splits
53
+ mode = mode.lower()
54
+ self.rand_color = False
55
+ self.per_pixel = False
56
+ if mode == 'rand':
57
+ self.rand_color = True # per block random normal
58
+ elif mode == 'pixel':
59
+ self.per_pixel = True # per pixel random normal
60
+ else:
61
+ assert not mode or mode == 'const'
62
+ self.device = device
63
+
64
+ def _erase(self, img, chan, img_h, img_w, dtype):
65
+ if random.random() > self.probability:
66
+ return
67
+ area = img_h * img_w
68
+ count = self.min_count if self.min_count == self.max_count else \
69
+ random.randint(self.min_count, self.max_count)
70
+ for _ in range(count):
71
+ for attempt in range(10):
72
+ target_area = random.uniform(self.min_area, self.max_area) * area / count
73
+ aspect_ratio = math.exp(random.uniform(*self.log_aspect_ratio))
74
+ h = int(round(math.sqrt(target_area * aspect_ratio)))
75
+ w = int(round(math.sqrt(target_area / aspect_ratio)))
76
+ if w < img_w and h < img_h:
77
+ top = random.randint(0, img_h - h)
78
+ left = random.randint(0, img_w - w)
79
+ img[:, top:top + h, left:left + w] = _get_pixels(
80
+ self.per_pixel, self.rand_color, (chan, h, w),
81
+ dtype=dtype, device=self.device)
82
+ break
83
+
84
+ def __call__(self, input, target):
85
+ batch_size, chan, input_h, input_w = input.shape
86
+ img_scales = target['img_scale']
87
+ img_size = (target['img_size'] / img_scales.unsqueeze(1)).int()
88
+ img_size[:, 0] = img_size[:, 0].clamp(max=input_w)
89
+ img_size[:, 1] = img_size[:, 1].clamp(max=input_h)
90
+ # skip first slice of batch if num_splits is set (for clean portion of samples)
91
+ batch_start = batch_size // self.num_splits if self.num_splits > 1 else 0
92
+ for i in range(batch_start, batch_size):
93
+ self._erase(input[i], chan, img_size[i, 1], img_size[i, 0], input.dtype)
94
+ return input
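
In this pipeline RandomErasing is applied inside `PrefetchLoader` when `re_prob > 0` is passed to `create_loader`; a rough standalone sketch on CPU (the target keys mirror what the transforms produce and are an assumption here):

    import torch
    from effdet.data.random_erasing import RandomErasing

    erase = RandomErasing(probability=1.0, mode='pixel', device='cpu')
    images = torch.randn(2, 3, 512, 512)
    target = dict(img_scale=torch.ones(2),
                  img_size=torch.tensor([[512., 512.], [512., 512.]]))
    images = erase(images, target)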
efficientdet/effdet/data/transforms.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ COCO transforms (quick and dirty)
2
+
3
+ Hacked together by Ross Wightman
4
+ """
5
+ import torch
6
+ from PIL import Image
7
+ import numpy as np
8
+ import random
9
+ import math
10
+
11
+ IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
12
+ IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
13
+ IMAGENET_INCEPTION_MEAN = (0.5, 0.5, 0.5)
14
+ IMAGENET_INCEPTION_STD = (0.5, 0.5, 0.5)
15
+
16
+
17
+ class ImageToNumpy:
18
+
19
+ def __call__(self, pil_img, annotations: dict):
20
+ np_img = np.array(pil_img, dtype=np.uint8)
21
+ if np_img.ndim < 3:
22
+ np_img = np.expand_dims(np_img, axis=-1)
23
+ np_img = np.moveaxis(np_img, 2, 0) # HWC to CHW
24
+ return np_img, annotations
25
+
26
+
27
+ class ImageToTensor:
28
+
29
+ def __init__(self, dtype=torch.float32):
30
+ self.dtype = dtype
31
+
32
+ def __call__(self, pil_img, annotations: dict):
33
+ np_img = np.array(pil_img, dtype=np.uint8)
34
+ if np_img.ndim < 3:
35
+ np_img = np.expand_dims(np_img, axis=-1)
36
+ np_img = np.moveaxis(np_img, 2, 0) # HWC to CHW
37
+ return torch.from_numpy(np_img).to(dtype=self.dtype), annotations
38
+
39
+
40
+ def _pil_interp(method):
41
+ if method == 'bicubic':
42
+ return Image.BICUBIC
43
+ elif method == 'lanczos':
44
+ return Image.LANCZOS
45
+ elif method == 'hamming':
46
+ return Image.HAMMING
47
+ else:
48
+ # default bilinear, do we want to allow nearest?
49
+ return Image.BILINEAR
50
+
51
+
52
+ def clip_boxes_(boxes, img_size):
53
+ height, width = img_size
54
+ clip_upper = np.array([height, width] * 2, dtype=boxes.dtype)
55
+ np.clip(boxes, 0, clip_upper, out=boxes)
56
+
57
+
58
+ def clip_boxes(boxes, img_size):
59
+ clipped_boxes = boxes.copy()
60
+ clip_boxes_(clipped_boxes, img_size)
61
+ return clipped_boxes
62
+
63
+
64
+ def _size_tuple(size):
65
+ if isinstance(size, int):
66
+ return size, size
67
+ else:
68
+ assert len(size) == 2
69
+ return size
70
+
71
+
72
+ class ResizePad:
73
+
74
+ def __init__(self, target_size: int, interpolation: str = 'bilinear', fill_color: tuple = (0, 0, 0)):
75
+ self.target_size = _size_tuple(target_size)
76
+ self.interpolation = interpolation
77
+ self.fill_color = fill_color
78
+
79
+ def __call__(self, img, anno: dict):
80
+ width, height = img.size
81
+
82
+ img_scale_y = self.target_size[0] / height
83
+ img_scale_x = self.target_size[1] / width
84
+ img_scale = min(img_scale_y, img_scale_x)
85
+ scaled_h = int(height * img_scale)
86
+ scaled_w = int(width * img_scale)
87
+
88
+ new_img = Image.new("RGB", (self.target_size[1], self.target_size[0]), color=self.fill_color)
89
+ interp_method = _pil_interp(self.interpolation)
90
+ img = img.resize((scaled_w, scaled_h), interp_method)
91
+ new_img.paste(img)
92
+
93
+ if 'bbox' in anno:
94
+ # FIXME haven't tested this path since not currently using dataset annotations for train/eval
95
+ bbox = anno['bbox']
96
+ bbox[:, :4] *= img_scale
97
+ clip_boxes_(bbox, (scaled_h, scaled_w))
98
+ valid_indices = (bbox[:, :2] < bbox[:, 2:4]).all(axis=1)
99
+ anno['bbox'] = bbox[valid_indices, :]
100
+ anno['cls'] = anno['cls'][valid_indices]
101
+
102
+ anno['img_scale'] = 1. / img_scale # back to original
103
+
104
+ return new_img, anno
105
+
106
+
107
+ class RandomResizePad:
108
+
109
+ def __init__(self, target_size: int, scale: tuple = (0.1, 2.0), interpolation: str = 'bilinear',
110
+ fill_color: tuple = (0, 0, 0)):
111
+ self.target_size = _size_tuple(target_size)
112
+ self.scale = scale
113
+ self.interpolation = interpolation
114
+ self.fill_color = fill_color
115
+
116
+ def _get_params(self, img):
117
+ # Select a random scale factor.
118
+ scale_factor = random.uniform(*self.scale)
119
+ scaled_target_height = scale_factor * self.target_size[0]
120
+ scaled_target_width = scale_factor * self.target_size[1]
121
+
122
+ # Recompute the accurate scale_factor using rounded scaled image size.
123
+ width, height = img.size
124
+ img_scale_y = scaled_target_height / height
125
+ img_scale_x = scaled_target_width / width
126
+ img_scale = min(img_scale_y, img_scale_x)
127
+
128
+ # Select non-zero random offset (x, y) if scaled image is larger than target size
129
+ scaled_h = int(height * img_scale)
130
+ scaled_w = int(width * img_scale)
131
+ offset_y = scaled_h - self.target_size[0]
132
+ offset_x = scaled_w - self.target_size[1]
133
+ offset_y = int(max(0.0, float(offset_y)) * random.uniform(0, 1))
134
+ offset_x = int(max(0.0, float(offset_x)) * random.uniform(0, 1))
135
+ return scaled_h, scaled_w, offset_y, offset_x, img_scale
136
+
137
+ def __call__(self, img, anno: dict):
138
+ scaled_h, scaled_w, offset_y, offset_x, img_scale = self._get_params(img)
139
+
140
+ interp_method = _pil_interp(self.interpolation)
141
+ img = img.resize((scaled_w, scaled_h), interp_method)
142
+ right, lower = min(scaled_w, offset_x + self.target_size[1]), min(scaled_h, offset_y + self.target_size[0])
143
+ img = img.crop((offset_x, offset_y, right, lower))
144
+ new_img = Image.new("RGB", (self.target_size[1], self.target_size[0]), color=self.fill_color)
145
+ new_img.paste(img)
146
+
147
+ if 'bbox' in anno:
148
+ # FIXME not fully tested
149
+ bbox = anno['bbox'].copy() # FIXME copy for debugger inspection, back to inplace
150
+ bbox[:, :4] *= img_scale
151
+ box_offset = np.stack([offset_y, offset_x] * 2)
152
+ bbox -= box_offset
153
+ clip_boxes_(bbox, (scaled_h, scaled_w))
154
+ valid_indices = (bbox[:, :2] < bbox[:, 2:4]).all(axis=1)
155
+ anno['bbox'] = bbox[valid_indices, :]
156
+ anno['cls'] = anno['cls'][valid_indices]
157
+
158
+ anno['img_scale'] = 1. / img_scale # back to original
159
+
160
+ return new_img, anno
161
+
162
+
163
+ class RandomFlip:
164
+
165
+ def __init__(self, horizontal=True, vertical=False, prob=0.5):
166
+ self.horizontal = horizontal
167
+ self.vertical = vertical
168
+ self.prob = prob
169
+
170
+ def _get_params(self):
171
+ do_horizontal = random.random() < self.prob if self.horizontal else False
172
+ do_vertical = random.random() < self.prob if self.vertical else False
173
+ return do_horizontal, do_vertical
174
+
175
+ def __call__(self, img, annotations: dict):
176
+ do_horizontal, do_vertical = self._get_params()
177
+ width, height = img.size
178
+
179
+ def _fliph(bbox):
180
+ x_max = width - bbox[:, 1]
181
+ x_min = width - bbox[:, 3]
182
+ bbox[:, 1] = x_min
183
+ bbox[:, 3] = x_max
184
+
185
+ def _flipv(bbox):
186
+ y_max = height - bbox[:, 0]
187
+ y_min = height - bbox[:, 2]
188
+ bbox[:, 0] = y_min
189
+ bbox[:, 2] = y_max
190
+
191
+ if do_horizontal and do_vertical:
192
+ img = img.transpose(Image.ROTATE_180)
193
+ if 'bbox' in annotations:
194
+ _fliph(annotations['bbox'])
195
+ _flipv(annotations['bbox'])
196
+ elif do_horizontal:
197
+ img = img.transpose(Image.FLIP_LEFT_RIGHT)
198
+ if 'bbox' in annotations:
199
+ _fliph(annotations['bbox'])
200
+ elif do_vertical:
201
+ img = img.transpose(Image.FLIP_TOP_BOTTOM)
202
+ if 'bbox' in annotations:
203
+ _flipv(annotations['bbox'])
204
+
205
+ return img, annotations
206
+
207
+
208
+ def resolve_fill_color(fill_color, img_mean=IMAGENET_DEFAULT_MEAN):
209
+ if isinstance(fill_color, tuple):
210
+ assert len(fill_color) == 3
211
+ fill_color = fill_color
212
+ else:
213
+ try:
214
+ int_color = int(fill_color)
215
+ fill_color = (int_color,) * 3
216
+ except ValueError:
217
+ assert fill_color == 'mean'
218
+ fill_color = tuple([int(round(255 * x)) for x in img_mean])
219
+ return fill_color
220
+
221
+
222
+ class Compose:
223
+
224
+ def __init__(self, transforms: list):
225
+ self.transforms = transforms
226
+
227
+ def __call__(self, img, annotations: dict):
228
+ for t in self.transforms:
229
+ img, annotations = t(img, annotations)
230
+ return img, annotations
231
+
232
+
233
+ def transforms_coco_eval(
234
+ img_size=224,
235
+ interpolation='bilinear',
236
+ use_prefetcher=False,
237
+ fill_color='mean',
238
+ mean=IMAGENET_DEFAULT_MEAN,
239
+ std=IMAGENET_DEFAULT_STD):
240
+
241
+ fill_color = resolve_fill_color(fill_color, mean)
242
+
243
+ image_tfl = [
244
+ ResizePad(
245
+ target_size=img_size, interpolation=interpolation, fill_color=fill_color),
246
+ ImageToNumpy(),
247
+ ]
248
+
249
+ assert use_prefetcher, "Only supporting prefetcher usage right now"
250
+
251
+ image_tf = Compose(image_tfl)
252
+ return image_tf
253
+
254
+
255
+ def transforms_coco_train(
256
+ img_size=224,
257
+ interpolation='random',
258
+ use_prefetcher=False,
259
+ fill_color='mean',
260
+ mean=IMAGENET_DEFAULT_MEAN,
261
+ std=IMAGENET_DEFAULT_STD):
262
+
263
+ fill_color = resolve_fill_color(fill_color, mean)
264
+
265
+ image_tfl = [
266
+ RandomFlip(horizontal=True, prob=0.5),
267
+ RandomResizePad(
268
+ target_size=img_size, interpolation=interpolation, fill_color=fill_color),
269
+ ImageToNumpy(),
270
+ ]
271
+
272
+ assert use_prefetcher, "Only supporting prefetcher usage right now"
273
+
274
+ image_tf = Compose(image_tfl)
275
+ return image_tf
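A hedged usage sketch of the eval pipeline defined above (image and box values are illustrative; boxes follow the [ymin, xmin, ymax, xmax] convention used by clip_boxes_):

    import numpy as np
    from PIL import Image

    tf = transforms_coco_eval(img_size=512, interpolation='bilinear',
                              use_prefetcher=True, fill_color='mean')
    img = Image.new('RGB', (640, 480))                  # stand-in for a real photo
    anno = {'bbox': np.array([[40., 50., 220., 200.]], dtype=np.float32),
            'cls': np.array([1])}
    np_img, anno = tf(img, anno)
    print(np_img.shape, anno['img_scale'])              # (3, 512, 512) 1.25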
efficientdet/effdet/data/transforms_albumentation.py ADDED
@@ -0,0 +1,23 @@
1
+ import albumentations as A
2
+
3
+ from albumentations.augmentations.transforms import (
4
+ RandomBrightness, Downscale, RandomFog, RandomRain, RandomSnow)
5
+
6
+ from albumentations.augmentations.blur.transforms import Blur
7
+
8
+ def get_transform():
9
+ transforms = A.Compose([
10
+ #HorizontalFlip(p=0.5),
11
+ #VerticalFlip(p=0.5),
12
+ #RandomSizedBBoxSafeCrop(700, 700, erosion_rate=0.0, interpolation=1, always_apply=False, p=0.5),
13
+ Blur(blur_limit=7, always_apply=False, p=0.5),
14
+ RandomBrightness(limit=0.2, always_apply=False, p=0.5),
15
+ #Downscale(scale_min=0.5, scale_max=0.9, interpolation=0, always_apply=False, p=0.5),
16
+ #PadIfNeeded(min_height=1024, min_width=1024, pad_height_divisor=None, pad_width_divisor=None, border_mode=4, value=None, mask_value=None, always_apply=False, p=1.0),
17
+ #RandomFog(fog_coef_lower=0.3, fog_coef_upper=1, alpha_coef=0.08, always_apply=False, p=0.2),
18
+ #RandomRain(slant_lower=-10, slant_upper=10, drop_length=20, drop_width=1, drop_color=(200, 200, 200), p=0.2),
19
+ #RandomSnow(snow_point_lower=0.1, snow_point_upper=0.3, brightness_coeff=2.5, always_apply=False, p=0.2)
20
+ ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_classes'])
21
+ )
22
+ return transforms
23
+
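A hedged example of applying this albumentations pipeline (image and box below are placeholders; pascal_voc boxes are (x_min, y_min, x_max, y_max)):

    import numpy as np

    aug = get_transform()
    image = np.zeros((480, 640, 3), dtype=np.uint8)
    out = aug(image=image, bboxes=[(40, 50, 220, 200)], bbox_classes=[1])
    aug_image, aug_boxes, aug_classes = out['image'], out['bboxes'], out['bbox_classes']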
efficientdet/effdet/distributed.py ADDED
@@ -0,0 +1,308 @@
1
+ """ PyTorch distributed helpers
2
+
3
+ Some of this lifted from Detectron2 with other fns added by myself. Some of the Detectron2 fns
4
+ were intended for use with GLOO PG. I am using NCCL here with default PG so not everything will work
5
+ as is -RW
6
+ """
7
+ import functools
8
+ import logging
9
+ import numpy as np
10
+ import pickle
11
+ import torch
12
+ import torch.distributed as dist
13
+
14
+ _LOCAL_PROCESS_GROUP = None
15
+ """
16
+ A torch process group which only includes processes that are on the same machine as the current process.
17
+ This variable is set when processes are spawned by `launch()` in "engine/launch.py".
18
+ """
19
+
20
+
21
+ def get_world_size() -> int:
22
+ if not dist.is_available():
23
+ return 1
24
+ if not dist.is_initialized():
25
+ return 1
26
+ return dist.get_world_size()
27
+
28
+
29
+ def get_rank() -> int:
30
+ if not dist.is_available():
31
+ return 0
32
+ if not dist.is_initialized():
33
+ return 0
34
+ return dist.get_rank()
35
+
36
+
37
+ def get_local_rank() -> int:
38
+ """
39
+ Returns:
40
+ The rank of the current process within the local (per-machine) process group.
41
+ """
42
+ if not dist.is_available():
43
+ return 0
44
+ if not dist.is_initialized():
45
+ return 0
46
+ assert _LOCAL_PROCESS_GROUP is not None
47
+ return dist.get_rank(group=_LOCAL_PROCESS_GROUP)
48
+
49
+
50
+ def get_local_size() -> int:
51
+ """
52
+ Returns:
53
+ The size of the per-machine process group,
54
+ i.e. the number of processes per machine.
55
+ """
56
+ if not dist.is_available():
57
+ return 1
58
+ if not dist.is_initialized():
59
+ return 1
60
+ return dist.get_world_size(group=_LOCAL_PROCESS_GROUP)
61
+
62
+
63
+ def is_main_process() -> bool:
64
+ return get_rank() == 0
65
+
66
+
67
+ def synchronize():
68
+ """
69
+ Helper function to synchronize (barrier) among all processes when
70
+ using distributed training
71
+ """
72
+ if not dist.is_available():
73
+ return
74
+ if not dist.is_initialized():
75
+ return
76
+ world_size = dist.get_world_size()
77
+ if world_size == 1:
78
+ return
79
+ dist.barrier()
80
+
81
+
82
+ @functools.lru_cache()
83
+ def _get_global_gloo_group():
84
+ """
85
+ Return a process group based on gloo backend, containing all the ranks
86
+ The result is cached.
87
+ """
88
+ if dist.get_backend() == "nccl":
89
+ return dist.new_group(backend="gloo")
90
+ else:
91
+ return dist.group.WORLD
92
+
93
+
94
+ def _serialize_to_tensor(data, group):
95
+ backend = dist.get_backend(group)
96
+ assert backend in ["gloo", "nccl"]
97
+ device = torch.device("cpu" if backend == "gloo" else "cuda")
98
+
99
+ buffer = pickle.dumps(data)
100
+ if len(buffer) > 1024 ** 3:
101
+ logger = logging.getLogger(__name__)
102
+ logger.warning(
103
+ "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
104
+ get_rank(), len(buffer) / (1024 ** 3), device
105
+ )
106
+ )
107
+ storage = torch.ByteStorage.from_buffer(buffer)
108
+ tensor = torch.ByteTensor(storage).to(device=device)
109
+ return tensor
110
+
111
+
112
+ def _pad_to_largest_tensor(tensor, group):
113
+ """
114
+ Returns:
115
+ list[int]: size of the tensor, on each rank
116
+ Tensor: padded tensor that has the max size
117
+ """
118
+ world_size = dist.get_world_size(group=group)
119
+ assert (
120
+ world_size >= 1
121
+ ), "comm.gather/all_gather must be called from ranks within the given group!"
122
+ local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device)
123
+ size_list = [
124
+ torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size)
125
+ ]
126
+ dist.all_gather(size_list, local_size, group=group)
127
+ size_list = [int(size.item()) for size in size_list]
128
+
129
+ max_size = max(size_list)
130
+
131
+ # we pad the tensor because torch all_gather does not support
132
+ # gathering tensors of different shapes
133
+ if local_size != max_size:
134
+ padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device)
135
+ tensor = torch.cat((tensor, padding), dim=0)
136
+ return size_list, tensor
137
+
138
+
139
+ def all_gather(data, group=None):
140
+ """
141
+ Run all_gather on arbitrary picklable data (not necessarily tensors).
142
+ Args:
143
+ data: any picklable object
144
+ group: a torch process group. By default, will use a group which
145
+ contains all ranks on gloo backend.
146
+ Returns:
147
+ list[data]: list of data gathered from each rank
148
+ """
149
+ if get_world_size() == 1:
150
+ return [data]
151
+ if group is None:
152
+ group = _get_global_gloo_group()
153
+ if dist.get_world_size(group) == 1:
154
+ return [data]
155
+
156
+ tensor = _serialize_to_tensor(data, group)
157
+
158
+ size_list, tensor = _pad_to_largest_tensor(tensor, group)
159
+ max_size = max(size_list)
160
+
161
+ # receiving Tensor from all ranks
162
+ tensor_list = [torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list]
163
+ dist.all_gather(tensor_list, tensor, group=group)
164
+
165
+ data_list = []
166
+ for size, tensor in zip(size_list, tensor_list):
167
+ buffer = tensor.cpu().numpy().tobytes()[:size]
168
+ data_list.append(pickle.loads(buffer))
169
+
170
+ return data_list
171
+
172
+
173
+ def gather(data, dst=0, group=None):
174
+ """
175
+ Run gather on arbitrary picklable data (not necessarily tensors).
176
+ Args:
177
+ data: any picklable object
178
+ dst (int): destination rank
179
+ group: a torch process group. By default, will use a group which
180
+ contains all ranks on gloo backend.
181
+ Returns:
182
+ list[data]: on dst, a list of data gathered from each rank. Otherwise,
183
+ an empty list.
184
+ """
185
+ if get_world_size() == 1:
186
+ return [data]
187
+ if group is None:
188
+ group = _get_global_gloo_group()
189
+ if dist.get_world_size(group=group) == 1:
190
+ return [data]
191
+ rank = dist.get_rank(group=group)
192
+
193
+ tensor = _serialize_to_tensor(data, group)
194
+ size_list, tensor = _pad_to_largest_tensor(tensor, group)
195
+
196
+ # receiving Tensor from all ranks
197
+ if rank == dst:
198
+ max_size = max(size_list)
199
+ tensor_list = [torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list]
200
+ dist.gather(tensor, tensor_list, dst=dst, group=group)
201
+
202
+ data_list = []
203
+ for size, tensor in zip(size_list, tensor_list):
204
+ buffer = tensor.cpu().numpy().tobytes()[:size]
205
+ data_list.append(pickle.loads(buffer))
206
+ return data_list
207
+ else:
208
+ dist.gather(tensor, [], dst=dst, group=group)
209
+ return []
210
+
211
+
212
+ def shared_random_seed():
213
+ """
214
+ Returns:
215
+ int: a random number that is the same across all workers.
216
+ If workers need a shared RNG, they can use this shared seed to
217
+ create one.
218
+ All workers must call this function, otherwise it will deadlock.
219
+ """
220
+ ints = np.random.randint(2 ** 31)
221
+ all_ints = all_gather(ints)
222
+ return all_ints[0]
223
+
224
+
225
+ def reduce_dict(input_dict, average=True):
226
+ """
227
+ Reduce the values in the dictionary from all processes so that process with rank
228
+ 0 has the reduced results.
229
+ Args:
230
+ input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor.
231
+ average (bool): whether to do average or sum
232
+ Returns:
233
+ a dict with the same keys as input_dict, after reduction.
234
+ """
235
+ world_size = get_world_size()
236
+ if world_size < 2:
237
+ return input_dict
238
+ with torch.no_grad():
239
+ names = []
240
+ values = []
241
+ # sort the keys so that they are consistent across processes
242
+ for k in sorted(input_dict.keys()):
243
+ names.append(k)
244
+ values.append(input_dict[k])
245
+ values = torch.stack(values, dim=0)
246
+ dist.reduce(values, dst=0)
247
+ if dist.get_rank() == 0 and average:
248
+ # only main process gets accumulated, so only divide by
249
+ # world_size in this case
250
+ values /= world_size
251
+ reduced_dict = {k: v for k, v in zip(names, values)}
252
+ return reduced_dict
253
+
254
+
255
+ def all_gather_container(container, group=None, cat_dim=0):
256
+ group = group or dist.group.WORLD
257
+ world_size = dist.get_world_size(group)
258
+
259
+ def _do_gather(tensor):
260
+ tensor_list = [torch.empty_like(tensor) for _ in range(world_size)]
261
+ dist.all_gather(tensor_list, tensor, group=group)
262
+ return torch.cat(tensor_list, dim=cat_dim)
263
+
264
+ if isinstance(container, dict):
265
+ gathered = dict()
266
+ for k, v in container.items():
267
+ v = _do_gather(v)
268
+ gathered[k] = v
269
+ return gathered
270
+ elif isinstance(container, (list, tuple)):
271
+ gathered = [_do_gather(v) for v in container]
272
+ if isinstance(container, tuple):
273
+ gathered = tuple(gathered)
274
+ return gathered
275
+ else:
276
+ # if not a dict, list, tuple, expect a singular tensor
277
+ assert isinstance(container, torch.Tensor)
278
+ return _do_gather(container)
279
+
280
+
281
+ def gather_container(container, dst, group=None, cat_dim=0):
282
+ group = group or dist.group.WORLD
283
+ world_size = dist.get_world_size(group)
284
+ this_rank = dist.get_rank(group)
285
+
286
+ def _do_gather(tensor):
287
+ if this_rank == dst:
288
+ tensor_list = [torch.empty_like(tensor) for _ in range(world_size)]
289
+ else:
290
+ tensor_list = None
291
+ dist.gather(tensor, tensor_list, dst=dst, group=group)
292
+ return torch.cat(tensor_list, dim=cat_dim)
293
+
294
+ if isinstance(container, dict):
295
+ gathered = dict()
296
+ for k, v in container.items():
297
+ v = _do_gather(v)
298
+ gathered[k] = v
299
+ return gathered
300
+ elif isinstance(container, (list, tuple)):
301
+ gathered = [_do_gather(v) for v in container]
302
+ if isinstance(container, tuple):
303
+ gathered = tuple(gathered)
304
+ return gathered
305
+ else:
306
+ # if not a dict, list, tuple, expect a singular tensor
307
+ assert isinstance(container, torch.Tensor)
308
+ return _do_gather(container)
efficientdet/effdet/efficientdet.py ADDED
@@ -0,0 +1,557 @@
 
1
+ """ PyTorch EfficientDet model
2
+
3
+ Based on official Tensorflow version at: https://github.com/google/automl/tree/master/efficientdet
4
+ Paper: https://arxiv.org/abs/1911.09070
5
+
6
+ Hacked together by Ross Wightman
7
+ """
8
+ import torch
9
+ import torch.nn as nn
10
+ import logging
11
+ import math
12
+ from collections import OrderedDict
13
+ from typing import List, Callable
14
+ from functools import partial
15
+
16
+
17
+ from timm import create_model
18
+ from timm.models.layers import create_conv2d, drop_path, create_pool2d, Swish, get_act_layer
19
+ from .config import get_fpn_config, set_config_writeable, set_config_readonly
20
+
21
+ _DEBUG = False
22
+
23
+ _ACT_LAYER = Swish
24
+
25
+
26
+ class SequentialList(nn.Sequential):
27
+ """ This module exists to work around torchscript typing issues list -> list"""
28
+ def __init__(self, *args):
29
+ super(SequentialList, self).__init__(*args)
30
+
31
+ def forward(self, x: List[torch.Tensor]) -> List[torch.Tensor]:
32
+ for module in self:
33
+ x = module(x)
34
+ return x
35
+
36
+
37
+ class ConvBnAct2d(nn.Module):
38
+ def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, padding='', bias=False,
39
+ norm_layer=nn.BatchNorm2d, act_layer=_ACT_LAYER):
40
+ super(ConvBnAct2d, self).__init__()
41
+ self.conv = create_conv2d(
42
+ in_channels, out_channels, kernel_size, stride=stride, dilation=dilation, padding=padding, bias=bias)
43
+ self.bn = None if norm_layer is None else norm_layer(out_channels)
44
+ self.act = None if act_layer is None else act_layer(inplace=True)
45
+
46
+ def forward(self, x):
47
+ x = self.conv(x)
48
+ if self.bn is not None:
49
+ x = self.bn(x)
50
+ if self.act is not None:
51
+ x = self.act(x)
52
+ return x
53
+
54
+
55
+ class SeparableConv2d(nn.Module):
56
+ """ Separable Conv
57
+ """
58
+ def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False,
59
+ channel_multiplier=1.0, pw_kernel_size=1, norm_layer=nn.BatchNorm2d, act_layer=_ACT_LAYER):
60
+ super(SeparableConv2d, self).__init__()
61
+ self.conv_dw = create_conv2d(
62
+ in_channels, int(in_channels * channel_multiplier), kernel_size,
63
+ stride=stride, dilation=dilation, padding=padding, depthwise=True)
64
+
65
+ self.conv_pw = create_conv2d(
66
+ int(in_channels * channel_multiplier), out_channels, pw_kernel_size, padding=padding, bias=bias)
67
+
68
+ self.bn = None if norm_layer is None else norm_layer(out_channels)
69
+ self.act = None if act_layer is None else act_layer(inplace=True)
70
+
71
+ def forward(self, x):
72
+ x = self.conv_dw(x)
73
+ x = self.conv_pw(x)
74
+ if self.bn is not None:
75
+ x = self.bn(x)
76
+ if self.act is not None:
77
+ x = self.act(x)
78
+ return x
79
+
80
+
81
+ class ResampleFeatureMap(nn.Sequential):
82
+
83
+ def __init__(self, in_channels, out_channels, reduction_ratio=1., pad_type='', pooling_type='max',
84
+ norm_layer=nn.BatchNorm2d, apply_bn=False, conv_after_downsample=False, redundant_bias=False):
85
+ super(ResampleFeatureMap, self).__init__()
86
+ pooling_type = pooling_type or 'max'
87
+ self.in_channels = in_channels
88
+ self.out_channels = out_channels
89
+ self.reduction_ratio = reduction_ratio
90
+ self.conv_after_downsample = conv_after_downsample
91
+
92
+ conv = None
93
+ if in_channels != out_channels:
94
+ conv = ConvBnAct2d(
95
+ in_channels, out_channels, kernel_size=1, padding=pad_type,
96
+ norm_layer=norm_layer if apply_bn else None,
97
+ bias=not apply_bn or redundant_bias, act_layer=None)
98
+
99
+ if reduction_ratio > 1:
100
+ stride_size = int(reduction_ratio)
101
+ if conv is not None and not self.conv_after_downsample:
102
+ self.add_module('conv', conv)
103
+ self.add_module(
104
+ 'downsample',
105
+ create_pool2d(
106
+ pooling_type, kernel_size=stride_size + 1, stride=stride_size, padding=pad_type))
107
+ if conv is not None and self.conv_after_downsample:
108
+ self.add_module('conv', conv)
109
+ else:
110
+ if conv is not None:
111
+ self.add_module('conv', conv)
112
+ if reduction_ratio < 1:
113
+ scale = int(1 // reduction_ratio)
114
+ self.add_module('upsample', nn.UpsamplingNearest2d(scale_factor=scale))
115
+
116
+ # def forward(self, x):
117
+ # # here for debugging only
118
+ # assert x.shape[1] == self.in_channels
119
+ # if self.reduction_ratio > 1:
120
+ # if hasattr(self, 'conv') and not self.conv_after_downsample:
121
+ # x = self.conv(x)
122
+ # x = self.downsample(x)
123
+ # if hasattr(self, 'conv') and self.conv_after_downsample:
124
+ # x = self.conv(x)
125
+ # else:
126
+ # if hasattr(self, 'conv'):
127
+ # x = self.conv(x)
128
+ # if self.reduction_ratio < 1:
129
+ # x = self.upsample(x)
130
+ # return x
131
+
132
+
133
+ class FpnCombine(nn.Module):
134
+ def __init__(self, feature_info, fpn_config, fpn_channels, inputs_offsets, target_reduction, pad_type='',
135
+ pooling_type='max', norm_layer=nn.BatchNorm2d, apply_bn_for_resampling=False,
136
+ conv_after_downsample=False, redundant_bias=False, weight_method='attn'):
137
+ super(FpnCombine, self).__init__()
138
+ self.inputs_offsets = inputs_offsets
139
+ self.weight_method = weight_method
140
+
141
+ self.resample = nn.ModuleDict()
142
+ for idx, offset in enumerate(inputs_offsets):
143
+ in_channels = fpn_channels
144
+ if offset < len(feature_info):
145
+ in_channels = feature_info[offset]['num_chs']
146
+ input_reduction = feature_info[offset]['reduction']
147
+ else:
148
+ node_idx = offset - len(feature_info)
149
+ input_reduction = fpn_config.nodes[node_idx]['reduction']
150
+ reduction_ratio = target_reduction / input_reduction
151
+ self.resample[str(offset)] = ResampleFeatureMap(
152
+ in_channels, fpn_channels, reduction_ratio=reduction_ratio, pad_type=pad_type,
153
+ pooling_type=pooling_type, norm_layer=norm_layer, apply_bn=apply_bn_for_resampling,
154
+ conv_after_downsample=conv_after_downsample, redundant_bias=redundant_bias)
155
+
156
+ if weight_method == 'attn' or weight_method == 'fastattn':
157
+ self.edge_weights = nn.Parameter(torch.ones(len(inputs_offsets)), requires_grad=True) # WSM
158
+ else:
159
+ self.edge_weights = None
160
+
161
+ def forward(self, x: List[torch.Tensor]):
162
+ dtype = x[0].dtype
163
+ nodes = []
164
+ for offset, resample in zip(self.inputs_offsets, self.resample.values()):
165
+ input_node = x[offset]
166
+ input_node = resample(input_node)
167
+ nodes.append(input_node)
168
+
169
+ if self.weight_method == 'attn':
170
+ normalized_weights = torch.softmax(self.edge_weights.to(dtype=dtype), dim=0)
171
+ out = torch.stack(nodes, dim=-1) * normalized_weights
172
+ elif self.weight_method == 'fastattn':
173
+ edge_weights = nn.functional.relu(self.edge_weights.to(dtype=dtype))
174
+ weights_sum = torch.sum(edge_weights)
175
+ out = torch.stack(
176
+ [(nodes[i] * edge_weights[i]) / (weights_sum + 0.0001) for i in range(len(nodes))], dim=-1)
177
+ elif self.weight_method == 'sum':
178
+ out = torch.stack(nodes, dim=-1)
179
+ else:
180
+ raise ValueError('unknown weight_method {}'.format(self.weight_method))
181
+ out = torch.sum(out, dim=-1)
182
+ return out
183
+
184
+
185
+ class Fnode(nn.Module):
186
+ """ A simple wrapper used in place of nn.Sequential for torchscript typing
187
+ Handles input type List[Tensor] -> output type Tensor
188
+ """
189
+ def __init__(self, combine: nn.Module, after_combine: nn.Module):
190
+ super(Fnode, self).__init__()
191
+ self.combine = combine
192
+ self.after_combine = after_combine
193
+
194
+ def forward(self, x: List[torch.Tensor]) -> torch.Tensor:
195
+ return self.after_combine(self.combine(x))
196
+
197
+
198
+ class BiFpnLayer(nn.Module):
199
+ def __init__(self, feature_info, fpn_config, fpn_channels, num_levels=5, pad_type='',
200
+ pooling_type='max', norm_layer=nn.BatchNorm2d, act_layer=_ACT_LAYER,
201
+ apply_bn_for_resampling=False, conv_after_downsample=True, conv_bn_relu_pattern=False,
202
+ separable_conv=True, redundant_bias=False):
203
+ super(BiFpnLayer, self).__init__()
204
+ self.num_levels = num_levels
205
+ self.conv_bn_relu_pattern = False
206
+
207
+ self.feature_info = []
208
+ self.fnode = nn.ModuleList()
209
+ for i, fnode_cfg in enumerate(fpn_config.nodes):
210
+ logging.debug('fnode {} : {}'.format(i, fnode_cfg))
211
+ reduction = fnode_cfg['reduction']
212
+ combine = FpnCombine(
213
+ feature_info, fpn_config, fpn_channels, tuple(fnode_cfg['inputs_offsets']),
214
+ target_reduction=reduction, pad_type=pad_type, pooling_type=pooling_type, norm_layer=norm_layer,
215
+ apply_bn_for_resampling=apply_bn_for_resampling, conv_after_downsample=conv_after_downsample,
216
+ redundant_bias=redundant_bias, weight_method=fnode_cfg['weight_method'])
217
+
218
+ after_combine = nn.Sequential()
219
+ conv_kwargs = dict(
220
+ in_channels=fpn_channels, out_channels=fpn_channels, kernel_size=3, padding=pad_type,
221
+ bias=False, norm_layer=norm_layer, act_layer=act_layer)
222
+ if not conv_bn_relu_pattern:
223
+ conv_kwargs['bias'] = redundant_bias
224
+ conv_kwargs['act_layer'] = None
225
+ after_combine.add_module('act', act_layer(inplace=True))
226
+ after_combine.add_module(
227
+ 'conv', SeparableConv2d(**conv_kwargs) if separable_conv else ConvBnAct2d(**conv_kwargs))
228
+
229
+ self.fnode.append(Fnode(combine=combine, after_combine=after_combine))
230
+ self.feature_info.append(dict(num_chs=fpn_channels, reduction=reduction))
231
+
232
+ self.feature_info = self.feature_info[-num_levels::]
233
+
234
+ def forward(self, x: List[torch.Tensor]):
235
+ for fn in self.fnode:
236
+ x.append(fn(x))
237
+ return x[-self.num_levels::]
238
+
239
+
240
+ class BiFpn(nn.Module):
241
+
242
+ def __init__(self, config, feature_info):
243
+ super(BiFpn, self).__init__()
244
+ self.num_levels = config.num_levels
245
+ norm_layer = config.norm_layer or nn.BatchNorm2d
246
+ if config.norm_kwargs:
247
+ norm_layer = partial(norm_layer, **config.norm_kwargs)
248
+ act_layer = get_act_layer(config.act_type) or _ACT_LAYER
249
+ fpn_config = config.fpn_config or get_fpn_config(
250
+ config.fpn_name, min_level=config.min_level, max_level=config.max_level)
251
+
252
+ self.resample = nn.ModuleDict()
253
+ for level in range(config.num_levels):
254
+ if level < len(feature_info):
255
+ in_chs = feature_info[level]['num_chs']
256
+ reduction = feature_info[level]['reduction']
257
+ else:
258
+ # Adds a coarser level by downsampling the last feature map
259
+ reduction_ratio = 2
260
+ self.resample[str(level)] = ResampleFeatureMap(
261
+ in_channels=in_chs,
262
+ out_channels=config.fpn_channels,
263
+ pad_type=config.pad_type,
264
+ pooling_type=config.pooling_type,
265
+ norm_layer=norm_layer,
266
+ reduction_ratio=reduction_ratio,
267
+ apply_bn=config.apply_bn_for_resampling,
268
+ conv_after_downsample=config.conv_after_downsample,
269
+ redundant_bias=config.redundant_bias,
270
+ )
271
+ in_chs = config.fpn_channels
272
+ reduction = int(reduction * reduction_ratio)
273
+ feature_info.append(dict(num_chs=in_chs, reduction=reduction))
274
+
275
+ self.cell = SequentialList()
276
+ for rep in range(config.fpn_cell_repeats):
277
+ logging.debug('building cell {}'.format(rep))
278
+ fpn_layer = BiFpnLayer(
279
+ feature_info=feature_info,
280
+ fpn_config=fpn_config,
281
+ fpn_channels=config.fpn_channels,
282
+ num_levels=config.num_levels,
283
+ pad_type=config.pad_type,
284
+ pooling_type=config.pooling_type,
285
+ norm_layer=norm_layer,
286
+ act_layer=act_layer,
287
+ separable_conv=config.separable_conv,
288
+ apply_bn_for_resampling=config.apply_bn_for_resampling,
289
+ conv_after_downsample=config.conv_after_downsample,
290
+ conv_bn_relu_pattern=config.conv_bn_relu_pattern,
291
+ redundant_bias=config.redundant_bias,
292
+ )
293
+ self.cell.add_module(str(rep), fpn_layer)
294
+ feature_info = fpn_layer.feature_info
295
+
296
+ def forward(self, x: List[torch.Tensor]):
297
+ for resample in self.resample.values():
298
+ x.append(resample(x[-1]))
299
+ x = self.cell(x)
300
+ return x
301
+
302
+
303
+ class HeadNet(nn.Module):
304
+
305
+ def __init__(self, config, num_outputs):
306
+ super(HeadNet, self).__init__()
307
+ self.num_levels = config.num_levels
308
+ self.bn_level_first = getattr(config, 'head_bn_level_first', False)
309
+ norm_layer = config.norm_layer or nn.BatchNorm2d
310
+ if config.norm_kwargs:
311
+ norm_layer = partial(norm_layer, **config.norm_kwargs)
312
+ act_layer = get_act_layer(config.act_type) or _ACT_LAYER
313
+
314
+ # Build convolution repeats
315
+ conv_fn = SeparableConv2d if config.separable_conv else ConvBnAct2d
316
+ conv_kwargs = dict(
317
+ in_channels=config.fpn_channels, out_channels=config.fpn_channels, kernel_size=3,
318
+ padding=config.pad_type, bias=config.redundant_bias, act_layer=None, norm_layer=None)
319
+ self.conv_rep = nn.ModuleList([conv_fn(**conv_kwargs) for _ in range(config.box_class_repeats)])
320
+
321
+ # Build batchnorm repeats. There is a unique batchnorm per feature level for each repeat.
322
+ # This can be organized with repeats first or feature levels first in module lists; the original models
323
+ # and weights were set up with repeats first, while levels first is required for efficient torchscript usage.
324
+ self.bn_rep = nn.ModuleList()
325
+ if self.bn_level_first:
326
+ for _ in range(self.num_levels):
327
+ self.bn_rep.append(nn.ModuleList([
328
+ norm_layer(config.fpn_channels) for _ in range(config.box_class_repeats)]))
329
+ else:
330
+ for _ in range(config.box_class_repeats):
331
+ self.bn_rep.append(nn.ModuleList([
332
+ nn.Sequential(OrderedDict([('bn', norm_layer(config.fpn_channels))]))
333
+ for _ in range(self.num_levels)]))
334
+
335
+ self.act = act_layer(inplace=True)
336
+
337
+ # Prediction (output) layer. Has bias with special init reqs, see init fn.
338
+ num_anchors = len(config.aspect_ratios) * config.num_scales
339
+ predict_kwargs = dict(
340
+ in_channels=config.fpn_channels, out_channels=num_outputs * num_anchors, kernel_size=3,
341
+ padding=config.pad_type, bias=True, norm_layer=None, act_layer=None)
342
+ self.predict = conv_fn(**predict_kwargs)
343
+
344
+ @torch.jit.ignore()
345
+ def toggle_bn_level_first(self):
346
+ """ Toggle the batchnorm layers between feature level first vs repeat first access pattern
347
+ Limitations in torchscript require feature levels to be iterated over first.
348
+
349
+ This function can be used to allow loading weights in the original order, and then toggle before
350
+ jit scripting the model.
351
+ """
352
+ with torch.no_grad():
353
+ new_bn_rep = nn.ModuleList()
354
+ for i in range(len(self.bn_rep[0])):
355
+ bn_first = nn.ModuleList()
356
+ for r in self.bn_rep.children():
357
+ m = r[i]
358
+ # NOTE original rep first model def has extra Sequential container with 'bn', this was
359
+ # flattened in the level first definition.
360
+ bn_first.append(m[0] if isinstance(m, nn.Sequential) else nn.Sequential(OrderedDict([('bn', m)])))
361
+ new_bn_rep.append(bn_first)
362
+ self.bn_level_first = not self.bn_level_first
363
+ self.bn_rep = new_bn_rep
364
+
365
+ @torch.jit.ignore()
366
+ def _forward(self, x: List[torch.Tensor]) -> List[torch.Tensor]:
367
+ outputs = []
368
+ for level in range(self.num_levels):
369
+ x_level = x[level]
370
+ for conv, bn in zip(self.conv_rep, self.bn_rep):
371
+ x_level = conv(x_level)
372
+ x_level = bn[level](x_level) # this is not allowed in torchscript
373
+ x_level = self.act(x_level)
374
+ outputs.append(self.predict(x_level))
375
+ return outputs
376
+
377
+ def _forward_level_first(self, x: List[torch.Tensor]) -> List[torch.Tensor]:
378
+ outputs = []
379
+ for level, bn_rep in enumerate(self.bn_rep): # iterating over first bn dim first makes TS happy
380
+ x_level = x[level]
381
+ for conv, bn in zip(self.conv_rep, bn_rep):
382
+ x_level = conv(x_level)
383
+ x_level = bn(x_level)
384
+ x_level = self.act(x_level)
385
+ outputs.append(self.predict(x_level))
386
+ return outputs
387
+
388
+ def forward(self, x: List[torch.Tensor]) -> List[torch.Tensor]:
389
+ if self.bn_level_first:
390
+ return self._forward_level_first(x)
391
+ else:
392
+ return self._forward(x)
393
+
394
+
395
+ def _init_weight(m, n='', ):
396
+ """ Weight initialization as per Tensorflow official implementations.
397
+ """
398
+
399
+ def _fan_in_out(w, groups=1):
400
+ dimensions = w.dim()
401
+ if dimensions < 2:
402
+ raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions")
403
+ num_input_fmaps = w.size(1)
404
+ num_output_fmaps = w.size(0)
405
+ receptive_field_size = 1
406
+ if w.dim() > 2:
407
+ receptive_field_size = w[0][0].numel()
408
+ fan_in = num_input_fmaps * receptive_field_size
409
+ fan_out = num_output_fmaps * receptive_field_size
410
+ fan_out //= groups
411
+ return fan_in, fan_out
412
+
413
+ def _glorot_uniform(w, gain=1, groups=1):
414
+ fan_in, fan_out = _fan_in_out(w, groups)
415
+ gain /= max(1., (fan_in + fan_out) / 2.) # fan avg
416
+ limit = math.sqrt(3.0 * gain)
417
+ w.data.uniform_(-limit, limit)
418
+
419
+ def _variance_scaling(w, gain=1, groups=1):
420
+ fan_in, fan_out = _fan_in_out(w, groups)
421
+ gain /= max(1., fan_in) # fan in
422
+ # gain /= max(1., (fan_in + fan_out) / 2.) # fan
423
+
424
+ # should it be normal or trunc normal? using normal for now since no good trunc in PT
425
+ # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
426
+ # std = math.sqrt(gain) / .87962566103423978
427
+ # w.data.trunc_normal(std=std)
428
+ std = math.sqrt(gain)
429
+ w.data.normal_(std=std)
430
+
431
+ if isinstance(m, SeparableConv2d):
432
+ if 'box_net' in n or 'class_net' in n:
433
+ _variance_scaling(m.conv_dw.weight, groups=m.conv_dw.groups)
434
+ _variance_scaling(m.conv_pw.weight)
435
+ if m.conv_pw.bias is not None:
436
+ if 'class_net.predict' in n:
437
+ m.conv_pw.bias.data.fill_(-math.log((1 - 0.01) / 0.01))
438
+ else:
439
+ m.conv_pw.bias.data.zero_()
440
+ else:
441
+ _glorot_uniform(m.conv_dw.weight, groups=m.conv_dw.groups)
442
+ _glorot_uniform(m.conv_pw.weight)
443
+ if m.conv_pw.bias is not None:
444
+ m.conv_pw.bias.data.zero_()
445
+ elif isinstance(m, ConvBnAct2d):
446
+ if 'box_net' in n or 'class_net' in n:
447
+ m.conv.weight.data.normal_(std=.01)
448
+ if m.conv.bias is not None:
449
+ if 'class_net.predict' in n:
450
+ m.conv.bias.data.fill_(-math.log((1 - 0.01) / 0.01))
451
+ else:
452
+ m.conv.bias.data.zero_()
453
+ else:
454
+ _glorot_uniform(m.conv.weight)
455
+ if m.conv.bias is not None:
456
+ m.conv.bias.data.zero_()
457
+ elif isinstance(m, nn.BatchNorm2d):
458
+ # looks like all bn init the same?
459
+ m.weight.data.fill_(1.0)
460
+ m.bias.data.zero_()
461
+
462
+
463
+ def _init_weight_alt(m, n='', ):
464
+ """ Weight initialization alternative, based on EfficientNet bacbkone init w/ class bias addition
465
+ NOTE: this will likely be removed after some experimentation
466
+ """
467
+ if isinstance(m, nn.Conv2d):
468
+ fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
469
+ fan_out //= m.groups
470
+ m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
471
+ if m.bias is not None:
472
+ if 'class_net.predict' in n:
473
+ m.bias.data.fill_(-math.log((1 - 0.01) / 0.01))
474
+ else:
475
+ m.bias.data.zero_()
476
+ elif isinstance(m, nn.BatchNorm2d):
477
+ m.weight.data.fill_(1.0)
478
+ m.bias.data.zero_()
479
+
480
+
481
+ def get_feature_info(backbone):
482
+ if isinstance(backbone.feature_info, Callable):
483
+ # old accessor for timm versions <= 0.1.30, efficientnet and mobilenetv3 and related nets only
484
+ feature_info = [dict(num_chs=f['num_chs'], reduction=f['reduction'])
485
+ for i, f in enumerate(backbone.feature_info())]
486
+ else:
487
+ # new feature info accessor, timm >= 0.2, all models supported
488
+ feature_info = backbone.feature_info.get_dicts(keys=['num_chs', 'reduction'])
489
+ return feature_info
490
+
491
+
492
+ class EfficientDet(nn.Module):
493
+
494
+ def __init__(self, config, pretrained_backbone=True, alternate_init=False):
495
+ super(EfficientDet, self).__init__()
496
+ self.config = config
497
+ set_config_readonly(self.config)
498
+ self.backbone = create_model(
499
+ config.backbone_name, features_only=True, out_indices=(2, 3, 4),
500
+ pretrained=pretrained_backbone, **config.backbone_args)
501
+ feature_info = get_feature_info(self.backbone)
502
+ self.fpn = BiFpn(self.config, feature_info)
503
+ self.class_net = HeadNet(self.config, num_outputs=self.config.num_classes)
504
+ self.box_net = HeadNet(self.config, num_outputs=4)
505
+
506
+ for n, m in self.named_modules():
507
+ if 'backbone' not in n:
508
+ if alternate_init:
509
+ _init_weight_alt(m, n)
510
+ else:
511
+ _init_weight(m, n)
512
+
513
+ @torch.jit.ignore()
514
+ def reset_head(self, num_classes=None, aspect_ratios=None, num_scales=None, alternate_init=False):
515
+ reset_class_head = False
516
+ reset_box_head = False
517
+ set_config_writeable(self.config)
518
+ if num_classes is not None:
519
+ reset_class_head = True
520
+ self.config.num_classes = num_classes
521
+ if aspect_ratios is not None:
522
+ reset_box_head = True
523
+ self.config.aspect_ratios = aspect_ratios
524
+ if num_scales is not None:
525
+ reset_box_head = True
526
+ self.config.num_scales = num_scales
527
+ set_config_readonly(self.config)
528
+
529
+ if reset_class_head:
530
+ self.class_net = HeadNet(self.config, num_outputs=self.config.num_classes)
531
+ for n, m in self.class_net.named_modules(prefix='class_net'):
532
+ if alternate_init:
533
+ _init_weight_alt(m, n)
534
+ else:
535
+ _init_weight(m, n)
536
+
537
+ if reset_box_head:
538
+ self.box_net = HeadNet(self.config, num_outputs=4)
539
+ for n, m in self.box_net.named_modules(prefix='box_net'):
540
+ if alternate_init:
541
+ _init_weight_alt(m, n)
542
+ else:
543
+ _init_weight(m, n)
544
+
545
+ @torch.jit.ignore()
546
+ def toggle_head_bn_level_first(self):
547
+ """ Toggle the head batchnorm layers between being access with feature_level first vs repeat
548
+ """
549
+ self.class_net.toggle_bn_level_first()
550
+ self.box_net.toggle_bn_level_first()
551
+
552
+ def forward(self, x):
553
+ x = self.backbone(x)
554
+ x = self.fpn(x)
555
+ x_class = self.class_net(x)
556
+ x_box = self.box_net(x)
557
+ return x_class, x_box
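A hedged construction sketch (not part of the diff): it assumes the vendored config helper get_efficientdet_config from efficientdet/effdet/config/model_config.py is importable as shown; the model name, input size, and class count are illustrative.

    import torch
    from effdet.config import get_efficientdet_config   # adjust to how the vendored package is on the path

    config = get_efficientdet_config('tf_efficientdet_d0')
    model = EfficientDet(config, pretrained_backbone=False)
    model.reset_head(num_classes=6)                      # hypothetical custom class count
    class_out, box_out = model(torch.randn(1, 3, 512, 512))
    # class_out / box_out: one tensor per pyramid level (P3..P7)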
efficientdet/effdet/evaluation/README.md ADDED
@@ -0,0 +1,7 @@
1
+ # Tensorflow Models Evaluation
2
+
3
+ The code in this folder has been extracted and adapted from evaluation/evaluator code at https://github.com/tensorflow/models/tree/master/research/object_detection/utils
4
+
5
+ Original code is licensed Apache 2.0, Copyright Google Inc.
6
+ https://github.com/tensorflow/models/blob/master/LICENSE
7
+
efficientdet/effdet/evaluation/__init__.py ADDED
File without changes
efficientdet/effdet/evaluation/detection_evaluator.py ADDED
@@ -0,0 +1,590 @@
1
+ from abc import ABCMeta
2
+ from abc import abstractmethod
3
+ #import collections
4
+ import logging
5
+ import unicodedata
6
+ import numpy as np
7
+
8
+ from .fields import InputDataFields, DetectionResultFields
9
+ from .object_detection_evaluation import ObjectDetectionEvaluation
10
+
11
+
12
+ def create_category_index(categories):
13
+ """Creates dictionary of COCO compatible categories keyed by category id.
14
+ Args:
15
+ categories: a list of dicts, each of which has the following keys:
16
+ 'id': (required) an integer id uniquely identifying this category.
17
+ 'name': (required) string representing category name e.g., 'cat', 'dog', 'pizza'.
18
+ Returns:
19
+ category_index: a dict containing the same entries as categories, but keyed
20
+ by the 'id' field of each category.
21
+ """
22
+ category_index = {}
23
+ for cat in categories:
24
+ category_index[cat['id']] = cat
25
+ return category_index
26
+
27
+
28
+ class DetectionEvaluator(metaclass=ABCMeta):
29
+ """Interface for object detection evaluation classes.
30
+ Example usage of the Evaluator:
31
+ ------------------------------
32
+ evaluator = DetectionEvaluator(categories)
33
+ # Detections and groundtruth for image 1.
34
+ evaluator.add_single_ground_truth_image_info(...)
35
+ evaluator.add_single_detected_image_info(...)
36
+ # Detections and groundtruth for image 2.
37
+ evaluator.add_single_ground_truth_image_info(...)
38
+ evaluator.add_single_detected_image_info(...)
39
+ metrics_dict = evaluator.evaluate()
40
+ """
41
+
42
+ def __init__(self, categories):
43
+ """Constructor.
44
+ Args:
45
+ categories: A list of dicts, each of which has the following keys -
46
+ 'id': (required) an integer id uniquely identifying this category.
47
+ 'name': (required) string representing category name e.g., 'cat', 'dog'.
48
+ """
49
+ self._categories = categories
50
+
51
+ def observe_result_dict_for_single_example(self, eval_dict):
52
+ """Observes an evaluation result dict for a single example.
53
+ When executing eagerly, once all observations have been observed by this
54
+ method you can use `.evaluate()` to get the final metrics.
55
+ When using `tf.estimator.Estimator` for evaluation this function is used by
56
+ `get_estimator_eval_metric_ops()` to construct the metric update op.
57
+ Args:
58
+ eval_dict: A dictionary that holds tensors for evaluating an object
59
+ detection model, returned from
60
+ eval_util.result_dict_for_single_example().
61
+ Returns:
62
+ None when executing eagerly, or an update_op that can be used to update
63
+ the eval metrics in `tf.estimator.EstimatorSpec`.
64
+ """
65
+ raise NotImplementedError('Not implemented for this evaluator!')
66
+
67
+ @abstractmethod
68
+ def add_single_ground_truth_image_info(self, image_id, gt_dict):
69
+ """Adds groundtruth for a single image to be used for evaluation.
70
+ Args:
71
+ image_id: A unique string/integer identifier for the image.
72
+ gt_dict: A dictionary of groundtruth numpy arrays required for evaluations.
73
+ """
74
+ pass
75
+
76
+ @abstractmethod
77
+ def add_single_detected_image_info(self, image_id, detections_dict):
78
+ """Adds detections for a single image to be used for evaluation.
79
+ Args:
80
+ image_id: A unique string/integer identifier for the image.
81
+ detections_dict: A dictionary of detection numpy arrays required for evaluation.
82
+ """
83
+ pass
84
+
85
+ @abstractmethod
86
+ def evaluate(self):
87
+ """Evaluates detections and returns a dictionary of metrics."""
88
+ pass
89
+
90
+ @abstractmethod
91
+ def clear(self):
92
+ """Clears the state to prepare for a fresh evaluation."""
93
+ pass
94
+
95
+
96
+ class ObjectDetectionEvaluator(DetectionEvaluator):
97
+ """A class to evaluate detections."""
98
+
99
+ def __init__(self,
100
+ categories,
101
+ matching_iou_threshold=0.5,
102
+ recall_lower_bound=0.0,
103
+ recall_upper_bound=1.0,
104
+ evaluate_corlocs=False,
105
+ evaluate_precision_recall=False,
106
+ metric_prefix=None,
107
+ use_weighted_mean_ap=False,
108
+ evaluate_masks=False,
109
+ group_of_weight=0.0):
110
+ """Constructor.
111
+ Args:
112
+ categories: A list of dicts, each of which has the following keys -
113
+ 'id': (required) an integer id uniquely identifying this category.
114
+ 'name': (required) string representing category name e.g., 'cat', 'dog'.
115
+ matching_iou_threshold: IOU threshold to use for matching groundtruth boxes to detection boxes.
116
+ recall_lower_bound: lower bound of recall operating area.
117
+ recall_upper_bound: upper bound of recall operating area.
118
+ evaluate_corlocs: (optional) boolean which determines if corloc scores are to be returned or not.
119
+ evaluate_precision_recall: (optional) boolean which determines if
120
+ precision and recall values are to be returned or not.
121
+ metric_prefix: (optional) string prefix for metric name; if None, no prefix is used.
122
+ use_weighted_mean_ap: (optional) boolean which determines if the mean
123
+ average precision is computed directly from the scores and tp_fp_labels of all classes.
124
+ evaluate_masks: If False, evaluation will be performed based on boxes. If
125
+ True, mask evaluation will be performed instead.
126
+ group_of_weight: Weight of group-of boxes. If set to 0, detections of the
127
+ correct class within a group-of box are ignored. If weight is > 0, then
128
+ if at least one detection falls within a group-of box with
129
+ matching_iou_threshold, weight group_of_weight is added to true
130
+ positives. Consequently, if no detection falls within a group-of box,
131
+ weight group_of_weight is added to false negatives.
132
+ Raises:
133
+ ValueError: If the category ids are not 1-indexed.
134
+ """
135
+ super(ObjectDetectionEvaluator, self).__init__(categories)
136
+ self._num_classes = max([cat['id'] for cat in categories])
137
+ if min(cat['id'] for cat in categories) < 1:
138
+ raise ValueError('Classes should be 1-indexed.')
139
+ self._matching_iou_threshold = matching_iou_threshold
140
+ self._recall_lower_bound = recall_lower_bound
141
+ self._recall_upper_bound = recall_upper_bound
142
+ self._use_weighted_mean_ap = use_weighted_mean_ap
143
+ self._label_id_offset = 1
144
+ self._evaluate_masks = evaluate_masks
145
+ self._group_of_weight = group_of_weight
146
+ self._evaluation = ObjectDetectionEvaluation(
147
+ num_gt_classes=self._num_classes,
148
+ matching_iou_threshold=self._matching_iou_threshold,
149
+ recall_lower_bound=self._recall_lower_bound,
150
+ recall_upper_bound=self._recall_upper_bound,
151
+ use_weighted_mean_ap=self._use_weighted_mean_ap,
152
+ label_id_offset=self._label_id_offset,
153
+ group_of_weight=self._group_of_weight)
154
+ self._image_ids = set([])
155
+ self._evaluate_corlocs = evaluate_corlocs
156
+ self._evaluate_precision_recall = evaluate_precision_recall
157
+ self._metric_prefix = (metric_prefix + '_') if metric_prefix else ''
158
+ self._build_metric_names()
159
+
160
+ def _build_metric_names(self):
161
+ """Builds a list with metric names."""
162
+ if self._recall_lower_bound > 0.0 or self._recall_upper_bound < 1.0:
163
+ self._metric_names = [
164
+ self._metric_prefix + 'Precision/mAP@{}IOU@[{:.1f},{:.1f}]Recall'.format(
165
+ self._matching_iou_threshold, self._recall_lower_bound, self._recall_upper_bound)
166
+ ]
167
+ else:
168
+ self._metric_names = [
169
+ self._metric_prefix + 'Precision/mAP@{}IOU'.format(self._matching_iou_threshold)
170
+ ]
171
+ if self._evaluate_corlocs:
172
+ self._metric_names.append(
173
+ self._metric_prefix + 'Precision/meanCorLoc@{}IOU'.format(self._matching_iou_threshold))
174
+
175
+ category_index = create_category_index(self._categories)
176
+ for idx in range(self._num_classes):
177
+ if idx + self._label_id_offset in category_index:
178
+ category_name = category_index[idx + self._label_id_offset]['name']
179
+ category_name = unicodedata.normalize('NFKD', category_name)
180
+ self._metric_names.append(
181
+ self._metric_prefix + 'PerformanceByCategory/AP@{}IOU/{}'.format(
182
+ self._matching_iou_threshold, category_name))
183
+ if self._evaluate_corlocs:
184
+ self._metric_names.append(
185
+ self._metric_prefix + 'PerformanceByCategory/CorLoc@{}IOU/{}'.format(
186
+ self._matching_iou_threshold, category_name))
187
+
188
+ def add_single_ground_truth_image_info(self, image_id, gt_dict):
189
+ """Adds groundtruth for a single image to be used for evaluation.
190
+ Args:
191
+ image_id: A unique string/integer identifier for the image.
192
+ gt_dict: A dictionary containing -
193
+ InputDataFields.gt_boxes: float32 numpy array
194
+ of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of
195
+ the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
196
+ InputDataFields.gt_classes: integer numpy array
197
+ of shape [num_boxes] containing 1-indexed groundtruth classes for the boxes.
198
+ InputDataFields.gt_difficult: Optional length M numpy boolean array
199
+ denoting whether a ground truth box is a difficult instance or not.
200
+ This field is optional to support the case that no boxes are difficult.
201
+ InputDataFields.gt_instance_masks: Optional numpy array of shape
202
+ [num_boxes, height, width] with values in {0, 1}.
203
+ Raises:
204
+ ValueError: On adding groundtruth for an image more than once. Will also
205
+ raise error if instance masks are not in groundtruth dictionary.
206
+ """
207
+ if image_id in self._image_ids:
208
+ return
209
+
210
+ gt_classes = gt_dict[InputDataFields.gt_classes] - self._label_id_offset
211
+ # If the key is not present in the gt_dict or the array is empty
212
+ # (unless there are no annotations for the groundtruth on this image)
213
+ # use values from the dictionary or insert None otherwise.
214
+ if (InputDataFields.gt_difficult in gt_dict and
215
+ (gt_dict[InputDataFields.gt_difficult].size or not gt_classes.size)):
216
+ gt_difficult = gt_dict[InputDataFields.gt_difficult]
217
+ else:
218
+ gt_difficult = None
219
+ # FIXME disable difficult flag warning, will support flag eventually
220
+ # if not len(self._image_ids) % 1000:
221
+ # logging.warning('image %s does not have groundtruth difficult flag specified', image_id)
222
+ gt_masks = None
223
+ if self._evaluate_masks:
224
+ if InputDataFields.gt_instance_masks not in gt_dict:
225
+ raise ValueError('Instance masks not in groundtruth dictionary.')
226
+ gt_masks = gt_dict[InputDataFields.gt_instance_masks]
227
+ self._evaluation.add_single_ground_truth_image_info(
228
+ image_key=image_id,
229
+ gt_boxes=gt_dict[InputDataFields.gt_boxes],
230
+ gt_class_labels=gt_classes,
231
+ gt_is_difficult_list=gt_difficult,
232
+ gt_masks=gt_masks)
233
+ self._image_ids.update([image_id])
234
+
235
+ def add_single_detected_image_info(self, image_id, detections_dict):
236
+ """Adds detections for a single image to be used for evaluation.
237
+ Args:
238
+ image_id: A unique string/integer identifier for the image.
239
+ detections_dict: A dictionary containing -
240
+ DetectionResultFields.detection_boxes: float32 numpy
241
+ array of shape [num_boxes, 4] containing `num_boxes` detection boxes
242
+ of the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
243
+ DetectionResultFields.detection_scores: float32 numpy
244
+ array of shape [num_boxes] containing detection scores for the boxes.
245
+ DetectionResultFields.detection_classes: integer numpy
246
+ array of shape [num_boxes] containing 1-indexed detection classes for the boxes.
247
+ DetectionResultFields.detection_masks: uint8 numpy array
248
+ of shape [num_boxes, height, width] containing `num_boxes` masks of
249
+ values ranging between 0 and 1.
250
+ Raises:
251
+ ValueError: If detection masks are not in detections dictionary.
252
+ """
253
+ detection_classes = detections_dict[DetectionResultFields.detection_classes] - self._label_id_offset
254
+ detection_masks = None
255
+ if self._evaluate_masks:
256
+ if DetectionResultFields.detection_masks not in detections_dict:
257
+ raise ValueError('Detection masks not in detections dictionary.')
258
+ detection_masks = detections_dict[DetectionResultFields.detection_masks]
259
+ self._evaluation.add_single_detected_image_info(
260
+ image_key=image_id,
261
+ detected_boxes=detections_dict[DetectionResultFields.detection_boxes],
262
+ detected_scores=detections_dict[DetectionResultFields.detection_scores],
263
+ detected_class_labels=detection_classes,
264
+ detected_masks=detection_masks)
265
+
266
+ def evaluate(self):
267
+ """Compute evaluation result.
268
+ Returns:
269
+ A dictionary of metrics with the following fields -
270
+ 1. summary_metrics:
271
+ '<prefix if not empty>_Precision/mAP@<matching_iou_threshold>IOU': mean
272
+ average precision at the specified IOU threshold.
273
+ 2. per_category_ap: category specific results with keys of the form
274
+ '<prefix if not empty>_PerformanceByCategory/
275
+ mAP@<matching_iou_threshold>IOU/category'.
276
+ """
277
+ metrics = self._evaluation.evaluate()
278
+ pascal_metrics = {self._metric_names[0]: metrics['mean_ap']}
279
+ if self._evaluate_corlocs:
280
+ pascal_metrics[self._metric_names[1]] = metrics['mean_corloc']
281
+ category_index = create_category_index(self._categories)
282
+ for idx in range(metrics['per_class_ap'].size):
283
+ if idx + self._label_id_offset in category_index:
284
+ category_name = category_index[idx + self._label_id_offset]['name']
285
+ category_name = unicodedata.normalize('NFKD', category_name)
286
+ display_name = self._metric_prefix + 'PerformanceByCategory/AP@{}IOU/{}'.format(
287
+ self._matching_iou_threshold, category_name)
288
+ pascal_metrics[display_name] = metrics['per_class_ap'][idx]
289
+
290
+ # Optionally add precision and recall values
291
+ if self._evaluate_precision_recall:
292
+ display_name = self._metric_prefix + 'PerformanceByCategory/Precision@{}IOU/{}'.format(
293
+ self._matching_iou_threshold, category_name)
294
+ pascal_metrics[display_name] = metrics['per_class_precision'][idx]
295
+ display_name = self._metric_prefix + 'PerformanceByCategory/Recall@{}IOU/{}'.format(
296
+ self._matching_iou_threshold, category_name)
297
+ pascal_metrics[display_name] = metrics['per_class_recall'][idx]
298
+
299
+ # Optionally add CorLoc metrics.
300
+ if self._evaluate_corlocs:
301
+ display_name = self._metric_prefix + 'PerformanceByCategory/CorLoc@{}IOU/{}'.format(
302
+ self._matching_iou_threshold, category_name)
303
+ pascal_metrics[display_name] = metrics['per_class_corloc'][idx]
304
+
305
+ return pascal_metrics
306
+
307
+ def clear(self):
308
+ """Clears the state to prepare for a fresh evaluation."""
309
+ self._evaluation = ObjectDetectionEvaluation(
310
+ num_gt_classes=self._num_classes,
311
+ matching_iou_threshold=self._matching_iou_threshold,
312
+ use_weighted_mean_ap=self._use_weighted_mean_ap,
313
+ label_id_offset=self._label_id_offset)
314
+ self._image_ids.clear()
315
+
316
+
317
+ class PascalDetectionEvaluator(ObjectDetectionEvaluator):
318
+ """A class to evaluation detections using PASCAL metrics."""
319
+
320
+ def __init__(self, categories, matching_iou_threshold=0.5):
321
+ super(PascalDetectionEvaluator, self).__init__(
322
+ categories,
323
+ matching_iou_threshold=matching_iou_threshold,
324
+ evaluate_corlocs=False,
325
+ metric_prefix='PascalBoxes',
326
+ use_weighted_mean_ap=False)
327
+
328
+
329
+ class WeightedPascalDetectionEvaluator(ObjectDetectionEvaluator):
330
+ """A class to evaluation detections using weighted PASCAL metrics.
331
+ Weighted PASCAL metrics computes the mean average precision as the average
332
+ precision given the scores and tp_fp_labels of all classes. In comparison,
333
+ PASCAL metrics computes the mean average precision as the mean of the
334
+ per-class average precisions.
335
+ This definition is very similar to the mean of the per-class average
336
+ precisions weighted by class frequency. However, they are typically not the
337
+ same, since average precision is not a linear function of the scores and
338
+ tp_fp_labels.
339
+ """
340
+
341
+ def __init__(self, categories, matching_iou_threshold=0.5):
342
+ super(WeightedPascalDetectionEvaluator, self).__init__(
343
+ categories,
344
+ matching_iou_threshold=matching_iou_threshold,
345
+ evaluate_corlocs=False,
346
+ metric_prefix='WeightedPascalBoxes',
347
+ use_weighted_mean_ap=True)
348
+
349
+
350
+ class PrecisionAtRecallDetectionEvaluator(ObjectDetectionEvaluator):
351
+ """A class to evaluation detections using precision@recall metrics."""
352
+
353
+ def __init__(self,
354
+ categories,
355
+ matching_iou_threshold=0.5,
356
+ recall_lower_bound=0.,
357
+ recall_upper_bound=1.0):
358
+ super(PrecisionAtRecallDetectionEvaluator, self).__init__(
359
+ categories,
360
+ matching_iou_threshold=matching_iou_threshold,
361
+ recall_lower_bound=recall_lower_bound,
362
+ recall_upper_bound=recall_upper_bound,
363
+ evaluate_corlocs=False,
364
+ metric_prefix='PrecisionAtRecallBoxes',
365
+ use_weighted_mean_ap=False)
366
+
367
+
368
+ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
369
+ """A class to evaluation detections using Open Images V2 metrics.
370
+ Open Images V2 introduces the group_of type of bounding boxes, and this metric
371
+ handles those boxes appropriately.
372
+ """
373
+
374
+ def __init__(self,
375
+ categories,
376
+ matching_iou_threshold=0.5,
377
+ evaluate_masks=False,
378
+ evaluate_corlocs=False,
379
+ metric_prefix='OpenImagesV5',
380
+ group_of_weight=0.0):
381
+ """Constructor.
382
+ Args:
383
+ categories: A list of dicts, each of which has the following keys -
384
+ 'id': (required) an integer id uniquely identifying this category.
385
+ 'name': (required) string representing category name e.g., 'cat', 'dog'.
386
+ matching_iou_threshold: IOU threshold to use for matching groundtruth
387
+ boxes to detection boxes.
388
+ evaluate_masks: if True, evaluator evaluates masks.
389
+ evaluate_corlocs: if True, additionally evaluates and returns CorLoc.
390
+ metric_prefix: Prefix name of the metric.
391
+ group_of_weight: Weight of the group-of bounding box. If set to 0 (default
392
+ for Open Images V2 detection protocol), detections of the correct class
393
+ within a group-of box are ignored. If weight is > 0, then if at least
394
+ one detection falls within a group-of box with matching_iou_threshold,
395
+ weight group_of_weight is added to true positives. Consequently, if no
396
+ detection falls within a group-of box, weight group_of_weight is added
397
+ to false negatives.
398
+ """
399
+
400
+ super(OpenImagesDetectionEvaluator, self).__init__(
401
+ categories,
402
+ matching_iou_threshold,
403
+ evaluate_corlocs,
404
+ metric_prefix=metric_prefix,
405
+ group_of_weight=group_of_weight,
406
+ evaluate_masks=evaluate_masks)
407
+
408
+ def add_single_ground_truth_image_info(self, image_id, gt_dict):
409
+ """Adds groundtruth for a single image to be used for evaluation.
410
+ Args:
411
+ image_id: A unique string/integer identifier for the image.
412
+ gt_dict: A dictionary containing -
413
+ InputDataFields.gt_boxes: float32 numpy array
414
+ of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of
415
+ the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
416
+ InputDataFields.gt_classes: integer numpy array
417
+ of shape [num_boxes] containing 1-indexed groundtruth classes for the boxes.
418
+ InputDataFields.gt_group_of: Optional length M
419
+ numpy boolean array denoting whether a groundtruth box contains a group of instances.
420
+ Raises:
421
+ ValueError: On adding groundtruth for an image more than once.
422
+ """
423
+ if image_id in self._image_ids:
424
+ return
425
+
426
+ gt_classes = (gt_dict[InputDataFields.gt_classes] - self._label_id_offset)
427
+ # If the key is not present in the gt_dict or the array is empty
428
+ # (unless there are no annotations for the groundtruth on this image)
429
+ # use values from the dictionary or insert None otherwise.
430
+ if (InputDataFields.gt_group_of in gt_dict and
431
+ (gt_dict[InputDataFields.gt_group_of].size or not gt_classes.size)):
432
+ gt_group_of = gt_dict[InputDataFields.gt_group_of]
433
+ else:
434
+ gt_group_of = None
435
+ # FIXME disable warning for now, will add group_of flag eventually
436
+ # if not len(self._image_ids) % 1000:
437
+ # logging.warning('image %s does not have groundtruth group_of flag specified', image_id)
438
+ if self._evaluate_masks:
439
+ gt_masks = gt_dict[InputDataFields.gt_instance_masks]
440
+ else:
441
+ gt_masks = None
442
+
443
+ self._evaluation.add_single_ground_truth_image_info(
444
+ image_id,
445
+ gt_dict[InputDataFields.gt_boxes],
446
+ gt_classes,
447
+ gt_is_difficult_list=None,
448
+ gt_is_group_of_list=gt_group_of,
449
+ gt_masks=gt_masks)
450
+ self._image_ids.update([image_id])
451
+
452
+
453
+ class OpenImagesChallengeEvaluator(OpenImagesDetectionEvaluator):
454
+ """A class implements Open Images Challenge metrics.
455
+ Both Detection and Instance Segmentation evaluation metrics are implemented.
456
+ Open Images Challenge Detection metric has two major changes in comparison
457
+ with Open Images V2 detection metric:
458
+ - a custom weight might be specified for detecting an object contained in a group-of box.
459
+ - verified image-level labels should be explicitly provided for evaluation: in case an
460
+ image has neither positive nor negative image level label of class c, all detections of
461
+ this class on this image will be ignored.
462
+
463
+ Open Images Challenge Instance Segmentation metric allows measuring the performance
464
+ of models with incomplete annotations: some instances are
465
+ annotated only at box level and some only at image level. In addition,
466
+ image-level labels are taken into account as in detection metric.
467
+
468
+ Open Images Challenge Detection metric default parameters:
469
+ evaluate_masks = False
470
+ group_of_weight = 1.0
471
+
472
+ Open Images Challenge Instance Segmentation metric default parameters:
473
+ evaluate_masks = True
474
+ (group_of_weight will not matter)
475
+ """
476
+
477
+ def __init__(
478
+ self,
479
+ categories,
480
+ evaluate_masks=False,
481
+ matching_iou_threshold=0.5,
482
+ evaluate_corlocs=False,
483
+ group_of_weight=1.0):
484
+ """Constructor.
485
+ Args:
486
+ categories: A list of dicts, each of which has the following keys -
487
+ 'id': (required) an integer id uniquely identifying this category.
488
+ 'name': (required) string representing category name e.g., 'cat', 'dog'.
489
+ evaluate_masks: set to true for instance segmentation metric and to false
490
+ for detection metric.
491
+ matching_iou_threshold: IOU threshold to use for matching groundtruth
492
+ boxes to detection boxes.
493
+ evaluate_corlocs: if True, additionally evaluates and returns CorLoc.
494
+ group_of_weight: Weight of group-of boxes. If set to 0, detections of the
495
+ correct class within a group-of box are ignored. If weight is > 0, then
496
+ if at least one detection falls within a group-of box with
497
+ matching_iou_threshold, weight group_of_weight is added to true
498
+ positives. Consequently, if no detection falls within a group-of box,
499
+ weight group_of_weight is added to false negatives.
500
+ """
501
+ if not evaluate_masks:
502
+ metrics_prefix = 'OpenImagesDetectionChallenge'
503
+ else:
504
+ metrics_prefix = 'OpenImagesInstanceSegmentationChallenge'
505
+
506
+ super(OpenImagesChallengeEvaluator, self).__init__(
507
+ categories,
508
+ matching_iou_threshold,
509
+ evaluate_masks=evaluate_masks,
510
+ evaluate_corlocs=evaluate_corlocs,
511
+ group_of_weight=group_of_weight,
512
+ metric_prefix=metrics_prefix)
513
+
514
+ self._evaluatable_labels = {}
515
+
516
+ def add_single_ground_truth_image_info(self, image_id, gt_dict):
517
+ """Adds groundtruth for a single image to be used for evaluation.
518
+ Args:
519
+ image_id: A unique string/integer identifier for the image.
520
+ gt_dict: A dictionary containing -
521
+ InputDataFields.gt_boxes: float32 numpy array of shape [num_boxes, 4]
522
+ containing `num_boxes` groundtruth boxes of the format [ymin, xmin, ymax, xmax]
523
+ in absolute image coordinates.
524
+ InputDataFields.gt_classes: integer numpy array of shape [num_boxes]
525
+ containing 1-indexed groundtruth classes for the boxes.
526
+ InputDataFields.gt_image_classes: integer 1D
527
+ numpy array containing all classes for which labels are verified.
528
+ InputDataFields.gt_group_of: Optional length M
529
+ numpy boolean array denoting whether a groundtruth box contains a group of instances.
530
+ Raises:
531
+ ValueError: On adding groundtruth for an image more than once.
532
+ """
533
+ super(OpenImagesChallengeEvaluator,
534
+ self).add_single_ground_truth_image_info(image_id, gt_dict)
535
+ input_fields = InputDataFields
536
+ gt_classes = gt_dict[input_fields.gt_classes] - self._label_id_offset
537
+ image_classes = np.array([], dtype=int)
538
+ if input_fields.gt_image_classes in gt_dict:
539
+ image_classes = gt_dict[input_fields.gt_image_classes]
540
+ elif input_fields.gt_labeled_classes in gt_dict:
541
+ image_classes = gt_dict[input_fields.gt_labeled_classes]
542
+ image_classes -= self._label_id_offset
543
+ self._evaluatable_labels[image_id] = np.unique(
544
+ np.concatenate((image_classes, gt_classes)))
545
+
546
+ def add_single_detected_image_info(self, image_id, detections_dict):
547
+ """Adds detections for a single image to be used for evaluation.
548
+ Args:
549
+ image_id: A unique string/integer identifier for the image.
550
+ detections_dict: A dictionary containing -
551
+ DetectionResultFields.detection_boxes: float32 numpy
552
+ array of shape [num_boxes, 4] containing `num_boxes` detection boxes
553
+ of the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
554
+ DetectionResultFields.detection_scores: float32 numpy
555
+ array of shape [num_boxes] containing detection scores for the boxes.
556
+ DetectionResultFields.detection_classes: integer numpy
557
+ array of shape [num_boxes] containing 1-indexed detection classes for
558
+ the boxes.
559
+ Raises:
560
+ ValueError: If detection masks are not in detections dictionary.
561
+ """
562
+ if image_id not in self._image_ids:
563
+ # The evaluator assumes groundtruth is inserted first; if it is not, register the
564
+ # image with an empty set of evaluatable labels so its detections are ignored.
565
+ self._image_ids.update([image_id])
566
+ self._evaluatable_labels[image_id] = np.array([])
567
+
568
+ detection_classes = detections_dict[DetectionResultFields.detection_classes] - self._label_id_offset
569
+ allowed_classes = np.where(np.isin(detection_classes, self._evaluatable_labels[image_id]))
570
+ detection_classes = detection_classes[allowed_classes]
571
+ detected_boxes = detections_dict[DetectionResultFields.detection_boxes][allowed_classes]
572
+ detected_scores = detections_dict[DetectionResultFields.detection_scores][allowed_classes]
573
+
574
+ if self._evaluate_masks:
575
+ detection_masks = detections_dict[DetectionResultFields.detection_masks][allowed_classes]
576
+ else:
577
+ detection_masks = None
578
+ self._evaluation.add_single_detected_image_info(
579
+ image_key=image_id,
580
+ detected_boxes=detected_boxes,
581
+ detected_scores=detected_scores,
582
+ detected_class_labels=detection_classes,
583
+ detected_masks=detection_masks)
584
+
585
+ def clear(self):
586
+ """Clears stored data."""
587
+
588
+ super(OpenImagesChallengeEvaluator, self).clear()
589
+ self._evaluatable_labels.clear()
590
+
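The evaluator classes above share an add-groundtruth / add-detections / evaluate workflow, and groundtruth must be registered before the corresponding detections. A minimal usage sketch follows; the module paths are inferred from this commit's directory layout, and the category names, boxes, and scores are made-up illustration values, not outputs of this app.

import numpy as np
from effdet.evaluation.detection_evaluator import PascalDetectionEvaluator
from effdet.evaluation.fields import InputDataFields, DetectionResultFields

categories = [{'id': 1, 'name': 'plastic'}, {'id': 2, 'name': 'metal'}]
evaluator = PascalDetectionEvaluator(categories, matching_iou_threshold=0.5)

# Groundtruth first, then the detections for the same image id.
evaluator.add_single_ground_truth_image_info(
    image_id='img_0',
    gt_dict={
        InputDataFields.gt_boxes: np.array([[10., 10., 50., 50.]], dtype=np.float32),
        InputDataFields.gt_classes: np.array([1], dtype=np.int64),
    })
evaluator.add_single_detected_image_info(
    image_id='img_0',
    detections_dict={
        DetectionResultFields.detection_boxes: np.array([[12., 11., 48., 52.]], dtype=np.float32),
        DetectionResultFields.detection_scores: np.array([0.9], dtype=np.float32),
        DetectionResultFields.detection_classes: np.array([1], dtype=np.int64),
    })
metrics = evaluator.evaluate()  # keys follow the '<prefix>_Precision/mAP@<iou>IOU' pattern described above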
efficientdet/effdet/evaluation/fields.py ADDED
@@ -0,0 +1,105 @@
1
+
2
+ class InputDataFields(object):
3
+ """Names for the input tensors.
4
+ Holds the standard data field names to use for identifying input tensors. This
5
+ should be used by the decoder to identify keys for the returned tensor_dict
6
+ containing input tensors. And it should be used by the model to identify the
7
+ tensors it needs.
8
+ Attributes:
9
+ image: image.
10
+ image_additional_channels: additional channels.
11
+ key: unique key corresponding to image.
12
+ filename: original filename of the dataset (without common path).
13
+ gt_image_classes: image-level class labels.
14
+ gt_image_confidences: image-level class confidences.
15
+ gt_labeled_classes: image-level annotation that indicates the
16
+ classes for which an image has been labeled.
17
+ gt_boxes: coordinates of the ground truth boxes in the image.
18
+ gt_classes: box-level class labels.
19
+ gt_confidences: box-level class confidences. The shape should be
20
+ the same as the shape of gt_classes.
21
+ gt_label_types: box-level label types (e.g. explicit negative).
22
+ gt_is_crowd: [DEPRECATED, use gt_group_of instead]
23
+ is the groundtruth a single object or a crowd.
24
+ gt_area: area of a groundtruth segment.
25
+ gt_difficult: is a `difficult` object
26
+ gt_group_of: is a `group_of` objects, e.g. multiple objects of the
27
+ same class, forming a connected group, where instances are heavily
28
+ occluding each other.
29
+ gt_instance_masks: ground truth instance masks.
30
+ gt_instance_boundaries: ground truth instance boundaries.
31
+ gt_instance_classes: instance mask-level class labels.
32
+ gt_label_weights: groundtruth label weights.
33
+ gt_weights: groundtruth weight factor for bounding boxes.
34
+ image_height: height of images, used to decode
35
+ image_width: width of images, used to decode
36
+ """
37
+ image = 'image'
38
+ key = 'image_id'
39
+ filename = 'filename'
40
+ gt_boxes = 'bbox'
41
+ gt_classes = 'cls'
42
+ gt_confidences = 'confidences'
43
+ gt_label_types = 'label_types'
44
+ gt_image_classes = 'img_cls'
45
+ gt_image_confidences = 'img_confidences'
46
+ gt_labeled_classes = 'labeled_cls'
47
+ gt_is_crowd = 'is_crowd'
48
+ gt_area = 'area'
49
+ gt_difficult = 'difficult'
50
+ gt_group_of = 'group_of'
51
+ gt_instance_masks = 'instance_masks'
52
+ gt_instance_boundaries = 'instance_boundaries'
53
+ gt_instance_classes = 'instance_classes'
54
+ image_height = 'img_height'
55
+ image_width = 'img_width'
56
+ image_size = 'img_size'
57
+
58
+
59
+ class DetectionResultFields(object):
60
+ """Naming conventions for storing the output of the detector.
61
+ Attributes:
62
+ source_id: source of the original image.
63
+ key: unique key corresponding to image.
64
+ detection_boxes: coordinates of the detection boxes in the image.
65
+ detection_scores: detection scores for the detection boxes in the image.
66
+ detection_multiclass_scores: class score distribution (including background)
67
+ for detection boxes in the image.
68
+ detection_classes: detection-level class labels.
69
+ detection_masks: contains a segmentation mask for each detection box.
70
+ """
71
+
72
+ key = 'image_id'
73
+ detection_boxes = 'bbox'
74
+ detection_scores = 'score'
75
+ detection_classes = 'cls'
76
+ detection_masks = 'masks'
77
+
78
+
79
+ class BoxListFields(object):
80
+ """Naming conventions for BoxLists.
81
+ Attributes:
82
+ boxes: bounding box coordinates.
83
+ classes: classes per bounding box.
84
+ scores: scores per bounding box.
85
+ weights: sample weights per bounding box.
86
+ objectness: objectness score per bounding box.
87
+ masks: masks per bounding box.
88
+ boundaries: boundaries per bounding box.
89
+ keypoints: keypoints per bounding box.
90
+ keypoint_heatmaps: keypoint heatmaps per bounding box.
91
+ is_crowd: is_crowd annotation per bounding box.
92
+ """
93
+ boxes = 'boxes'
94
+ classes = 'classes'
95
+ scores = 'scores'
96
+ weights = 'weights'
97
+ confidences = 'confidences'
98
+ objectness = 'objectness'
99
+ masks = 'masks'
100
+ boundaries = 'boundaries'
101
+ keypoints = 'keypoints'
102
+ keypoint_visibilities = 'keypoint_visibilities'
103
+ keypoint_heatmaps = 'keypoint_heatmaps'
104
+ is_crowd = 'is_crowd'
105
+ group_of = 'group_of'
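These field classes are plain namespaces of shared string constants; the evaluators index groundtruth and detection dictionaries with them instead of raw strings. A tiny sketch, with the module path inferred from this commit's layout:

import numpy as np
from effdet.evaluation.fields import InputDataFields

gt_dict = {
    InputDataFields.gt_boxes: np.zeros((0, 4), dtype=np.float32),
    InputDataFields.gt_classes: np.zeros((0,), dtype=np.int64),
}
# InputDataFields.gt_boxes is just the string 'bbox', so both spellings address the same entry.
assert gt_dict[InputDataFields.gt_boxes] is gt_dict['bbox']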
efficientdet/effdet/evaluation/metrics.py ADDED
@@ -0,0 +1,148 @@
1
+ import numpy as np
2
+
3
+
4
+ def compute_precision_recall(scores, labels, num_gt):
5
+ """Compute precision and recall.
6
+ Args:
7
+ scores: A float numpy array representing detection score
8
+ labels: A float numpy array representing weighted true/false positive labels
9
+ num_gt: Number of ground truth instances
10
+ Raises:
11
+ ValueError: if the input is not of the correct format
12
+ Returns:
13
+ precision: Fraction of positive instances over detected ones. This value is
14
+ None if no ground truth labels are present.
15
+ recall: Fraction of detected positive instance over all positive instances.
16
+ This value is None if no ground truth labels are present.
17
+ """
18
+ if not isinstance(labels, np.ndarray) or len(labels.shape) != 1:
19
+ raise ValueError("labels must be single dimension numpy array")
20
+
21
+ if labels.dtype != float and labels.dtype != bool:
22
+ raise ValueError("labels type must be either bool or float")
23
+
24
+ if not isinstance(scores, np.ndarray) or len(scores.shape) != 1:
25
+ raise ValueError("scores must be single dimension numpy array")
26
+
27
+ if num_gt < np.sum(labels):
28
+ raise ValueError("Number of true positives must be smaller than num_gt.")
29
+
30
+ if len(scores) != len(labels):
31
+ raise ValueError("scores and labels must be of the same size.")
32
+
33
+ if num_gt == 0:
34
+ return None, None
35
+
36
+ sorted_indices = np.argsort(scores)
37
+ sorted_indices = sorted_indices[::-1]
38
+ true_positive_labels = labels[sorted_indices]
39
+ false_positive_labels = (true_positive_labels <= 0).astype(float)
40
+ cum_true_positives = np.cumsum(true_positive_labels)
41
+ cum_false_positives = np.cumsum(false_positive_labels)
42
+ precision = cum_true_positives.astype(float) / (cum_true_positives + cum_false_positives)
43
+ recall = cum_true_positives.astype(float) / num_gt
44
+ return precision, recall
45
+
46
+
47
+ def compute_average_precision(precision, recall):
48
+ """Compute Average Precision according to the definition in VOCdevkit.
49
+ Precision is modified to ensure that it does not decrease as recall
50
+ decreases.
51
+ Args:
52
+ precision: A float [N, 1] numpy array of precisions
53
+ recall: A float [N, 1] numpy array of recalls
54
+ Raises:
55
+ ValueError: if the input is not of the correct format
56
+ Returns:
57
+ average_precision: The area under the precision recall curve. NaN if
58
+ precision and recall are None.
59
+ """
60
+ if precision is None:
61
+ if recall is not None:
62
+ raise ValueError("If precision is None, recall must also be None")
63
+ return np.nan
64
+
65
+ if not isinstance(precision, np.ndarray) or not isinstance(recall, np.ndarray):
66
+ raise ValueError("precision and recall must be numpy array")
67
+ if precision.dtype != float or recall.dtype != float:
68
+ raise ValueError("input must be float numpy array.")
69
+ if len(precision) != len(recall):
70
+ raise ValueError("precision and recall must be of the same size.")
71
+ if not precision.size:
72
+ return 0.0
73
+ if np.amin(precision) < 0 or np.amax(precision) > 1:
74
+ raise ValueError("Precision must be in the range of [0, 1].")
75
+ if np.amin(recall) < 0 or np.amax(recall) > 1:
76
+ raise ValueError("recall must be in the range of [0, 1].")
77
+ if not all(recall[i] <= recall[i + 1] for i in range(len(recall) - 1)):
78
+ raise ValueError("recall must be a non-decreasing array")
79
+
80
+ recall = np.concatenate([[0], recall, [1]])
81
+ precision = np.concatenate([[0], precision, [0]])
82
+
83
+ # Preprocess precision to be a non-decreasing array
84
+ for i in range(len(precision) - 2, -1, -1):
85
+ precision[i] = np.maximum(precision[i], precision[i + 1])
86
+
87
+ indices = np.where(recall[1:] != recall[:-1])[0] + 1
88
+ average_precision = np.sum((recall[indices] - recall[indices - 1]) * precision[indices])
89
+ return average_precision
90
+
91
+
92
+ def compute_cor_loc(num_gt_imgs_per_class, num_images_correctly_detected_per_class):
93
+ """Compute CorLoc according to the definition in the following paper.
94
+ https://www.robots.ox.ac.uk/~vgg/rg/papers/deselaers-eccv10.pdf
95
+ Returns nans if there are no ground truth images for a class.
96
+ Args:
97
+ num_gt_imgs_per_class: 1D array, representing number of images containing
98
+ at least one object instance of a particular class
99
+ num_images_correctly_detected_per_class: 1D array, representing number of
100
+ images in which at least one object instance of a particular class was correctly detected
101
+ Returns:
102
+ corloc_per_class: A float numpy array representing the corloc score of each class
103
+ """
104
+ return np.where(
105
+ num_gt_imgs_per_class == 0, np.nan,
106
+ num_images_correctly_detected_per_class / num_gt_imgs_per_class)
107
+
108
+
109
+ def compute_median_rank_at_k(tp_fp_list, k):
110
+ """Computes MedianRank@k, where k is the top-scoring labels.
111
+ Args:
112
+ tp_fp_list: a list of numpy arrays; each numpy array corresponds to all
113
+ detections on a single image, where the detections are sorted by score in
114
+ descending order. Further, each numpy array element can have boolean or
115
+ float values. True positive elements have either value >0.0 or True;
116
+ any other value is considered false positive.
117
+ k: number of top-scoring proposals to take.
118
+ Returns:
119
+ median_rank: median rank of all true positive proposals among top k by score.
120
+ """
121
+ ranks = []
122
+ for i in range(len(tp_fp_list)):
123
+ ranks.append(np.where(tp_fp_list[i][0:min(k, tp_fp_list[i].shape[0])] > 0)[0])
124
+ concatenated_ranks = np.concatenate(ranks)
125
+ return np.median(concatenated_ranks)
126
+
127
+
128
+ def compute_recall_at_k(tp_fp_list, num_gt, k):
129
+ """Computes Recall@k, MedianRank@k, where k is the top-scoring labels.
130
+ Args:
131
+ tp_fp_list: a list of numpy arrays; each numpy array corresponds to all
132
+ detections on a single image, where the detections are sorted by score in
133
+ descending order. Further, each numpy array element can have boolean or
134
+ float values. True positive elements have either value >0.0 or True;
135
+ any other value is considered false positive.
136
+ num_gt: number of groundtruth annotations.
137
+ k: number of top-scoring proposals to take.
138
+ Returns:
139
+ recall: recall evaluated on the top k by score detections.
140
+ """
141
+
142
+ tp_fp_eval = []
143
+ for i in range(len(tp_fp_list)):
144
+ tp_fp_eval.append(tp_fp_list[i][0:min(k, tp_fp_list[i].shape[0])])
145
+
146
+ tp_fp_eval = np.concatenate(tp_fp_eval)
147
+
148
+ return np.sum(tp_fp_eval) / num_gt
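A self-contained sketch of how these helpers compose; the scores and labels below are toy values, not outputs of this repository's model.

import numpy as np
from effdet.evaluation.metrics import compute_precision_recall, compute_average_precision

scores = np.array([0.9, 0.8, 0.6, 0.3], dtype=float)  # detection confidences, any order
labels = np.array([1.0, 0.0, 1.0, 0.0], dtype=float)  # weighted true/false positive flags
precision, recall = compute_precision_recall(scores, labels, num_gt=2)
ap = compute_average_precision(precision, recall)     # area under the interpolated PR curve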
efficientdet/effdet/evaluation/np_box_list.py ADDED
@@ -0,0 +1,696 @@
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Bounding Box List operations for Numpy BoxLists.
17
+
18
+ Example box operations that are supported:
19
+ * Areas: compute bounding box areas
20
+ * IOU: pairwise intersection-over-union scores
21
+ """
22
+ import numpy as np
23
+
24
+
25
+ class BoxList(object):
26
+ """Box collection.
27
+ BoxList represents a list of bounding boxes as numpy array, where each
28
+ bounding box is represented as a row of 4 numbers,
29
+ [y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes within a
30
+ given list correspond to a single image.
31
+ Optionally, users can add additional related fields (such as
32
+ objectness/classification scores).
33
+ """
34
+
35
+ def __init__(self, data):
36
+ """Constructs box collection.
37
+ Args:
38
+ data: a numpy array of shape [N, 4] representing box coordinates
39
+ Raises:
40
+ ValueError: if bbox data is not a numpy array
41
+ ValueError: if invalid dimensions for bbox data
42
+ """
43
+ if not isinstance(data, np.ndarray):
44
+ raise ValueError('data must be a numpy array.')
45
+ if len(data.shape) != 2 or data.shape[1] != 4:
46
+ raise ValueError('Invalid dimensions for box data.')
47
+ if data.dtype != np.float32 and data.dtype != np.float64:
48
+ raise ValueError('Invalid data type for box data: float is required.')
49
+ if not self._is_valid_boxes(data):
50
+ raise ValueError('Invalid box data. data must be a numpy array of '
51
+ 'N*[y_min, x_min, y_max, x_max]')
52
+ self.data = {'boxes': data}
53
+
54
+ def num_boxes(self):
55
+ """Return number of boxes held in collections."""
56
+ return self.data['boxes'].shape[0]
57
+
58
+ def get_extra_fields(self):
59
+ """Return all non-box fields."""
60
+ return [k for k in self.data.keys() if k != 'boxes']
61
+
62
+ def has_field(self, field):
63
+ return field in self.data
64
+
65
+ def add_field(self, field, field_data):
66
+ """Add data to a specified field.
67
+ Args:
68
+ field: a string parameter used to specify a related field to be accessed.
69
+ field_data: a numpy array of [N, ...] representing the data associated
70
+ with the field.
71
+ Raises:
72
+ ValueError: if the field already exists or the dimension of the field
73
+ data does not match the number of boxes.
74
+ """
75
+ if self.has_field(field):
76
+ raise ValueError('Field ' + field + ' already exists')
77
+ if len(field_data.shape) < 1 or field_data.shape[0] != self.num_boxes():
78
+ raise ValueError('Invalid dimensions for field data')
79
+ self.data[field] = field_data
80
+
81
+ def get(self):
82
+ """Convenience function for accesssing box coordinates.
83
+ Returns:
84
+ a numpy array of shape [N, 4] representing box corners
85
+ """
86
+ return self.get_field('boxes')
87
+
88
+ def get_field(self, field):
89
+ """Accesses data associated with the specified field in the box collection.
90
+ Args:
91
+ field: a string parameter used to specify a related field to be accessed.
92
+ Returns:
93
+ a numpy 1-d array representing data of an associated field
94
+ Raises:
95
+ ValueError: if invalid field
96
+ """
97
+ if not self.has_field(field):
98
+ raise ValueError('field {} does not exist'.format(field))
99
+ return self.data[field]
100
+
101
+ def get_coordinates(self):
102
+ """Get corner coordinates of boxes.
103
+ Returns:
104
+ a list of 4 1-d numpy arrays [y_min, x_min, y_max, x_max]
105
+ """
106
+ box_coordinates = self.get()
107
+ y_min = box_coordinates[:, 0]
108
+ x_min = box_coordinates[:, 1]
109
+ y_max = box_coordinates[:, 2]
110
+ x_max = box_coordinates[:, 3]
111
+ return [y_min, x_min, y_max, x_max]
112
+
113
+ def _is_valid_boxes(self, data):
114
+ """Check whether data fullfills the format of N*[ymin, xmin, ymax, xmin].
115
+ Args:
116
+ data: a numpy array of shape [N, 4] representing box coordinates
117
+ Returns:
118
+ a boolean indicating whether all ymax of boxes are equal or greater than
119
+ ymin, and all xmax of boxes are equal or greater than xmin.
120
+ """
121
+ if data.shape[0] > 0:
122
+ for i in range(data.shape[0]):
123
+ if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]:
124
+ return False
125
+ return True
126
+
127
+
128
+ def area(boxes):
129
+ """Computes area of boxes.
130
+
131
+ Args:
132
+ boxes: Numpy array with shape [N, 4] holding N boxes
133
+
134
+ Returns:
135
+ a numpy array with shape [N*1] representing box areas
136
+ """
137
+ return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
138
+
139
+
140
+ def intersection(boxes1, boxes2):
141
+ """Compute pairwise intersection areas between boxes.
142
+
143
+ Args:
144
+ boxes1: a numpy array with shape [N, 4] holding N boxes
145
+ boxes2: a numpy array with shape [M, 4] holding M boxes
146
+
147
+ Returns:
148
+ a numpy array with shape [N*M] representing pairwise intersection area
149
+ """
150
+ [y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1)
151
+ [y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1)
152
+
153
+ all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2))
154
+ all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2))
155
+ intersect_heights = np.maximum(np.zeros(all_pairs_max_ymin.shape), all_pairs_min_ymax - all_pairs_max_ymin)
156
+ all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2))
157
+ all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2))
158
+ intersect_widths = np.maximum(np.zeros(all_pairs_max_xmin.shape), all_pairs_min_xmax - all_pairs_max_xmin)
159
+ return intersect_heights * intersect_widths
160
+
161
+
162
+ def iou(boxes1, boxes2):
163
+ """Computes pairwise intersection-over-union between box collections.
164
+
165
+ Args:
166
+ boxes1: a numpy array with shape [N, 4] holding N boxes.
167
+ boxes2: a numpy array with shape [M, 4] holding M boxes.
168
+
169
+ Returns:
170
+ a numpy array with shape [N, M] representing pairwise iou scores.
171
+ """
172
+ intersect = intersection(boxes1, boxes2)
173
+ area1 = area(boxes1)
174
+ area2 = area(boxes2)
175
+ union = np.expand_dims(area1, axis=1) + np.expand_dims(area2, axis=0) - intersect
176
+ return intersect / union
177
+
178
+
179
+ def ioa(boxes1, boxes2):
180
+ """Computes pairwise intersection-over-area between box collections.
181
+
182
+ Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
183
+ their intersection area over box2's area. Note that ioa is not symmetric,
184
+ that is, IOA(box1, box2) != IOA(box2, box1).
185
+
186
+ Args:
187
+ boxes1: a numpy array with shape [N, 4] holding N boxes.
188
+ boxes2: a numpy array with shape [M, 4] holding M boxes.
189
+
190
+ Returns:
191
+ a numpy array with shape [N, M] representing pairwise ioa scores.
192
+ """
193
+ intersect = intersection(boxes1, boxes2)
194
+ areas = np.expand_dims(area(boxes2), axis=0)
195
+ return intersect / areas
196
+
197
+
198
+ class SortOrder(object):
199
+ """Enum class for sort order.
200
+
201
+ Attributes:
202
+ ascend: ascend order.
203
+ descend: descend order.
204
+ """
205
+ ASCEND = 1
206
+ DESCEND = 2
207
+
208
+
209
+ def area_boxlist(boxlist):
210
+ """Computes area of boxes.
211
+
212
+ Args:
213
+ boxlist: BoxList holding N boxes
214
+
215
+ Returns:
216
+ a numpy array with shape [N*1] representing box areas
217
+ """
218
+ y_min, x_min, y_max, x_max = boxlist.get_coordinates()
219
+ return (y_max - y_min) * (x_max - x_min)
220
+
221
+
222
+ def intersection_boxlist(boxlist1, boxlist2):
223
+ """Compute pairwise intersection areas between boxes.
224
+
225
+ Args:
226
+ boxlist1: BoxList holding N boxes
227
+ boxlist2: BoxList holding M boxes
228
+
229
+ Returns:
230
+ a numpy array with shape [N*M] representing pairwise intersection area
231
+ """
232
+ return intersection(boxlist1.get(), boxlist2.get())
233
+
234
+
235
+ def iou_boxlist(boxlist1, boxlist2):
236
+ """Computes pairwise intersection-over-union between box collections.
237
+
238
+ Args:
239
+ boxlist1: BoxList holding N boxes
240
+ boxlist2: BoxList holding M boxes
241
+
242
+ Returns:
243
+ a numpy array with shape [N, M] representing pairwise iou scores.
244
+ """
245
+ return iou(boxlist1.get(), boxlist2.get())
246
+
247
+
248
+ def ioa_boxlist(boxlist1, boxlist2):
249
+ """Computes pairwise intersection-over-area between box collections.
250
+
251
+ Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
252
+ their intersection area over box2's area. Note that ioa is not symmetric,
253
+ that is, IOA(box1, box2) != IOA(box2, box1).
254
+
255
+ Args:
256
+ boxlist1: BoxList holding N boxes
257
+ boxlist2: BoxList holding M boxes
258
+
259
+ Returns:
260
+ a numpy array with shape [N, M] representing pairwise ioa scores.
261
+ """
262
+ return ioa(boxlist1.get(), boxlist2.get())
263
+
264
+
265
+ def gather_boxlist(boxlist, indices, fields=None):
266
+ """Gather boxes from BoxList according to indices and return new BoxList.
267
+
268
+ By default, gather returns boxes corresponding to the input index list, as
269
+ well as all additional fields stored in the boxlist (indexing into the
270
+ first dimension). However one can optionally only gather from a
271
+ subset of fields.
272
+
273
+ Args:
274
+ boxlist: BoxList holding N boxes
275
+ indices: a 1-d numpy array of type int_
276
+ fields: (optional) list of fields to also gather from. If None (default),
277
+ all fields are gathered from. Pass an empty fields list to only gather the box coordinates.
278
+
279
+ Returns:
280
+ subboxlist: a BoxList corresponding to the subset of the input BoxList specified by indices
281
+
282
+ Raises:
283
+ ValueError: if specified field is not contained in boxlist or if the indices are not of type int_
284
+ """
285
+ if indices.size:
286
+ if np.amax(indices) >= boxlist.num_boxes() or np.amin(indices) < 0:
287
+ raise ValueError('indices are out of valid range.')
288
+ subboxlist = BoxList(boxlist.get()[indices, :])
289
+ if fields is None:
290
+ fields = boxlist.get_extra_fields()
291
+ for field in fields:
292
+ extra_field_data = boxlist.get_field(field)
293
+ subboxlist.add_field(field, extra_field_data[indices, ...])
294
+ return subboxlist
295
+
296
+
297
+ def sort_by_field_boxlist(boxlist, field, order=SortOrder.DESCEND):
298
+ """Sort boxes and associated fields according to a scalar field.
299
+
300
+ A common use case is reordering the boxes according to descending scores.
301
+
302
+ Args:
303
+ boxlist: BoxList holding N boxes.
304
+ field: A BoxList field for sorting and reordering the BoxList.
305
+ order: (Optional) 'descend' or 'ascend'. Default is descend.
306
+
307
+ Returns:
308
+ sorted_boxlist: A sorted BoxList with the field in the specified order.
309
+
310
+ Raises:
311
+ ValueError: if specified field does not exist or is not of single dimension.
312
+ ValueError: if the order is not either descend or ascend.
313
+ """
314
+ if not boxlist.has_field(field):
315
+ raise ValueError('Field ' + field + ' does not exist')
316
+ if len(boxlist.get_field(field).shape) != 1:
317
+ raise ValueError('Field ' + field + ' should be single dimension.')
318
+ if order != SortOrder.DESCEND and order != SortOrder.ASCEND:
319
+ raise ValueError('Invalid sort order')
320
+
321
+ field_to_sort = boxlist.get_field(field)
322
+ sorted_indices = np.argsort(field_to_sort)
323
+ if order == SortOrder.DESCEND:
324
+ sorted_indices = sorted_indices[::-1]
325
+ return gather_boxlist(boxlist, sorted_indices)
326
+
327
+
328
+ def non_max_suppression(boxlist, max_output_size=10000, iou_threshold=1.0, score_threshold=-10.0):
329
+ """Non maximum suppression.
330
+
331
+ This op greedily selects a subset of detection bounding boxes, pruning
332
+ away boxes that have high IOU (intersection over union) overlap (> thresh)
333
+ with already selected boxes. In each iteration, the detected bounding box with
334
+ highest score in the available pool is selected.
335
+
336
+ Args:
337
+ boxlist: BoxList holding N boxes. Must contain a 'scores' field
338
+ representing detection scores. All scores belong to the same class.
339
+ max_output_size: maximum number of retained boxes
340
+ iou_threshold: intersection over union threshold.
341
+ score_threshold: minimum score threshold. Remove the boxes with scores less than
342
+ this value. The default of -10 is a very low threshold that passes virtually
343
+ all boxes unless the user sets a different score threshold.
344
+
345
+ Returns:
346
+ a BoxList holding M boxes where M <= max_output_size
347
+ Raises:
348
+ ValueError: if 'scores' field does not exist
349
+ ValueError: if threshold is not in [0, 1]
350
+ ValueError: if max_output_size < 0
351
+ """
352
+ if not boxlist.has_field('scores'):
353
+ raise ValueError('Field scores does not exist')
354
+ if iou_threshold < 0. or iou_threshold > 1.0:
355
+ raise ValueError('IOU threshold must be in [0, 1]')
356
+ if max_output_size < 0:
357
+ raise ValueError('max_output_size must be non-negative.')
358
+
359
+ boxlist = filter_scores_greater_than(boxlist, score_threshold)
360
+ if boxlist.num_boxes() == 0:
361
+ return boxlist
362
+
363
+ boxlist = sort_by_field_boxlist(boxlist, 'scores')
364
+
365
+ # Prevent further computation if NMS is disabled.
366
+ if iou_threshold == 1.0:
367
+ if boxlist.num_boxes() > max_output_size:
368
+ selected_indices = np.arange(max_output_size)
369
+ return gather_boxlist(boxlist, selected_indices)
370
+ else:
371
+ return boxlist
372
+
373
+ boxes = boxlist.get()
374
+ num_boxes = boxlist.num_boxes()
375
+ # is_index_valid is True for boxes that are still valid candidates for selection.
376
+ is_index_valid = np.full(num_boxes, 1, dtype=bool)
377
+ selected_indices = []
378
+ num_output = 0
379
+ for i in range(num_boxes):
380
+ if num_output < max_output_size:
381
+ if is_index_valid[i]:
382
+ num_output += 1
383
+ selected_indices.append(i)
384
+ is_index_valid[i] = False
385
+ valid_indices = np.where(is_index_valid)[0]
386
+ if valid_indices.size == 0:
387
+ break
388
+
389
+ intersect_over_union = iou(np.expand_dims(boxes[i, :], axis=0), boxes[valid_indices, :])
390
+ intersect_over_union = np.squeeze(intersect_over_union, axis=0)
391
+ is_index_valid[valid_indices] = np.logical_and(
392
+ is_index_valid[valid_indices],
393
+ intersect_over_union <= iou_threshold)
394
+ return gather_boxlist(boxlist, np.array(selected_indices))
395
+
396
+
397
+ def multi_class_non_max_suppression(boxlist, score_thresh, iou_thresh, max_output_size):
398
+ """Multi-class version of non maximum suppression.
399
+
400
+ This op greedily selects a subset of detection bounding boxes, pruning
401
+ away boxes that have high IOU (intersection over union) overlap (> thresh)
402
+ with already selected boxes. It operates independently for each class for
403
+ which scores are provided (via the scores field of the input box_list),
404
+ pruning boxes with score less than a provided threshold prior to
405
+ applying NMS.
406
+
407
+ Args:
408
+ boxlist: BoxList holding N boxes. Must contain a 'scores' field
409
+ representing detection scores. This scores field is a tensor that can
410
+ be 1-dimensional (in the case of a single class) or 2-dimensional, in
411
+ which case we assume that it takes the shape [num_boxes, num_classes].
412
+ We further assume that this rank is known statically and that
413
+ scores.shape[1] is also known (i.e., the number of classes is fixed
414
+ and known at graph construction time).
415
+ score_thresh: scalar threshold for score (low scoring boxes are removed).
416
+ iou_thresh: scalar threshold for IOU (boxes that have high IOU overlap
417
+ with previously selected boxes are removed).
418
+ max_output_size: maximum number of retained boxes per class.
419
+
420
+ Returns:
421
+ a BoxList holding M boxes with a rank-1 scores field representing
422
+ corresponding scores for each box with scores sorted in decreasing order
423
+ and a rank-1 classes field representing a class label for each box.
424
+ Raises:
425
+ ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
426
+ a valid scores field.
427
+ """
428
+ if not 0 <= iou_thresh <= 1.0:
429
+ raise ValueError('thresh must be between 0 and 1')
430
+ if not isinstance(boxlist, BoxList):
431
+ raise ValueError('boxlist must be a BoxList')
432
+ if not boxlist.has_field('scores'):
433
+ raise ValueError('input boxlist must have \'scores\' field')
434
+ scores = boxlist.get_field('scores')
435
+ if len(scores.shape) == 1:
436
+ scores = np.reshape(scores, [-1, 1])
437
+ elif len(scores.shape) == 2:
438
+ if scores.shape[1] is None:
439
+ raise ValueError('scores field must have statically defined second dimension')
440
+ else:
441
+ raise ValueError('scores field must be of rank 1 or 2')
442
+ num_boxes = boxlist.num_boxes()
443
+ num_scores = scores.shape[0]
444
+ num_classes = scores.shape[1]
445
+
446
+ if num_boxes != num_scores:
447
+ raise ValueError('Incorrect scores field length: actual {} vs expected {}.'.format(num_scores, num_boxes))
448
+
449
+ selected_boxes_list = []
450
+ for class_idx in range(num_classes):
451
+ boxlist_and_class_scores = BoxList(boxlist.get())
452
+ class_scores = np.reshape(scores[0:num_scores, class_idx], [-1])
453
+ boxlist_and_class_scores.add_field('scores', class_scores)
454
+ boxlist_filt = filter_scores_greater_than(boxlist_and_class_scores, score_thresh)
455
+ nms_result = non_max_suppression(
456
+ boxlist_filt, max_output_size=max_output_size, iou_threshold=iou_thresh, score_threshold=score_thresh)
457
+ nms_result.add_field('classes', np.zeros_like(nms_result.get_field('scores')) + class_idx)
458
+ selected_boxes_list.append(nms_result)
459
+ selected_boxes = concatenate_boxlist(selected_boxes_list)
460
+ sorted_boxes = sort_by_field_boxlist(selected_boxes, 'scores')
461
+ return sorted_boxes
462
+
463
+
464
+ def scale(boxlist, y_scale, x_scale):
465
+ """Scale box coordinates in x and y dimensions.
466
+
467
+ Args:
468
+ boxlist: BoxList holding N boxes
469
+ y_scale: float
470
+ x_scale: float
471
+
472
+ Returns:
473
+ boxlist: BoxList holding N boxes
474
+ """
475
+ y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
476
+ y_min = y_scale * y_min
477
+ y_max = y_scale * y_max
478
+ x_min = x_scale * x_min
479
+ x_max = x_scale * x_max
480
+ scaled_boxlist = BoxList(np.hstack([y_min, x_min, y_max, x_max]))
481
+
482
+ fields = boxlist.get_extra_fields()
483
+ for field in fields:
484
+ extra_field_data = boxlist.get_field(field)
485
+ scaled_boxlist.add_field(field, extra_field_data)
486
+
487
+ return scaled_boxlist
488
+
489
+
490
+ def clip_to_window(boxlist, window, filter_nonoverlapping=True):
491
+ """Clip bounding boxes to a window.
492
+
493
+ This op clips input bounding boxes (represented by bounding box
494
+ corners) to a window, optionally filtering out boxes that do not
495
+ overlap at all with the window.
496
+
497
+ Args:
498
+ boxlist: BoxList holding M_in boxes
499
+ window: a numpy array of shape [4] representing the [y_min, x_min, y_max, x_max]
500
+ window to which the op should clip boxes.
501
+ filter_nonoverlapping: whether to filter out boxes that do not overlap at all with the window.
502
+
503
+ Returns:
504
+ a BoxList holding M_out boxes where M_out <= M_in
505
+ """
506
+ y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
507
+ win_y_min = window[0]
508
+ win_x_min = window[1]
509
+ win_y_max = window[2]
510
+ win_x_max = window[3]
511
+ y_min_clipped = np.fmax(np.fmin(y_min, win_y_max), win_y_min)
512
+ y_max_clipped = np.fmax(np.fmin(y_max, win_y_max), win_y_min)
513
+ x_min_clipped = np.fmax(np.fmin(x_min, win_x_max), win_x_min)
514
+ x_max_clipped = np.fmax(np.fmin(x_max, win_x_max), win_x_min)
515
+ clipped = BoxList(np.hstack([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped]))
516
+ clipped = _copy_extra_fields(clipped, boxlist)
517
+ if filter_nonoverlapping:
518
+ areas = area_boxlist(clipped)
519
+ nonzero_area_indices = np.reshape(np.nonzero(np.greater(areas, 0.0)), [-1]).astype(np.int32)
520
+ clipped = gather_boxlist(clipped, nonzero_area_indices)
521
+ return clipped
522
+
523
+
524
+ def prune_non_overlapping_boxes(boxlist1, boxlist2, minoverlap=0.0):
525
+ """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.
526
+
527
+ For each box in boxlist1, we want its IOA to be more than minoverlap with
528
+ at least one of the boxes in boxlist2. If it does not, we remove it.
529
+
530
+ Args:
531
+ boxlist1: BoxList holding N boxes.
532
+ boxlist2: BoxList holding M boxes.
533
+ minoverlap: Minimum required overlap between boxes, to count them as overlapping.
534
+
535
+ Returns:
536
+ A pruned boxlist with size [N', 4].
537
+ """
538
+ intersection_over_area = ioa_boxlist(boxlist2, boxlist1) # [M, N] tensor
539
+ intersection_over_area = np.amax(intersection_over_area, axis=0) # [N] tensor
540
+ keep_bool = np.greater_equal(intersection_over_area, np.array(minoverlap))
541
+ keep_inds = np.nonzero(keep_bool)[0]
542
+ new_boxlist1 = gather_boxlist(boxlist1, keep_inds)
543
+ return new_boxlist1
544
+
545
+
546
+ def prune_outside_window(boxlist, window):
547
+ """Prunes bounding boxes that fall outside a given window.
548
+
549
+ This function prunes bounding boxes that even partially fall outside the given
550
+ window. See also ClipToWindow which only prunes bounding boxes that fall
551
+ completely outside the window, and clips any bounding boxes that partially
552
+ overflow.
553
+
554
+ Args:
555
+ boxlist: a BoxList holding M_in boxes.
556
+ window: a numpy array of size 4, representing [ymin, xmin, ymax, xmax] of the window.
557
+
558
+ Returns:
559
+ pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in.
560
+ valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes in the input tensor.
561
+ """
562
+
563
+ y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
564
+ win_y_min = window[0]
565
+ win_x_min = window[1]
566
+ win_y_max = window[2]
567
+ win_x_max = window[3]
568
+ coordinate_violations = np.hstack([
569
+ np.less(y_min, win_y_min), np.less(x_min, win_x_min),
570
+ np.greater(y_max, win_y_max), np.greater(x_max, win_x_max)])
571
+ valid_indices = np.reshape(np.where(np.logical_not(np.max(coordinate_violations, axis=1))), [-1])
572
+ return gather_boxlist(boxlist, valid_indices), valid_indices
573
+
574
+
575
+ def concatenate_boxlist(boxlists, fields=None):
576
+ """Concatenate list of BoxLists.
577
+
578
+ This op concatenates a list of input BoxLists into a larger BoxList. It also
579
+ handles concatenation of BoxList fields as long as the field tensor shapes
580
+ are equal except for the first dimension.
581
+
582
+ Args:
583
+ boxlists: list of BoxList objects
584
+ fields: optional list of fields to also concatenate. By default, all
585
+ fields from the first BoxList in the list are included in the concatenation.
586
+
587
+ Returns:
588
+ a BoxList with number of boxes equal to
589
+ sum([boxlist.num_boxes() for boxlist in BoxList])
590
+ Raises:
591
+ ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
592
+ contains non BoxList objects), or if requested fields are not contained in all boxlists
593
+ """
594
+ if not isinstance(boxlists, list):
595
+ raise ValueError('boxlists should be a list')
596
+ if not boxlists:
597
+ raise ValueError('boxlists should have nonzero length')
598
+ for boxlist in boxlists:
599
+ if not isinstance(boxlist, BoxList):
600
+ raise ValueError('all elements of boxlists should be BoxList objects')
601
+ concatenated = BoxList(np.vstack([boxlist.get() for boxlist in boxlists]))
602
+ if fields is None:
603
+ fields = boxlists[0].get_extra_fields()
604
+ for field in fields:
605
+ first_field_shape = boxlists[0].get_field(field).shape
606
+ first_field_shape = first_field_shape[1:]
607
+ for boxlist in boxlists:
608
+ if not boxlist.has_field(field):
609
+ raise ValueError('boxlist must contain all requested fields')
610
+ field_shape = boxlist.get_field(field).shape
611
+ field_shape = field_shape[1:]
612
+ if field_shape != first_field_shape:
613
+ raise ValueError('field %s must have same shape for all boxlists '
614
+ 'except for the 0th dimension.' % field)
615
+ concatenated_field = np.concatenate([boxlist.get_field(field) for boxlist in boxlists], axis=0)
616
+ concatenated.add_field(field, concatenated_field)
617
+ return concatenated
618
+
619
+
620
+ def filter_scores_greater_than(boxlist, thresh):
621
+ """Filter to keep only boxes with score exceeding a given threshold.
622
+
623
+ This op keeps the collection of boxes whose corresponding scores are
624
+ greater than the input threshold.
625
+
626
+ Args:
627
+ boxlist: BoxList holding N boxes. Must contain a 'scores' field representing detection scores.
628
+ thresh: scalar threshold
629
+
630
+ Returns:
631
+ a BoxList holding M boxes where M <= N
632
+
633
+ Raises:
634
+ ValueError: if boxlist not a BoxList object or if it does not have a scores field
635
+ """
636
+ if not isinstance(boxlist, BoxList):
637
+ raise ValueError('boxlist must be a BoxList')
638
+ if not boxlist.has_field('scores'):
639
+ raise ValueError('input boxlist must have \'scores\' field')
640
+ scores = boxlist.get_field('scores')
641
+ if len(scores.shape) > 2:
642
+ raise ValueError('Scores should have rank 1 or 2')
643
+ if len(scores.shape) == 2 and scores.shape[1] != 1:
644
+ raise ValueError('Scores should have rank 1 or have shape '
645
+ 'consistent with [None, 1]')
646
+ high_score_indices = np.reshape(np.where(np.greater(scores, thresh)), [-1]).astype(np.int32)
647
+ return gather_boxlist(boxlist, high_score_indices)
648
+
649
+
650
+ def change_coordinate_frame(boxlist, window):
651
+ """Change coordinate frame of the boxlist to be relative to window's frame.
652
+
653
+ Given a window of the form [ymin, xmin, ymax, xmax],
654
+ changes bounding box coordinates from boxlist to be relative to this window
655
+ (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)).
656
+
657
+ An example use case is data augmentation: where we are given groundtruth
658
+ boxes (boxlist) and would like to randomly crop the image to some
659
+ window (window). In this case we need to change the coordinate frame of
660
+ each groundtruth box to be relative to this new window.
661
+
662
+ Args:
663
+ boxlist: A BoxList object holding N boxes.
664
+ window: a size 4 1-D numpy array.
665
+
666
+ Returns:
667
+ Returns a BoxList object with N boxes.
668
+ """
669
+ win_height = window[2] - window[0]
670
+ win_width = window[3] - window[1]
671
+ boxlist_new = scale(
672
+ BoxList(boxlist.get() - [window[0], window[1], window[0], window[1]]), 1.0 / win_height, 1.0 / win_width)
673
+ _copy_extra_fields(boxlist_new, boxlist)
674
+
675
+ return boxlist_new
676
+
677
+
678
+ def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
679
+ """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
680
+
681
+ Args:
682
+ boxlist_to_copy_to: BoxList to which extra fields are copied.
683
+ boxlist_to_copy_from: BoxList from which fields are copied.
684
+
685
+ Returns:
686
+ boxlist_to_copy_to with extra fields.
687
+ """
688
+ for field in boxlist_to_copy_from.get_extra_fields():
689
+ boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
690
+ return boxlist_to_copy_to
691
+
692
+
693
+ def _update_valid_indices_by_removing_high_iou_boxes(
694
+ selected_indices, is_index_valid, intersect_over_union, threshold):
695
+ max_iou = np.max(intersect_over_union[:, selected_indices], axis=1)
696
+ return np.logical_and(is_index_valid, max_iou <= threshold)
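A short sketch of the BoxList utilities above; the module path is inferred from the commit layout and the coordinates and scores are illustrative only.

import numpy as np
from effdet.evaluation.np_box_list import BoxList, iou_boxlist, non_max_suppression

boxes = np.array([[0., 0., 10., 10.],
                  [1., 1., 11., 11.],
                  [20., 20., 30., 30.]], dtype=np.float32)
boxlist = BoxList(boxes)
boxlist.add_field('scores', np.array([0.9, 0.8, 0.7], dtype=np.float32))

print(iou_boxlist(boxlist, boxlist).shape)  # (3, 3) pairwise IoU matrix
kept = non_max_suppression(boxlist, max_output_size=10, iou_threshold=0.5)
print(kept.num_boxes())  # the second box overlaps the first heavily and is suppressed -> 2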
efficientdet/effdet/evaluation/np_mask_list.py ADDED
@@ -0,0 +1,478 @@
1
+ import numpy as np
2
+ from .np_box_list import *
3
+
4
+ EPSILON = 1e-7
5
+
6
+
7
+ class MaskList(BoxList):
8
+ """Convenience wrapper for BoxList with masks.
9
+
10
+ MaskList extends the np_box_list.BoxList to contain masks as well.
11
+ In particular, its constructor receives both boxes and masks. Note that the
12
+ masks correspond to the full image.
13
+ """
14
+
15
+ def __init__(self, box_data, mask_data):
16
+ """Constructs box collection.
17
+
18
+ Args:
19
+ box_data: a numpy array of shape [N, 4] representing box coordinates
20
+ mask_data: a numpy array of shape [N, height, width] representing masks
21
+ with values in {0,1}. The masks correspond to the full
22
+ image. The height and the width will be equal to image height and width.
23
+
24
+ Raises:
25
+ ValueError: if bbox data is not a numpy array
26
+ ValueError: if invalid dimensions for bbox data
27
+ ValueError: if mask data is not a numpy array
28
+ ValueError: if invalid dimension for mask data
29
+ """
30
+ super(MaskList, self).__init__(box_data)
31
+ if not isinstance(mask_data, np.ndarray):
32
+ raise ValueError('Mask data must be a numpy array.')
33
+ if len(mask_data.shape) != 3:
34
+ raise ValueError('Invalid dimensions for mask data.')
35
+ if mask_data.dtype != np.uint8:
36
+ raise ValueError('Invalid data type for mask data: uint8 is required.')
37
+ if mask_data.shape[0] != box_data.shape[0]:
38
+ raise ValueError('There should be the same number of boxes and masks.')
39
+ self.data['masks'] = mask_data
40
+
41
+ def get_masks(self):
42
+ """Convenience function for accessing masks.
43
+
44
+ Returns:
45
+ a numpy array of shape [N, height, width] representing masks
46
+ """
47
+ return self.get_field('masks')
48
+
49
+
50
+ def boxlist_to_masklist(boxlist):
51
+ """Converts a BoxList containing 'masks' into a BoxMaskList.
52
+
53
+ Args:
54
+ boxlist: An np_box_list.BoxList object.
55
+
56
+ Returns:
57
+ A MaskList object.
58
+
59
+ Raises:
60
+ ValueError: If boxlist does not contain `masks` as a field.
61
+ """
62
+ if not boxlist.has_field('masks'):
63
+ raise ValueError('boxlist does not contain mask field.')
64
+ masklist = MaskList(box_data=boxlist.get(), mask_data=boxlist.get_field('masks'))
65
+ extra_fields = boxlist.get_extra_fields()
66
+ for key in extra_fields:
67
+ if key != 'masks':
68
+ masklist.data[key] = boxlist.get_field(key)
69
+ return masklist
70
+
71
+
72
+ def area_mask(masks):
73
+ """Computes area of masks.
74
+
75
+ Args:
76
+ masks: Numpy array with shape [N, height, width] holding N masks. Masks
77
+ values are of type np.uint8 and values are in {0,1}.
78
+
79
+ Returns:
80
+ a numpy array with shape [N*1] representing mask areas.
81
+
82
+ Raises:
83
+ ValueError: If masks.dtype is not np.uint8
84
+ """
85
+ if masks.dtype != np.uint8:
86
+ raise ValueError('Masks type should be np.uint8')
87
+ return np.sum(masks, axis=(1, 2), dtype=np.float32)
88
+
89
+
90
+ def intersection_mask(masks1, masks2):
91
+ """Compute pairwise intersection areas between masks.
92
+
93
+ Args:
94
+ masks1: a numpy array with shape [N, height, width] holding N masks. Masks
95
+ values are of type np.uint8 and values are in {0,1}.
96
+ masks2: a numpy array with shape [M, height, width] holding M masks. Masks
97
+ values are of type np.uint8 and values are in {0,1}.
98
+
99
+ Returns:
100
+ a numpy array with shape [N*M] representing pairwise intersection area.
101
+
102
+ Raises:
103
+ ValueError: If masks1 and masks2 are not of type np.uint8.
104
+ """
105
+ if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
106
+ raise ValueError('masks1 and masks2 should be of type np.uint8')
107
+ n = masks1.shape[0]
108
+ m = masks2.shape[0]
109
+ answer = np.zeros([n, m], dtype=np.float32)
110
+ for i in np.arange(n):
111
+ for j in np.arange(m):
112
+ answer[i, j] = np.sum(np.minimum(masks1[i], masks2[j]), dtype=np.float32)
113
+ return answer
114
+
115
+
116
+ def iou_mask(masks1, masks2):
117
+ """Computes pairwise intersection-over-union between mask collections.
118
+
119
+ Args:
120
+ masks1: a numpy array with shape [N, height, width] holding N masks. Masks
121
+ values are of type np.uint8 and values are in {0,1}.
122
+ masks2: a numpy array with shape [M, height, width] holding M masks. Masks
123
+ values are of type np.uint8 and values are in {0,1}.
124
+
125
+ Returns:
126
+ a numpy array with shape [N, M] representing pairwise iou scores.
127
+
128
+ Raises:
129
+ ValueError: If masks1 and masks2 are not of type np.uint8.
130
+ """
131
+ if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
132
+ raise ValueError('masks1 and masks2 should be of type np.uint8')
133
+ intersect = intersection_mask(masks1, masks2)
134
+ area1 = area_mask(masks1)
135
+ area2 = area_mask(masks2)
136
+ union = np.expand_dims(area1, axis=1) + np.expand_dims(area2, axis=0) - intersect
137
+ return intersect / np.maximum(union, EPSILON)
138
+
139
+
140
+ def ioa_mask(masks1, masks2):
141
+ """Computes pairwise intersection-over-area between box collections.
142
+
143
+ Intersection-over-area (ioa) between two masks, mask1 and mask2 is defined as
144
+ their intersection area over mask2's area. Note that ioa is not symmetric,
145
+ that is, IOA(mask1, mask2) != IOA(mask2, mask1).
146
+
147
+ Args:
148
+ masks1: a numpy array with shape [N, height, width] holding N masks. Masks
149
+ values are of type np.uint8 and values are in {0,1}.
150
+ masks2: a numpy array with shape [M, height, width] holding M masks. Masks
151
+ values are of type np.uint8 and values are in {0,1}.
152
+
153
+ Returns:
154
+ a numpy array with shape [N, M] representing pairwise ioa scores.
155
+
156
+ Raises:
157
+ ValueError: If masks1 and masks2 are not of type np.uint8.
158
+ """
159
+ if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
160
+ raise ValueError('masks1 and masks2 should be of type np.uint8')
161
+ intersect = intersection_mask(masks1, masks2)
162
+ areas = np.expand_dims(area_mask(masks2), axis=0)
163
+ return intersect / (areas + EPSILON)
164
+
165
+
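As a quick sanity check of the pairwise mask metrics defined above, a toy sketch with two single-mask collections; the `effdet.evaluation.np_mask_list` import path is assumed from how the sibling modules import each other.

```python
import numpy as np
from effdet.evaluation.np_mask_list import ioa_mask, iou_mask

# Two 4x4 binary masks (uint8, values in {0, 1}) overlapping in exactly one pixel.
masks_a = np.zeros((1, 4, 4), dtype=np.uint8)
masks_a[0, :2, :2] = 1            # 2x2 square, area 4
masks_b = np.zeros((1, 4, 4), dtype=np.uint8)
masks_b[0, 1:3, 1:3] = 1          # shifted 2x2 square, area 4

print(iou_mask(masks_a, masks_b))  # intersection 1 / union 7 ~= 0.143
print(ioa_mask(masks_a, masks_b))  # intersection 1 / area(masks_b) 4 = 0.25
```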
166
+ def area_masklist(masklist):
167
+ """Computes area of masks.
168
+
169
+ Args:
170
+ masklist: BoxMaskList holding N boxes and masks
171
+
172
+ Returns:
173
+ a numpy array with shape [N*1] representing mask areas
174
+ """
175
+ return area_mask(masklist.get_masks())
176
+
177
+
178
+ def intersection_masklist(masklist1, masklist2):
179
+ """Compute pairwise intersection areas between masks.
180
+
181
+ Args:
182
+ masklist1: BoxMaskList holding N boxes and masks
183
+ masklist2: BoxMaskList holding M boxes and masks
184
+
185
+ Returns:
186
+ a numpy array with shape [N*M] representing pairwise intersection area
187
+ """
188
+ return intersection_mask(masklist1.get_masks(), masklist2.get_masks())
189
+
190
+
191
+ def iou_masklist(masklist1, masklist2):
192
+ """Computes pairwise intersection-over-union between box and mask collections.
193
+
194
+ Args:
195
+ masklist1: BoxMaskList holding N boxes and masks
196
+ masklist2: BoxMaskList holding M boxes and masks
197
+
198
+ Returns:
199
+ a numpy array with shape [N, M] representing pairwise iou scores.
200
+ """
201
+ return iou_mask(masklist1.get_masks(), masklist2.get_masks())
202
+
203
+
204
+ def ioa_masklist(masklist1, masklist2):
205
+ """Computes pairwise intersection-over-area between box and mask collections.
206
+
207
+ Intersection-over-area (ioa) between two masks mask1 and mask2 is defined as
208
+ their intersection area over mask2's area. Note that ioa is not symmetric,
209
+ that is, IOA(mask1, mask2) != IOA(mask2, mask1).
210
+
211
+ Args:
212
+ masklist1: BoxMaskList holding N boxes and masks
213
+ masklist2: BoxMaskList holding M boxes and masks
214
+
215
+ Returns:
216
+ a numpy array with shape [N, M] representing pairwise ioa scores.
217
+ """
218
+ return ioa_mask(masklist1.get_masks(), masklist2.get_masks())
219
+
220
+
221
+ def gather_masklist(masklist, indices, fields=None):
222
+ """Gather boxes from BoxMaskList according to indices.
223
+
224
+ By default, gather returns boxes corresponding to the input index list, as
225
+ well as all additional fields stored in the masklist (indexing into the
226
+ first dimension). However one can optionally only gather from a
227
+ subset of fields.
228
+
229
+ Args:
230
+ masklist: BoxMaskList holding N boxes
231
+ indices: a 1-d numpy array of type int_
232
+ fields: (optional) list of fields to also gather from. If None (default), all fields
233
+ are gathered from. Pass an empty fields list to only gather the box coordinates.
234
+
235
+ Returns:
236
+ submasklist: a BoxMaskList corresponding to the subset of the input masklist specified by indices
237
+
238
+ Raises:
239
+ ValueError: if specified field is not contained in masklist or if the indices are not of type int_
240
+ """
241
+ if fields is not None:
242
+ if 'masks' not in fields:
243
+ fields.append('masks')
244
+ return boxlist_to_masklist(gather_boxlist(boxlist=masklist, indices=indices, fields=fields))
245
+
246
+
247
+ def sort_by_field_masklist(masklist, field, order=SortOrder.DESCEND):
248
+ """Sort boxes and associated fields according to a scalar field.
249
+
250
+ A common use case is reordering the boxes according to descending scores.
251
+
252
+ Args:
253
+ masklist: BoxMaskList holding N boxes.
254
+ field: A BoxMaskList field for sorting and reordering the BoxMaskList.
255
+ order: (Optional) 'descend' or 'ascend'. Default is descend.
256
+
257
+ Returns:
258
+ sorted_masklist: A sorted BoxMaskList with the field in the specified order.
259
+ """
260
+ return boxlist_to_masklist(sort_by_field_boxlist(boxlist=masklist, field=field, order=order))
261
+
262
+
263
+ def non_max_suppression_mask(masklist, max_output_size=10000, iou_threshold=1.0, score_threshold=-10.0):
264
+ """Non maximum suppression.
265
+
266
+ This op greedily selects a subset of detection bounding boxes, pruning
267
+ away boxes that have high IOU (intersection over union) overlap (> thresh)
268
+ with already selected boxes. In each iteration, the detected bounding box with
269
+ highest score in the available pool is selected.
270
+
271
+ Args:
272
+ masklist: BoxMaskList holding N boxes. Must contain a 'scores' field representing
273
+ detection scores. All scores belong to the same class.
274
+ max_output_size: maximum number of retained boxes
275
+ iou_threshold: intersection over union threshold.
276
+ score_threshold: minimum score threshold. Remove the boxes with scores
277
+ less than this value. Default value is set to -10. A very
278
+ low threshold to pass pretty much all the boxes, unless
279
+ the user sets a different score threshold.
280
+
281
+ Returns:
282
+ a MaskList holding M boxes where M <= max_output_size
283
+
284
+ Raises:
285
+ ValueError: if 'scores' field does not exist
286
+ ValueError: if threshold is not in [0, 1]
287
+ ValueError: if max_output_size < 0
288
+ """
289
+ if not masklist.has_field('scores'):
290
+ raise ValueError('Field scores does not exist')
291
+ if iou_threshold < 0. or iou_threshold > 1.0:
292
+ raise ValueError('IOU threshold must be in [0, 1]')
293
+ if max_output_size < 0:
294
+ raise ValueError('max_output_size must be non-negative.')
295
+
296
+ masklist = filter_scores_greater_than_masklist(masklist, score_threshold)
297
+ if masklist.num_boxes() == 0:
298
+ return masklist
299
+
300
+ masklist = sort_by_field_masklist(masklist, 'scores')
301
+
302
+ # Prevent further computation if NMS is disabled.
303
+ if iou_threshold == 1.0:
304
+ if masklist.num_boxes() > max_output_size:
305
+ selected_indices = np.arange(max_output_size)
306
+ return gather_masklist(masklist, selected_indices)
307
+ else:
308
+ return masklist
309
+
310
+ masks = masklist.get_masks()
311
+ num_masks = masklist.num_boxes()
312
+
313
+ # is_index_valid is True only for all remaining valid boxes,
314
+ is_index_valid = np.full(num_masks, 1, dtype=bool)
315
+ selected_indices = []
316
+ num_output = 0
317
+ for i in range(num_masks):
318
+ if num_output < max_output_size:
319
+ if is_index_valid[i]:
320
+ num_output += 1
321
+ selected_indices.append(i)
322
+ is_index_valid[i] = False
323
+ valid_indices = np.where(is_index_valid)[0]
324
+ if valid_indices.size == 0:
325
+ break
326
+
327
+ intersect_over_union = iou_mask(np.expand_dims(masks[i], axis=0), masks[valid_indices])
328
+ intersect_over_union = np.squeeze(intersect_over_union, axis=0)
329
+ is_index_valid[valid_indices] = np.logical_and(
330
+ is_index_valid[valid_indices],
331
+ intersect_over_union <= iou_threshold)
332
+ return gather_masklist(masklist, np.array(selected_indices))
333
+
334
+
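For reference, a short sketch of how the greedy mask NMS above is intended to be driven; the import path is assumed, and the boxes, masks, and scores are made up for illustration.

```python
import numpy as np
from effdet.evaluation.np_mask_list import MaskList, non_max_suppression_mask

# Three same-class detections; the first two share an identical mask.
boxes = np.array([[0., 0., 2., 2.],
                  [0., 0., 2., 2.],
                  [2., 2., 4., 4.]], dtype=np.float32)
masks = np.zeros((3, 4, 4), dtype=np.uint8)
masks[0, 0:2, 0:2] = 1
masks[1, 0:2, 0:2] = 1            # duplicate of the first mask
masks[2, 2:4, 2:4] = 1

masklist = MaskList(box_data=boxes, mask_data=masks)
masklist.add_field('scores', np.array([0.9, 0.8, 0.7], dtype=np.float32))

# Greedy NMS on mask IoU: the duplicate (IoU 1.0 with the top-scoring mask) is pruned.
kept = non_max_suppression_mask(masklist, max_output_size=10, iou_threshold=0.5)
print(kept.num_boxes())           # 2
print(kept.get_field('scores'))   # [0.9, 0.7]
```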
335
+ def multi_class_non_max_suppression_mask(masklist, score_thresh, iou_thresh, max_output_size):
336
+ """Multi-class version of non maximum suppression.
337
+
338
+ This op greedily selects a subset of detection bounding boxes, pruning away boxes that have
339
+ high IOU (intersection over union) overlap (> thresh) with already selected boxes. It
340
+ operates independently for each class for which scores are provided (via the scores field
341
+ of the input box_list), pruning boxes with score less than a provided threshold prior to
342
+ applying NMS.
343
+
344
+ Args:
345
+ masklist: BoxMaskList holding N boxes. Must contain a 'scores' field representing detection
346
+ scores. This scores field is a tensor that can be 1 dimensional (in the case of a
347
+ single class) or 2-dimensional, in which case we assume that it takes the shape
348
+ [num_boxes, num_classes]. We further assume that this rank is known statically and
349
+ that scores.shape[1] is also known (i.e., the number of classes is fixed and known
350
+ at graph construction time).
351
+ score_thresh: scalar threshold for score (low scoring boxes are removed).
352
+ iou_thresh: scalar threshold for IOU (boxes that have high IOU overlap with previously
353
+ selected boxes are removed).
354
+ max_output_size: maximum number of retained boxes per class.
355
+
356
+ Returns:
357
+ a masklist holding M boxes with a rank-1 scores field representing
358
+ corresponding scores for each box with scores sorted in decreasing order
359
+ and a rank-1 classes field representing a class label for each box.
360
+ Raises:
361
+ ValueError: if iou_thresh is not in [0, 1] or if input masklist does not have a valid scores field.
362
+ """
363
+ if not 0 <= iou_thresh <= 1.0:
364
+ raise ValueError('thresh must be between 0 and 1')
365
+ if not isinstance(masklist, MaskList):
366
+ raise ValueError('masklist must be a MaskList')
367
+ if not masklist.has_field('scores'):
368
+ raise ValueError('input masklist must have \'scores\' field')
369
+ scores = masklist.get_field('scores')
370
+ if len(scores.shape) == 1:
371
+ scores = np.reshape(scores, [-1, 1])
372
+ elif len(scores.shape) == 2:
373
+ if scores.shape[1] is None:
374
+ raise ValueError('scores field must have statically defined second dimension')
375
+ else:
376
+ raise ValueError('scores field must be of rank 1 or 2')
377
+
378
+ num_boxes = masklist.num_boxes()
379
+ num_scores = scores.shape[0]
380
+ num_classes = scores.shape[1]
381
+
382
+ if num_boxes != num_scores:
383
+ raise ValueError('Incorrect scores field length: actual vs expected.')
384
+
385
+ selected_boxes_list = []
386
+ for class_idx in range(num_classes):
387
+ masklist_and_class_scores = MaskList(box_data=masklist.get(), mask_data=masklist.get_masks())
388
+ class_scores = np.reshape(scores[0:num_scores, class_idx], [-1])
389
+ masklist_and_class_scores.add_field('scores', class_scores)
390
+ masklist_filt = filter_scores_greater_than_masklist(masklist_and_class_scores, score_thresh)
391
+ nms_result = non_max_suppression_mask(
392
+ masklist_filt,
393
+ max_output_size=max_output_size,
394
+ iou_threshold=iou_thresh,
395
+ score_threshold=score_thresh)
396
+ nms_result.add_field('classes', np.zeros_like(nms_result.get_field('scores')) + class_idx)
397
+ selected_boxes_list.append(nms_result)
398
+ selected_boxes = concatenate_boxlist(selected_boxes_list)
399
+ sorted_boxes = sort_by_field_boxlist(selected_boxes, 'scores')
400
+ return boxlist_to_masklist(boxlist=sorted_boxes)
401
+
402
+
403
+ def prune_non_overlapping_masklist(masklist1, masklist2, minoverlap=0.0):
404
+ """Prunes the boxes in list1 that overlap less than thresh with list2.
405
+
406
+ For each mask in masklist1, we want its IOA to be more than minoverlap
407
+ with at least one of the masks in masklist2. If it does not, we remove
408
+ it. If the masks are not full size image, we do the pruning based on boxes.
409
+
410
+ Args:
411
+ masklist1: BoxMaskList holding N boxes and masks.
412
+ masklist2: BoxMaskList holding M boxes and masks.
413
+ minoverlap: Minimum required overlap between boxes, to count them as overlapping.
414
+
415
+ Returns:
416
+ A pruned masklist with size [N', 4].
417
+ """
418
+ intersection_over_area = ioa_masklist(masklist2, masklist1) # [M, N] tensor
419
+ intersection_over_area = np.amax(intersection_over_area, axis=0) # [N] tensor
420
+ keep_bool = np.greater_equal(intersection_over_area, np.array(minoverlap))
421
+ keep_inds = np.nonzero(keep_bool)[0]
422
+ new_masklist1 = gather_masklist(masklist1, keep_inds)
423
+ return new_masklist1
424
+
425
+
426
+ def concatenate_masklist(masklists, fields=None):
427
+ """Concatenate list of masklists.
428
+
429
+ This op concatenates a list of input masklists into a larger
430
+ masklist. It also
431
+ handles concatenation of masklist fields as long as the field tensor
432
+ shapes are equal except for the first dimension.
433
+
434
+ Args:
435
+ masklists: list of BoxMaskList objects
436
+ fields: optional list of fields to also concatenate. By default, all
437
+ fields from the first BoxMaskList in the list are included in the concatenation.
438
+
439
+ Returns:
440
+ a masklist with number of boxes equal to sum([masklist.num_boxes() for masklist in masklists])
441
+ Raises:
442
+ ValueError: if masklists is invalid (i.e., is not a list, is empty, or contains non
443
+ masklist objects), or if requested fields are not contained in all masklists
444
+ """
445
+ if fields is not None:
446
+ if 'masks' not in fields:
447
+ fields.append('masks')
448
+ return boxlist_to_masklist(concatenate_boxlist(boxlists=masklists, fields=fields))
449
+
450
+
451
+ def filter_scores_greater_than_masklist(masklist, thresh):
452
+ """Filter to keep only boxes and masks with score exceeding a given threshold.
453
+
454
+ This op keeps the collection of boxes and masks whose corresponding scores are
455
+ greater than the input threshold.
456
+
457
+ Args:
458
+ masklist: BoxMaskList holding N boxes and masks. Must contain a
459
+ 'scores' field representing detection scores.
460
+ thresh: scalar threshold
461
+
462
+ Returns:
463
+ a BoxMaskList holding M boxes and masks where M <= N
464
+
465
+ Raises:
466
+ ValueError: if masklist not a BoxMaskList object or if it does not have a scores field
467
+ """
468
+ if not isinstance(masklist, MaskList):
469
+ raise ValueError('masklist must be a BoxMaskList')
470
+ if not masklist.has_field('scores'):
471
+ raise ValueError('input masklist must have \'scores\' field')
472
+ scores = masklist.get_field('scores')
473
+ if len(scores.shape) > 2:
474
+ raise ValueError('Scores should have rank 1 or 2')
475
+ if len(scores.shape) == 2 and scores.shape[1] != 1:
476
+ raise ValueError('Scores should have rank 1 or have shape consistent with [None, 1]')
477
+ high_score_indices = np.reshape(np.where(np.greater(scores, thresh)), [-1]).astype(np.int32)
478
+ return gather_masklist(masklist, high_score_indices)
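Closing out this module, a hedged sketch of the multi-class entry point with a [num_boxes, num_classes] scores field; it assumes the box-list helpers imported at the top of the file carry the extra 'masks' and 'classes' fields through gathering and concatenation, as the code above expects.

```python
import numpy as np
from effdet.evaluation.np_mask_list import MaskList, multi_class_non_max_suppression_mask

boxes = np.array([[0., 0., 2., 2.],
                  [2., 2., 4., 4.]], dtype=np.float32)
masks = np.zeros((2, 4, 4), dtype=np.uint8)
masks[0, 0:2, 0:2] = 1
masks[1, 2:4, 2:4] = 1

masklist = MaskList(box_data=boxes, mask_data=masks)
# Per-class scores, shape [num_boxes, num_classes]: box 0 favours class 0, box 1 class 1.
masklist.add_field('scores', np.array([[0.9, 0.1],
                                       [0.2, 0.8]], dtype=np.float32))

result = multi_class_non_max_suppression_mask(
    masklist, score_thresh=0.5, iou_thresh=0.5, max_output_size=10)
print(result.get_field('scores'))   # [0.9, 0.8], sorted in decreasing order
print(result.get_field('classes'))  # [0., 1.]
```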
efficientdet/effdet/evaluation/object_detection_evaluation.py ADDED
@@ -0,0 +1,273 @@
1
+ import logging
2
+
3
+ import numpy as np
4
+
5
+ from effdet.evaluation.metrics import compute_precision_recall, compute_average_precision, compute_cor_loc
6
+ from effdet.evaluation.per_image_evaluation import PerImageEvaluation
7
+
8
+
9
+ class ObjectDetectionEvaluation:
10
+ """Internal implementation of Pascal object detection metrics."""
11
+
12
+ def __init__(self,
13
+ num_gt_classes,
14
+ matching_iou_threshold=0.5,
15
+ nms_iou_threshold=1.0,
16
+ nms_max_output_boxes=10000,
17
+ recall_lower_bound=0.0,
18
+ recall_upper_bound=1.0,
19
+ use_weighted_mean_ap=False,
20
+ label_id_offset=0,
21
+ group_of_weight=0.0,
22
+ per_image_eval_class=PerImageEvaluation):
23
+ """Constructor.
24
+ Args:
25
+ num_gt_classes: Number of ground-truth classes.
26
+ matching_iou_threshold: IOU threshold used for matching detected boxes to ground-truth boxes.
27
+ nms_iou_threshold: IOU threshold used for non-maximum suppression.
28
+ nms_max_output_boxes: Maximum number of boxes returned by non-maximum suppression.
29
+ recall_lower_bound: lower bound of recall operating area
30
+ recall_upper_bound: upper bound of recall operating area
31
+ use_weighted_mean_ap: (optional) boolean which determines if the mean
32
+ average precision is computed directly from the scores and tp_fp_labels of all classes.
33
+ label_id_offset: The label id offset.
34
+ group_of_weight: Weight of group-of boxes. If set to 0, detections of the
35
+ correct class within a group-of box are ignored. If weight is > 0, then
36
+ if at least one detection falls within a group-of box with
37
+ matching_iou_threshold, weight group_of_weight is added to true
38
+ positives. Consequently, if no detection falls within a group-of box,
39
+ weight group_of_weight is added to false negatives.
40
+ per_image_eval_class: The class that contains functions for computing per image metrics.
41
+ Raises:
42
+ ValueError: if num_gt_classes is smaller than 1.
43
+ """
44
+ if num_gt_classes < 1:
45
+ raise ValueError('Need at least 1 groundtruth class for evaluation.')
46
+
47
+ self.per_image_eval = per_image_eval_class(
48
+ num_gt_classes=num_gt_classes,
49
+ matching_iou_threshold=matching_iou_threshold,
50
+ nms_iou_threshold=nms_iou_threshold,
51
+ nms_max_output_boxes=nms_max_output_boxes,
52
+ group_of_weight=group_of_weight)
53
+ self.recall_lower_bound = recall_lower_bound
54
+ self.recall_upper_bound = recall_upper_bound
55
+ self.group_of_weight = group_of_weight
56
+ self.num_class = num_gt_classes
57
+ self.use_weighted_mean_ap = use_weighted_mean_ap
58
+ self.label_id_offset = label_id_offset
59
+
60
+ self.gt_boxes = {}
61
+ self.gt_class_labels = {}
62
+ self.gt_masks = {}
63
+ self.gt_is_difficult_list = {}
64
+ self.gt_is_group_of_list = {}
65
+ self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=float)
66
+ self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)
67
+
68
+ self._initialize_detections()
69
+
70
+ def _initialize_detections(self):
71
+ """Initializes internal data structures."""
72
+ self.detection_keys = set()
73
+ self.scores_per_class = [[] for _ in range(self.num_class)]
74
+ self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
75
+ self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
76
+ self.average_precision_per_class = np.empty(self.num_class, dtype=float)
77
+ self.average_precision_per_class.fill(np.nan)
78
+ self.precisions_per_class = [np.nan] * self.num_class
79
+ self.recalls_per_class = [np.nan] * self.num_class
80
+ self.sum_tp_class = [np.nan] * self.num_class
81
+
82
+ self.corloc_per_class = np.ones(self.num_class, dtype=float)
83
+
84
+ def clear_detections(self):
85
+ self._initialize_detections()
86
+
87
+ def add_single_ground_truth_image_info(
88
+ self, image_key, gt_boxes, gt_class_labels,
89
+ gt_is_difficult_list=None, gt_is_group_of_list=None, gt_masks=None):
90
+ """Adds groundtruth for a single image to be used for evaluation.
91
+ Args:
92
+ image_key: A unique string/integer identifier for the image.
93
+ gt_boxes: float32 numpy array of shape [num_boxes, 4] containing
94
+ `num_boxes` groundtruth boxes of the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
95
+ gt_class_labels: integer numpy array of shape [num_boxes]
96
+ containing 0-indexed groundtruth classes for the boxes.
97
+ gt_is_difficult_list: A length M numpy boolean array denoting
98
+ whether a ground truth box is a difficult instance or not. To support
99
+ the case that no boxes are difficult, it is by default set as None.
100
+ gt_is_group_of_list: A length M numpy boolean array denoting
101
+ whether a ground truth box is a group-of box or not. To support the case
102
+ that no boxes are groups-of, it is by default set as None.
103
+ gt_masks: uint8 numpy array of shape [num_boxes, height, width]
104
+ containing `num_boxes` groundtruth masks. The mask values range from 0 to 1.
105
+ """
106
+ if image_key in self.gt_boxes:
107
+ logging.warning('image %s has already been added to the ground truth database.', image_key)
108
+ return
109
+
110
+ self.gt_boxes[image_key] = gt_boxes
111
+ self.gt_class_labels[image_key] = gt_class_labels
112
+ self.gt_masks[image_key] = gt_masks
113
+ if gt_is_difficult_list is None:
114
+ num_boxes = gt_boxes.shape[0]
115
+ gt_is_difficult_list = np.zeros(num_boxes, dtype=bool)
116
+ gt_is_difficult_list = gt_is_difficult_list.astype(dtype=bool)
117
+ self.gt_is_difficult_list[image_key] = gt_is_difficult_list
118
+ if gt_is_group_of_list is None:
119
+ num_boxes = gt_boxes.shape[0]
120
+ gt_is_group_of_list = np.zeros(num_boxes, dtype=bool)
121
+ if gt_masks is None:
122
+ num_boxes = gt_boxes.shape[0]
123
+ mask_presence_indicator = np.zeros(num_boxes, dtype=bool)
124
+ else:
125
+ mask_presence_indicator = (np.sum(gt_masks, axis=(1, 2)) == 0).astype(dtype=bool)
126
+
127
+ gt_is_group_of_list = gt_is_group_of_list.astype(dtype=bool)
128
+ self.gt_is_group_of_list[image_key] = gt_is_group_of_list
129
+
130
+ # ignore boxes without masks
131
+ masked_gt_is_difficult_list = gt_is_difficult_list | mask_presence_indicator
132
+ for class_index in range(self.num_class):
133
+ num_gt_instances = np.sum(
134
+ gt_class_labels[~masked_gt_is_difficult_list & ~gt_is_group_of_list] == class_index)
135
+ num_groupof_gt_instances = self.group_of_weight * np.sum(
136
+ gt_class_labels[gt_is_group_of_list & ~masked_gt_is_difficult_list] == class_index)
137
+ self.num_gt_instances_per_class[class_index] += num_gt_instances + num_groupof_gt_instances
138
+ if np.any(gt_class_labels == class_index):
139
+ self.num_gt_imgs_per_class[class_index] += 1
140
+
141
+ def add_single_detected_image_info(
142
+ self, image_key, detected_boxes, detected_scores, detected_class_labels, detected_masks=None):
143
+ """Adds detections for a single image to be used for evaluation.
144
+ Args:
145
+ image_key: A unique string/integer identifier for the image.
146
+ detected_boxes: float32 numpy array of shape [num_boxes, 4] containing
147
+ `num_boxes` detection boxes of the format [ymin, xmin, ymax, xmax] in
148
+ absolute image coordinates.
149
+ detected_scores: float32 numpy array of shape [num_boxes] containing
150
+ detection scores for the boxes.
151
+ detected_class_labels: integer numpy array of shape [num_boxes] containing
152
+ 0-indexed detection classes for the boxes.
153
+ detected_masks: np.uint8 numpy array of shape [num_boxes, height, width]
154
+ containing `num_boxes` detection masks with values ranging between 0 and 1.
155
+ Raises:
156
+ ValueError: if the number of boxes, scores and class labels differ in length.
157
+ """
158
+ if len(detected_boxes) != len(detected_scores) or len(detected_boxes) != len(detected_class_labels):
159
+ raise ValueError(
160
+ 'detected_boxes, detected_scores and '
161
+ 'detected_class_labels should all have same lengths. Got '
162
+ '[%d, %d, %d]' % (len(detected_boxes), len(detected_scores),
163
+ len(detected_class_labels)))
164
+
165
+ if image_key in self.detection_keys:
166
+ logging.warning('image %s has already been added to the detection result database', image_key)
167
+ return
168
+
169
+ self.detection_keys.add(image_key)
170
+ if image_key in self.gt_boxes:
171
+ gt_boxes = self.gt_boxes[image_key]
172
+ gt_class_labels = self.gt_class_labels[image_key]
173
+ # Masks are popped instead of look up. The reason is that we do not want
174
+ # to keep all masks in memory which can cause memory overflow.
175
+ gt_masks = self.gt_masks.pop(image_key)
176
+ gt_is_difficult_list = self.gt_is_difficult_list[image_key]
177
+ gt_is_group_of_list = self.gt_is_group_of_list[image_key]
178
+ else:
179
+ gt_boxes = np.empty(shape=[0, 4], dtype=float)
180
+ gt_class_labels = np.array([], dtype=int)
181
+ if detected_masks is None:
182
+ gt_masks = None
183
+ else:
184
+ gt_masks = np.empty(shape=[0, 1, 1], dtype=float)
185
+ gt_is_difficult_list = np.array([], dtype=bool)
186
+ gt_is_group_of_list = np.array([], dtype=bool)
187
+ scores, tp_fp_labels, is_class_correctly_detected_in_image = \
188
+ self.per_image_eval.compute_object_detection_metrics(
189
+ detected_boxes=detected_boxes,
190
+ detected_scores=detected_scores,
191
+ detected_class_labels=detected_class_labels,
192
+ gt_boxes=gt_boxes,
193
+ gt_class_labels=gt_class_labels,
194
+ gt_is_difficult_list=gt_is_difficult_list,
195
+ gt_is_group_of_list=gt_is_group_of_list,
196
+ detected_masks=detected_masks,
197
+ gt_masks=gt_masks)
198
+
199
+ for i in range(self.num_class):
200
+ if scores[i].shape[0] > 0:
201
+ self.scores_per_class[i].append(scores[i])
202
+ self.tp_fp_labels_per_class[i].append(tp_fp_labels[i])
203
+ self.num_images_correctly_detected_per_class += is_class_correctly_detected_in_image
204
+
205
+ def evaluate(self):
206
+ """Compute evaluation result.
207
+ Returns:
208
+ A dict with the following fields -
209
+ average_precision: float numpy array of average precision for each class.
210
+ mean_ap: mean average precision of all classes, float scalar
211
+ precisions: List of precisions, each precision is a float numpy array
212
+ recalls: List of recalls, each recall is a float numpy array
213
+ corloc: numpy float array
214
+ mean_corloc: Mean CorLoc score for each class, float scalar
215
+ """
216
+ if (self.num_gt_instances_per_class == 0).any():
217
+ logging.warning(
218
+ 'The following classes have no ground truth examples: %s',
219
+ np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) + self.label_id_offset)
220
+
221
+ if self.use_weighted_mean_ap:
222
+ all_scores = np.array([], dtype=float)
223
+ all_tp_fp_labels = np.array([], dtype=bool)
224
+ for class_index in range(self.num_class):
225
+ if self.num_gt_instances_per_class[class_index] == 0:
226
+ continue
227
+ if not self.scores_per_class[class_index]:
228
+ scores = np.array([], dtype=float)
229
+ tp_fp_labels = np.array([], dtype=float)
230
+ else:
231
+ scores = np.concatenate(self.scores_per_class[class_index])
232
+ tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index])
233
+ if self.use_weighted_mean_ap:
234
+ all_scores = np.append(all_scores, scores)
235
+ all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels)
236
+ precision, recall = compute_precision_recall(
237
+ scores, tp_fp_labels, self.num_gt_instances_per_class[class_index])
238
+ recall_within_bound_indices = [
239
+ index for index, value in enumerate(recall) if
240
+ value >= self.recall_lower_bound and value <= self.recall_upper_bound
241
+ ]
242
+ recall_within_bound = recall[recall_within_bound_indices]
243
+ precision_within_bound = precision[recall_within_bound_indices]
244
+
245
+ self.precisions_per_class[class_index] = precision_within_bound
246
+ self.recalls_per_class[class_index] = recall_within_bound
247
+ self.sum_tp_class[class_index] = tp_fp_labels.sum()
248
+ average_precision = compute_average_precision(precision_within_bound, recall_within_bound)
249
+ self.average_precision_per_class[class_index] = average_precision
250
+ logging.debug('average_precision: %f', average_precision)
251
+
252
+ self.corloc_per_class = compute_cor_loc(
253
+ self.num_gt_imgs_per_class, self.num_images_correctly_detected_per_class)
254
+
255
+ if self.use_weighted_mean_ap:
256
+ num_gt_instances = np.sum(self.num_gt_instances_per_class)
257
+ precision, recall = compute_precision_recall(all_scores, all_tp_fp_labels, num_gt_instances)
258
+ recall_within_bound_indices = [
259
+ index for index, value in enumerate(recall) if
260
+ value >= self.recall_lower_bound and value <= self.recall_upper_bound
261
+ ]
262
+ recall_within_bound = recall[recall_within_bound_indices]
263
+ precision_within_bound = precision[recall_within_bound_indices]
264
+ mean_ap = compute_average_precision(precision_within_bound, recall_within_bound)
265
+ else:
266
+ mean_ap = np.nanmean(self.average_precision_per_class)
267
+ mean_corloc = np.nanmean(self.corloc_per_class)
268
+
269
+ return dict(
270
+ per_class_ap=self.average_precision_per_class, mean_ap=mean_ap,
271
+ per_class_precision=self.precisions_per_class,
272
+ per_class_recall=self.recalls_per_class,
273
+ per_class_corlocs=self.corloc_per_class, mean_corloc=mean_corloc)
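A minimal end-to-end sketch of this evaluator on a single image, assuming the package is importable as `effdet.evaluation` (the import style used at the top of this file); boxes, scores, and labels are illustrative.

```python
import numpy as np
from effdet.evaluation.object_detection_evaluation import ObjectDetectionEvaluation

evaluator = ObjectDetectionEvaluation(num_gt_classes=2, matching_iou_threshold=0.5)

# One image with a single class-0 groundtruth box in [ymin, xmin, ymax, xmax].
evaluator.add_single_ground_truth_image_info(
    image_key='img_0',
    gt_boxes=np.array([[10., 10., 50., 50.]], dtype=np.float32),
    gt_class_labels=np.array([0], dtype=int))

# Two detections: one good class-0 match, one spurious class-1 box.
evaluator.add_single_detected_image_info(
    image_key='img_0',
    detected_boxes=np.array([[12., 11., 51., 49.],
                             [60., 60., 90., 90.]], dtype=np.float32),
    detected_scores=np.array([0.95, 0.30], dtype=np.float32),
    detected_class_labels=np.array([0, 1], dtype=int))

metrics = evaluator.evaluate()
print(metrics['mean_ap'], metrics['per_class_ap'])
```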
efficientdet/effdet/evaluation/per_image_evaluation.py ADDED
@@ -0,0 +1,538 @@
1
+ from .np_mask_list import *
2
+ from .metrics import *
3
+
4
+
5
+ class PerImageEvaluation:
6
+ """Evaluate detection result of a single image."""
7
+
8
+ def __init__(self,
9
+ num_gt_classes,
10
+ matching_iou_threshold=0.5,
11
+ nms_iou_threshold=0.3,
12
+ nms_max_output_boxes=50,
13
+ group_of_weight=0.0):
14
+ """Initialized PerImageEvaluation by evaluation parameters.
15
+ Args:
16
+ num_gt_classes: Number of ground truth object classes
17
+ matching_iou_threshold: A ratio of area intersection to union, which is
18
+ the threshold to consider whether a detection is true positive or not
19
+ nms_iou_threshold: IOU threshold used in Non Maximum Suppression.
20
+ nms_max_output_boxes: Number of maximum output boxes in NMS.
21
+ group_of_weight: Weight of the group-of boxes.
22
+ """
23
+ self.matching_iou_threshold = matching_iou_threshold
24
+ self.nms_iou_threshold = nms_iou_threshold
25
+ self.nms_max_output_boxes = nms_max_output_boxes
26
+ self.num_gt_classes = num_gt_classes
27
+ self.group_of_weight = group_of_weight
28
+
29
+ def compute_object_detection_metrics(
30
+ self, detected_boxes, detected_scores, detected_class_labels,
31
+ gt_boxes, gt_class_labels, gt_is_difficult_list, gt_is_group_of_list,
32
+ detected_masks=None, gt_masks=None):
33
+ """Evaluates detections as being tp, fp or weighted from a single image.
34
+ The evaluation is done in two stages:
35
+ 1. All detections are matched to non group-of boxes; true positives are
36
+ determined and detections matched to difficult boxes are ignored.
37
+ 2. Detections that are determined as false positives are matched against
38
+ group-of boxes and weighted if matched.
39
+ Args:
40
+ detected_boxes: A float numpy array of shape [N, 4], representing N
41
+ regions of detected object instances. Each row is of the format [y_min, x_min, y_max, x_max]
42
+ detected_scores: A float numpy array of shape [N, 1], representing the
43
+ confidence scores of the detected N object instances.
44
+ detected_class_labels: An integer numpy array of shape [N, 1], representing
45
+ the class labels of the detected N object instances.
46
+ gt_boxes: A float numpy array of shape [M, 4], representing M
47
+ regions of object instances in ground truth
48
+ gt_class_labels: An integer numpy array of shape [M, 1],
49
+ representing M class labels of object instances in ground truth
50
+ gt_is_difficult_list: A boolean numpy array of length M denoting
51
+ whether a ground truth box is a difficult instance or not
52
+ gt_is_group_of_list: A boolean numpy array of length M denoting
53
+ whether a ground truth box has group-of tag
54
+ detected_masks: (optional) A uint8 numpy array of shape [N, height,
55
+ width]. If not None, the metrics will be computed based on masks.
56
+ gt_masks: (optional) A uint8 numpy array of shape [M, height,
57
+ width]. Can have empty masks, i.e. where all values are 0.
58
+ Returns:
59
+ scores: A list of C float numpy arrays. Each numpy array is of
60
+ shape [K, 1], representing K scores detected with object class label c
61
+ tp_fp_labels: A list of C boolean numpy arrays. Each numpy array
62
+ is of shape [K, 1], representing K True/False positive label of
63
+ object instances detected with class label c
64
+ is_class_correctly_detected_in_image: a numpy integer array of
65
+ shape [C, 1], indicating whether the corresponding class has at least
66
+ one instance being correctly detected in the image
67
+ """
68
+ detected_boxes, detected_scores, detected_class_labels, detected_masks = (
69
+ self._remove_invalid_boxes(detected_boxes, detected_scores, detected_class_labels, detected_masks))
70
+
71
+ scores, tp_fp_labels = self._compute_tp_fp(
72
+ detected_boxes=detected_boxes,
73
+ detected_scores=detected_scores,
74
+ detected_class_labels=detected_class_labels,
75
+ gt_boxes=gt_boxes,
76
+ gt_class_labels=gt_class_labels,
77
+ gt_is_difficult_list=gt_is_difficult_list,
78
+ gt_is_group_of_list=gt_is_group_of_list,
79
+ detected_masks=detected_masks,
80
+ gt_masks=gt_masks)
81
+
82
+ is_class_correctly_detected_in_image = self._compute_cor_loc(
83
+ detected_boxes=detected_boxes,
84
+ detected_scores=detected_scores,
85
+ detected_class_labels=detected_class_labels,
86
+ gt_boxes=gt_boxes,
87
+ gt_class_labels=gt_class_labels,
88
+ detected_masks=detected_masks,
89
+ gt_masks=gt_masks)
90
+
91
+ return scores, tp_fp_labels, is_class_correctly_detected_in_image
92
+
93
+ def _compute_cor_loc(
94
+ self, detected_boxes, detected_scores, detected_class_labels,
95
+ gt_boxes, gt_class_labels, detected_masks=None, gt_masks=None):
96
+ """Compute CorLoc score for object detection result.
97
+ Args:
98
+ detected_boxes: A float numpy array of shape [N, 4], representing N
99
+ regions of detected object instances. Each row is of the format [y_min, x_min, y_max, x_max]
100
+ detected_scores: A float numpy array of shape [N, 1], representing the
101
+ confidence scores of the detected N object instances.
102
+ detected_class_labels: An integer numpy array of shape [N, 1], representing
103
+ the class labels of the detected N object instances.
104
+ gt_boxes: A float numpy array of shape [M, 4], representing M
105
+ regions of object instances in ground truth
106
+ gt_class_labels: An integer numpy array of shape [M, 1],
107
+ representing M class labels of object instances in ground truth
108
+ detected_masks: (optional) A uint8 numpy array of shape [N, height, width].
109
+ If not None, the scores will be computed based on masks.
110
+ gt_masks: (optional) A uint8 numpy array of shape [M, height, width].
111
+ Returns:
112
+ is_class_correctly_detected_in_image: a numpy integer array of
113
+ shape [C, 1], indicating whether the corresponding class has at least
114
+ one instance being correctly detected in the image
115
+ Raises:
116
+ ValueError: If detected masks is not None but groundtruth masks are None,
117
+ or the other way around.
118
+ """
119
+ if (detected_masks is not None and gt_masks is None) or (
120
+ detected_masks is None and gt_masks is not None):
121
+ raise ValueError(
122
+ 'If `detected_masks` is provided, then `gt_masks` should also be provided.')
123
+
124
+ is_class_correctly_detected_in_image = np.zeros(
125
+ self.num_gt_classes, dtype=int)
126
+ for i in range(self.num_gt_classes):
127
+ (gt_boxes_at_ith_class, gt_masks_at_ith_class,
128
+ detected_boxes_at_ith_class, detected_scores_at_ith_class,
129
+ detected_masks_at_ith_class) = self._get_ith_class_arrays(
130
+ detected_boxes, detected_scores, detected_masks,
131
+ detected_class_labels, gt_boxes, gt_masks,
132
+ gt_class_labels, i)
133
+ is_class_correctly_detected_in_image[i] = (
134
+ self._compute_is_class_correctly_detected_in_image(
135
+ detected_boxes=detected_boxes_at_ith_class,
136
+ detected_scores=detected_scores_at_ith_class,
137
+ gt_boxes=gt_boxes_at_ith_class,
138
+ detected_masks=detected_masks_at_ith_class,
139
+ gt_masks=gt_masks_at_ith_class))
140
+
141
+ return is_class_correctly_detected_in_image
142
+
143
+ def _compute_is_class_correctly_detected_in_image(
144
+ self, detected_boxes, detected_scores, gt_boxes, detected_masks=None, gt_masks=None):
145
+ """Compute CorLoc score for a single class.
146
+ Args:
147
+ detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates
148
+ detected_scores: A 1-d numpy array of length N representing classification score
149
+ gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates
150
+ detected_masks: (optional) A np.uint8 numpy array of shape [N, height, width].
151
+ If not None, the scores will be computed based on masks.
152
+ gt_masks: (optional) A np.uint8 numpy array of shape [M, height, width].
153
+ Returns:
154
+ is_class_correctly_detected_in_image: An integer 1 or 0 denoting whether a
155
+ class is correctly detected in the image or not
156
+ """
157
+ if detected_boxes.size > 0:
158
+ if gt_boxes.size > 0:
159
+ max_score_id = np.argmax(detected_scores)
160
+ mask_mode = False
161
+ if detected_masks is not None and gt_masks is not None:
162
+ mask_mode = True
163
+ if mask_mode:
164
+ detected_boxlist = MaskList(
165
+ box_data=np.expand_dims(detected_boxes[max_score_id], axis=0),
166
+ mask_data=np.expand_dims(detected_masks[max_score_id], axis=0))
167
+ gt_boxlist = MaskList(box_data=gt_boxes, mask_data=gt_masks)
168
+ iou = iou_masklist(detected_boxlist, gt_boxlist)
169
+ else:
170
+ detected_boxlist = BoxList(np.expand_dims(detected_boxes[max_score_id, :], axis=0))
171
+ gt_boxlist = BoxList(gt_boxes)
172
+ iou = iou_boxlist(detected_boxlist, gt_boxlist)
173
+ if np.max(iou) >= self.matching_iou_threshold:
174
+ return 1
175
+ return 0
176
+
177
+ def _compute_tp_fp(
178
+ self, detected_boxes, detected_scores, detected_class_labels,
179
+ gt_boxes, gt_class_labels, gt_is_difficult_list, gt_is_group_of_list, detected_masks=None, gt_masks=None):
180
+ """Labels true/false positives of detections of an image across all classes.
181
+ Args:
182
+ detected_boxes: A float numpy array of shape [N, 4], representing N
183
+ regions of detected object instances. Each row is of the format [y_min, x_min, y_max, x_max]
184
+ detected_scores: A float numpy array of shape [N, 1], representing the
185
+ confidence scores of the detected N object instances.
186
+ detected_class_labels: A integer numpy array of shape [N, 1], representing
187
+ the class labels of the detected N object instances.
188
+ gt_boxes: A float numpy array of shape [M, 4], representing M
189
+ regions of object instances in ground truth
190
+ gt_class_labels: An integer numpy array of shape [M, 1],
191
+ representing M class labels of object instances in ground truth
192
+ gt_is_difficult_list: A boolean numpy array of length M denoting
193
+ whether a ground truth box is a difficult instance or not
194
+ gt_is_group_of_list: A boolean numpy array of length M denoting
195
+ whether a ground truth box has group-of tag
196
+ detected_masks: (optional) A np.uint8 numpy array of shape [N, height,
197
+ width]. If not None, the scores will be computed based on masks.
198
+ gt_masks: (optional) A np.uint8 numpy array of shape [M, height, width].
199
+ Returns:
200
+ result_scores: A list of float numpy arrays. Each numpy array is of
201
+ shape [K, 1], representing K scores detected with object class label c
202
+ result_tp_fp_labels: A list of boolean numpy array. Each numpy array is of
203
+ shape [K, 1], representing K True/False positive label of object
204
+ instances detected with class label c
205
+ Raises:
206
+ ValueError: If detected masks is not None but groundtruth masks are None,
207
+ or the other way around.
208
+ """
209
+ if detected_masks is not None and gt_masks is None:
210
+ raise ValueError(
211
+ 'Detected masks is available but groundtruth masks is not.')
212
+ if detected_masks is None and gt_masks is not None:
213
+ raise ValueError(
214
+ 'Groundtruth masks is available but detected masks is not.')
215
+
216
+ result_scores = []
217
+ result_tp_fp_labels = []
218
+ for i in range(self.num_gt_classes):
219
+ gt_is_difficult_list_at_ith_class = (
220
+ gt_is_difficult_list[gt_class_labels == i])
221
+ gt_is_group_of_list_at_ith_class = (
222
+ gt_is_group_of_list[gt_class_labels == i])
223
+ (gt_boxes_at_ith_class, gt_masks_at_ith_class,
224
+ detected_boxes_at_ith_class, detected_scores_at_ith_class,
225
+ detected_masks_at_ith_class) = self._get_ith_class_arrays(
226
+ detected_boxes, detected_scores, detected_masks,
227
+ detected_class_labels, gt_boxes, gt_masks,
228
+ gt_class_labels, i)
229
+ scores, tp_fp_labels = self._compute_tp_fp_for_single_class(
230
+ detected_boxes=detected_boxes_at_ith_class,
231
+ detected_scores=detected_scores_at_ith_class,
232
+ gt_boxes=gt_boxes_at_ith_class,
233
+ gt_is_difficult_list=gt_is_difficult_list_at_ith_class,
234
+ gt_is_group_of_list=gt_is_group_of_list_at_ith_class,
235
+ detected_masks=detected_masks_at_ith_class,
236
+ gt_masks=gt_masks_at_ith_class)
237
+ result_scores.append(scores)
238
+ result_tp_fp_labels.append(tp_fp_labels)
239
+ return result_scores, result_tp_fp_labels
240
+
241
+ def _get_overlaps_and_scores_mask_mode(
242
+ self, detected_boxes, detected_scores, detected_masks,
243
+ gt_boxes, gt_masks, gt_is_group_of_list):
244
+ """Computes overlaps and scores between detected and groudntruth masks.
245
+ Args:
246
+ detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates
247
+ detected_scores: A 1-d numpy array of length N representing classification score
248
+ detected_masks: A uint8 numpy array of shape [N, height, width]. If not
249
+ None, the scores will be computed based on masks.
250
+ gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates
251
+ gt_masks: A uint8 numpy array of shape [M, height, width].
252
+ gt_is_group_of_list: A boolean numpy array of length M denoting
253
+ whether a ground truth box has group-of tag. If a groundtruth box is
254
+ group-of box, every detection matching this box is ignored.
255
+ Returns:
256
+ iou: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
257
+ gt_non_group_of_boxlist.num_boxes() == 0 it will be None.
258
+ ioa: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
259
+ gt_group_of_boxlist.num_boxes() == 0 it will be None.
260
+ scores: The score of the detected boxlist.
261
+ num_boxes: Number of non-maximum suppressed detected boxes.
262
+ """
263
+ detected_boxlist = MaskList(box_data=detected_boxes, mask_data=detected_masks)
264
+ detected_boxlist.add_field('scores', detected_scores)
265
+ detected_boxlist = non_max_suppression_mask(detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)
266
+ gt_non_group_of_boxlist = MaskList(
267
+ box_data=gt_boxes[~gt_is_group_of_list], mask_data=gt_masks[~gt_is_group_of_list])
268
+ gt_group_of_boxlist = MaskList(
269
+ box_data=gt_boxes[gt_is_group_of_list], mask_data=gt_masks[gt_is_group_of_list])
270
+ iou_b = iou_masklist(detected_boxlist, gt_non_group_of_boxlist)
271
+ ioa_b = np.transpose(ioa_masklist(gt_group_of_boxlist, detected_boxlist))
272
+ scores = detected_boxlist.get_field('scores')
273
+ num_boxes = detected_boxlist.num_boxes()
274
+ return iou_b, ioa_b, scores, num_boxes
275
+
276
+ def _get_overlaps_and_scores_box_mode(
277
+ self, detected_boxes, detected_scores, gt_boxes, gt_is_group_of_list):
278
+ """Computes overlaps and scores between detected and groudntruth boxes.
279
+ Args:
280
+ detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates
281
+ detected_scores: A 1-d numpy array of length N representing classification score
282
+ gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates
283
+ gt_is_group_of_list: A boolean numpy array of length M denoting
284
+ whether a ground truth box has group-of tag. If a groundtruth box is
285
+ group-of box, every detection matching this box is ignored.
286
+ Returns:
287
+ iou: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
288
+ gt_non_group_of_boxlist.num_boxes() == 0 it will be None.
289
+ ioa: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
290
+ gt_group_of_boxlist.num_boxes() == 0 it will be None.
291
+ scores: The score of the detected boxlist.
292
+ num_boxes: Number of non-maximum suppressed detected boxes.
293
+ """
294
+ detected_boxlist = BoxList(detected_boxes)
295
+ detected_boxlist.add_field('scores', detected_scores)
296
+ detected_boxlist = non_max_suppression(detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)
297
+ gt_non_group_of_boxlist = BoxList(gt_boxes[~gt_is_group_of_list])
298
+ gt_group_of_boxlist = BoxList(gt_boxes[gt_is_group_of_list])
299
+ iou_b = iou_boxlist(detected_boxlist, gt_non_group_of_boxlist)
300
+ ioa_b = np.transpose(ioa_boxlist(gt_group_of_boxlist, detected_boxlist))
301
+ scores = detected_boxlist.get_field('scores')
302
+ num_boxes = detected_boxlist.num_boxes()
303
+ return iou_b, ioa_b, scores, num_boxes
304
+
305
+ def _compute_tp_fp_for_single_class(
306
+ self, detected_boxes, detected_scores, gt_boxes,
307
+ gt_is_difficult_list, gt_is_group_of_list, detected_masks=None, gt_masks=None):
308
+ """Labels boxes detected with the same class from the same image as tp/fp.
309
+ Args:
310
+ detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates
311
+ detected_scores: A 1-d numpy array of length N representing classification score
312
+ gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates
313
+ gt_is_difficult_list: A boolean numpy array of length M denoting
314
+ whether a ground truth box is a difficult instance or not. If a
315
+ groundtruth box is difficult, every detection matching this box is ignored.
316
+ gt_is_group_of_list: A boolean numpy array of length M denoting
317
+ whether a ground truth box has group-of tag. If a groundtruth box is
318
+ group-of box, every detection matching this box is ignored.
319
+ detected_masks: (optional) A uint8 numpy array of shape [N, height,
320
+ width]. If not None, the scores will be computed based on masks.
321
+ gt_masks: (optional) A uint8 numpy array of shape [M, height, width].
322
+ Returns:
323
+ Two arrays of the same size, containing all boxes that were evaluated as
324
+ being true positives or false positives; if a box matched to a difficult
325
+ box or to a group-of box, it is ignored.
326
+ scores: A numpy array representing the detection scores.
327
+ tp_fp_labels: a boolean numpy array indicating whether a detection is a true positive.
328
+ """
329
+ if detected_boxes.size == 0:
330
+ return np.array([], dtype=float), np.array([], dtype=bool)
331
+
332
+ mask_mode = False
333
+ if detected_masks is not None and gt_masks is not None:
334
+ mask_mode = True
335
+
336
+ iou_b = np.ndarray([0, 0])
337
+ ioa_b = np.ndarray([0, 0])
338
+ iou_m = np.ndarray([0, 0])
339
+ ioa_m = np.ndarray([0, 0])
340
+ if mask_mode:
341
+ # For Instance Segmentation Evaluation on Open Images V5, not all boxed
342
+ # instances have corresponding segmentation annotations. Those boxes that
343
+ # dont have segmentation annotations are represented as empty masks in
344
+ # gt_masks nd array.
345
+ mask_presence_indicator = (np.sum(gt_masks, axis=(1, 2)) > 0)
346
+
347
+ iou_m, ioa_m, scores, num_detected_boxes = self._get_overlaps_and_scores_mask_mode(
348
+ detected_boxes=detected_boxes,
349
+ detected_scores=detected_scores,
350
+ detected_masks=detected_masks,
351
+ gt_boxes=gt_boxes[mask_presence_indicator, :],
352
+ gt_masks=gt_masks[mask_presence_indicator, :],
353
+ gt_is_group_of_list=gt_is_group_of_list[mask_presence_indicator])
354
+
355
+ if sum(mask_presence_indicator) < len(mask_presence_indicator):
356
+ # Not all masks are present - some masks are empty
357
+ iou_b, ioa_b, _, num_detected_boxes = self._get_overlaps_and_scores_box_mode(
358
+ detected_boxes=detected_boxes,
359
+ detected_scores=detected_scores,
360
+ gt_boxes=gt_boxes[~mask_presence_indicator, :],
361
+ gt_is_group_of_list=gt_is_group_of_list[~mask_presence_indicator])
362
+ num_detected_boxes = detected_boxes.shape[0]
363
+ else:
364
+ mask_presence_indicator = np.zeros(gt_is_group_of_list.shape, dtype=bool)
365
+ iou_b, ioa_b, scores, num_detected_boxes = self._get_overlaps_and_scores_box_mode(
366
+ detected_boxes=detected_boxes,
367
+ detected_scores=detected_scores,
368
+ gt_boxes=gt_boxes,
369
+ gt_is_group_of_list=gt_is_group_of_list)
370
+
371
+ if gt_boxes.size == 0:
372
+ return scores, np.zeros(num_detected_boxes, dtype=bool)
373
+
374
+ tp_fp_labels = np.zeros(num_detected_boxes, dtype=bool)
375
+ is_matched_to_box = np.zeros(num_detected_boxes, dtype=bool)
376
+ is_matched_to_difficult = np.zeros(num_detected_boxes, dtype=bool)
377
+ is_matched_to_group_of = np.zeros(num_detected_boxes, dtype=bool)
378
+
379
+ def compute_match_iou(iou_matrix, gt_nongroup_of_is_difficult_list, is_box):
380
+ """Computes TP/FP for non group-of box matching.
381
+ The function updates the following local variables:
382
+ tp_fp_labels - set to True for detections matched to a non-difficult, not yet detected groundtruth box.
383
+ is_matched_to_difficult - set to True for detections that were matched to a
384
+ difficult groundtruth box at this stage.
385
+ is_matched_to_box - the detections that were processed at this stage are marked as is_box.
386
+ Args:
387
+ iou_matrix: intersection-over-union matrix [num_gt_boxes]x[num_det_boxes].
388
+ gt_nongroup_of_is_difficult_list: boolean that specifies if gt box is difficult.
389
+ is_box: boolean that specifies if currently boxes or masks are processed.
390
+ """
391
+ max_overlap_gt_ids = np.argmax(iou_matrix, axis=1)
392
+ is_gt_detected = np.zeros(iou_matrix.shape[1], dtype=bool)
393
+ for i in range(num_detected_boxes):
394
+ gt_id = max_overlap_gt_ids[i]
395
+ is_evaluatable = (
396
+ not tp_fp_labels[i] and
397
+ not is_matched_to_difficult[i] and
398
+ iou_matrix[i, gt_id] >= self.matching_iou_threshold and
399
+ not is_matched_to_group_of[i])
400
+ if is_evaluatable:
401
+ if not gt_nongroup_of_is_difficult_list[gt_id]:
402
+ if not is_gt_detected[gt_id]:
403
+ tp_fp_labels[i] = True
404
+ is_gt_detected[gt_id] = True
405
+ is_matched_to_box[i] = is_box
406
+ else:
407
+ is_matched_to_difficult[i] = True
408
+
409
+ def compute_match_ioa(ioa_matrix, is_box):
410
+ """Computes TP/FP for group-of box matching.
411
+ The function updates the following local variables:
412
+ is_matched_to_group_of - if a box is matched to group-of
413
+ is_matched_to_box - the detections that were processed at this stage are marked as is_box.
414
+ Args:
415
+ ioa_matrix: intersection-over-area matrix [num_gt_boxes]x[num_det_boxes].
416
+ is_box: boolean that specifies if currently boxes or masks are processed.
417
+ Returns:
418
+ scores_group_of: scores of detections matched to group-of boxes, shape [num_groupof_matched].
419
+ tp_fp_labels_group_of: boolean array of size [num_groupof_matched], all values are True.
420
+ """
421
+ scores_group_of = np.zeros(ioa_matrix.shape[1], dtype=float)
422
+ tp_fp_labels_group_of = self.group_of_weight * np.ones(ioa_matrix.shape[1], dtype=float)
423
+ max_overlap_group_of_gt_ids = np.argmax(ioa_matrix, axis=1)
424
+ for i in range(num_detected_boxes):
425
+ gt_id = max_overlap_group_of_gt_ids[i]
426
+ is_evaluatable = (
427
+ not tp_fp_labels[i] and
428
+ not is_matched_to_difficult[i] and
429
+ ioa_matrix[i, gt_id] >= self.matching_iou_threshold and
430
+ not is_matched_to_group_of[i])
431
+ if is_evaluatable:
432
+ is_matched_to_group_of[i] = True
433
+ is_matched_to_box[i] = is_box
434
+ scores_group_of[gt_id] = max(scores_group_of[gt_id], scores[i])
435
+ selector = np.where((scores_group_of > 0) & (tp_fp_labels_group_of > 0))
436
+ scores_group_of = scores_group_of[selector]
437
+ tp_fp_labels_group_of = tp_fp_labels_group_of[selector]
438
+
439
+ return scores_group_of, tp_fp_labels_group_of
440
+
441
+ # The evaluation is done in two stages:
442
+ # 1. Evaluate all objects that actually have instance level masks.
443
+ # 2. Evaluate all objects that are not already evaluated as boxes.
444
+ if iou_m.shape[1] > 0:
445
+ gt_is_difficult_mask_list = gt_is_difficult_list[mask_presence_indicator]
446
+ gt_is_group_of_mask_list = gt_is_group_of_list[mask_presence_indicator]
447
+ compute_match_iou(iou_m, gt_is_difficult_mask_list[~gt_is_group_of_mask_list], is_box=False)
448
+
449
+ scores_mask_group_of = np.ndarray([0], dtype=float)
450
+ tp_fp_labels_mask_group_of = np.ndarray([0], dtype=float)
451
+ if ioa_m.shape[1] > 0:
452
+ scores_mask_group_of, tp_fp_labels_mask_group_of = compute_match_ioa(ioa_m, is_box=False)
453
+
454
+ # Tp-fp evaluation for non-group of boxes (if any).
455
+ if iou_b.shape[1] > 0:
456
+ gt_is_difficult_box_list = gt_is_difficult_list[~mask_presence_indicator]
457
+ gt_is_group_of_box_list = gt_is_group_of_list[~mask_presence_indicator]
458
+ compute_match_iou(iou_b, gt_is_difficult_box_list[~gt_is_group_of_box_list], is_box=True)
459
+
460
+ scores_box_group_of = np.ndarray([0], dtype=float)
461
+ tp_fp_labels_box_group_of = np.ndarray([0], dtype=float)
462
+ if ioa_b.shape[1] > 0:
463
+ scores_box_group_of, tp_fp_labels_box_group_of = compute_match_ioa(ioa_b, is_box=True)
464
+
465
+ if mask_mode:
466
+ # Note: here crowds are treated as ignore regions.
467
+ valid_entries = (~is_matched_to_difficult & ~is_matched_to_group_of & ~is_matched_to_box)
468
+ return np.concatenate((scores[valid_entries], scores_mask_group_of)),\
469
+ np.concatenate((tp_fp_labels[valid_entries].astype(float), tp_fp_labels_mask_group_of))
470
+ else:
471
+ valid_entries = (~is_matched_to_difficult & ~is_matched_to_group_of)
472
+ return np.concatenate((scores[valid_entries], scores_box_group_of)),\
473
+ np.concatenate((tp_fp_labels[valid_entries].astype(float), tp_fp_labels_box_group_of))
474
+
475
+ def _get_ith_class_arrays(
476
+ self, detected_boxes, detected_scores, detected_masks, detected_class_labels,
477
+ gt_boxes, gt_masks, gt_class_labels, class_index):
478
+ """Returns numpy arrays belonging to class with index `class_index`.
479
+ Args:
480
+ detected_boxes: A numpy array containing detected boxes.
481
+ detected_scores: A numpy array containing detected scores.
482
+ detected_masks: A numpy array containing detected masks.
483
+ detected_class_labels: A numpy array containing detected class labels.
484
+ gt_boxes: A numpy array containing groundtruth boxes.
485
+ gt_masks: A numpy array containing groundtruth masks.
486
+ gt_class_labels: A numpy array containing groundtruth class labels.
487
+ class_index: An integer index.
488
+ Returns:
489
+ gt_boxes_at_ith_class: A numpy array containing groundtruth boxes labeled as ith class.
490
+ gt_masks_at_ith_class: A numpy array containing groundtruth masks labeled as ith class.
491
+ detected_boxes_at_ith_class: A numpy array containing detected boxes corresponding to the ith class.
492
+ detected_scores_at_ith_class: A numpy array containing detected scores corresponding to the ith class.
493
+ detected_masks_at_ith_class: A numpy array containing detected masks corresponding to the ith class.
494
+ """
495
+ selected_groundtruth = (gt_class_labels == class_index)
496
+ gt_boxes_at_ith_class = gt_boxes[selected_groundtruth]
497
+ if gt_masks is not None:
498
+ gt_masks_at_ith_class = gt_masks[selected_groundtruth]
499
+ else:
500
+ gt_masks_at_ith_class = None
501
+ selected_detections = (detected_class_labels == class_index)
502
+ detected_boxes_at_ith_class = detected_boxes[selected_detections]
503
+ detected_scores_at_ith_class = detected_scores[selected_detections]
504
+ if detected_masks is not None:
505
+ detected_masks_at_ith_class = detected_masks[selected_detections]
506
+ else:
507
+ detected_masks_at_ith_class = None
508
+ return (gt_boxes_at_ith_class, gt_masks_at_ith_class,
509
+ detected_boxes_at_ith_class, detected_scores_at_ith_class,
510
+ detected_masks_at_ith_class)
511
+
512
+ def _remove_invalid_boxes(
513
+ self, detected_boxes, detected_scores, detected_class_labels, detected_masks=None):
514
+ """Removes entries with invalid boxes.
515
+ A box is invalid if either its xmax is smaller than its xmin, or its ymax is smaller than its ymin.
516
+ Args:
517
+ detected_boxes: A float numpy array of size [num_boxes, 4] containing box
518
+ coordinates in [ymin, xmin, ymax, xmax] format.
519
+ detected_scores: A float numpy array of size [num_boxes].
520
+ detected_class_labels: A int32 numpy array of size [num_boxes].
521
+ detected_masks: A uint8 numpy array of size [num_boxes, height, width].
522
+ Returns:
523
+ valid_detected_boxes: A float numpy array of size [num_valid_boxes, 4]
524
+ containing box coordinates in [ymin, xmin, ymax, xmax] format.
525
+ valid_detected_scores: A float numpy array of size [num_valid_boxes].
526
+ valid_detected_class_labels: A int32 numpy array of size [num_valid_boxes].
527
+ valid_detected_masks: A uint8 numpy array of size [num_valid_boxes, height, width].
528
+ """
529
+ valid_indices = np.logical_and(
530
+ detected_boxes[:, 0] < detected_boxes[:, 2], detected_boxes[:, 1] < detected_boxes[:, 3])
531
+ detected_boxes = detected_boxes[valid_indices]
532
+ detected_scores = detected_scores[valid_indices]
533
+ detected_class_labels = detected_class_labels[valid_indices]
534
+ if detected_masks is not None:
535
+ detected_masks = detected_masks[valid_indices]
536
+ return [detected_boxes, detected_scores, detected_class_labels, detected_masks]
537
+
538
+
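For reference, a minimal NumPy sketch of the filtering rule used by `_remove_invalid_boxes` above: boxes in [ymin, xmin, ymax, xmax] format are kept only when ymax > ymin and xmax > xmin. The example boxes and scores are made up for illustration.

```python
import numpy as np

# Hypothetical detections in [ymin, xmin, ymax, xmax] format; the second box is
# degenerate (ymax < ymin) and should be dropped, mirroring _remove_invalid_boxes.
boxes = np.array([[0.1, 0.1, 0.5, 0.6],
                  [0.7, 0.2, 0.4, 0.9],   # invalid: ymax < ymin
                  [0.2, 0.3, 0.8, 0.35]])
scores = np.array([0.9, 0.8, 0.3])

valid = np.logical_and(boxes[:, 0] < boxes[:, 2], boxes[:, 1] < boxes[:, 3])
print(boxes[valid])   # keeps rows 0 and 2
print(scores[valid])  # [0.9, 0.3]
```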
efficientdet/effdet/evaluator.py ADDED
@@ -0,0 +1,195 @@
1
+ import torch
2
+ import torch.distributed as dist
3
+ import abc
4
+ import json
5
+ import logging
6
+ import time
7
+ import numpy as np
8
+
9
+ from .distributed import synchronize, is_main_process, all_gather_container
10
+ from pycocotools.cocoeval import COCOeval
11
+
12
+ # FIXME experimenting with speedups for OpenImages eval, it's slow
13
+ #import pyximport; py_importer, pyx_importer = pyximport.install(pyimport=True)
14
+ import effdet.evaluation.detection_evaluator as tfm_eval
15
+ #pyximport.uninstall(py_importer, pyx_importer)
16
+
17
+ _logger = logging.getLogger(__name__)
18
+
19
+
20
+ __all__ = ['CocoEvaluator', 'PascalEvaluator', 'OpenImagesEvaluator', 'create_evaluator']
21
+
22
+
23
+ class Evaluator:
24
+
25
+ def __init__(self, distributed=False, pred_yxyx=False):
26
+ self.distributed = distributed
27
+ self.distributed_device = None
28
+ self.pred_yxyx = pred_yxyx
29
+ self.img_indices = []
30
+ self.predictions = []
31
+
32
+ def add_predictions(self, detections, target):
33
+ if self.distributed:
34
+ if self.distributed_device is None:
35
+ # cache for use later to broadcast end metric
36
+ self.distributed_device = detections.device
37
+ synchronize()
38
+ detections = all_gather_container(detections)
39
+ img_indices = all_gather_container(target['img_idx'])
40
+ if not is_main_process():
41
+ return
42
+ else:
43
+ img_indices = target['img_idx']
44
+
45
+ detections = detections.cpu().numpy()
46
+ img_indices = img_indices.cpu().numpy()
47
+ for img_idx, img_dets in zip(img_indices, detections):
48
+ self.img_indices.append(img_idx)
49
+ self.predictions.append(img_dets)
50
+
51
+ def _coco_predictions(self):
52
+ # generate coco-style predictions
53
+ coco_predictions = []
54
+ coco_ids = []
55
+ for img_idx, img_dets in zip(self.img_indices, self.predictions):
56
+ img_id = self._dataset.img_ids[img_idx]
57
+ coco_ids.append(img_id)
58
+ if self.pred_yxyx:
59
+ # to xyxy
60
+ img_dets[:, 0:4] = img_dets[:, [1, 0, 3, 2]]
61
+ # to xywh
62
+ img_dets[:, 2] -= img_dets[:, 0]
63
+ img_dets[:, 3] -= img_dets[:, 1]
64
+ for det in img_dets:
65
+ score = float(det[4])
66
+ if score < .001: # stop when below this threshold, scores in descending order
67
+ break
68
+ coco_det = dict(
69
+ image_id=int(img_id),
70
+ bbox=det[0:4].tolist(),
71
+ score=score,
72
+ category_id=int(det[5]))
73
+ coco_predictions.append(coco_det)
74
+ return coco_predictions, coco_ids
75
+
76
+ @abc.abstractmethod
77
+ def evaluate(self):
78
+ pass
79
+
80
+ def save(self, result_file):
81
+ # save results in coco style, override to save in an alternate form
82
+ if not self.distributed or dist.get_rank() == 0:
83
+ assert len(self.predictions)
84
+ coco_predictions, coco_ids = self._coco_predictions()
85
+ json.dump(coco_predictions, open(result_file, 'w'), indent=4)
86
+
87
+
88
+ class CocoEvaluator(Evaluator):
89
+
90
+ def __init__(self, dataset, neptune=None, distributed=False, pred_yxyx=False):
91
+ super().__init__(distributed=distributed, pred_yxyx=pred_yxyx)
92
+ self._dataset = dataset.parser
93
+ self.coco_api = dataset.parser.coco
94
+ self.neptune = neptune
95
+
96
+ def reset(self):
97
+ self.img_indices = []
98
+ self.predictions = []
99
+
100
+ def evaluate(self):
101
+ if not self.distributed or dist.get_rank() == 0:
102
+ assert len(self.predictions)
103
+ coco_predictions, coco_ids = self._coco_predictions()
104
+ json.dump(coco_predictions, open('./temp.json', 'w'), indent=4)
105
+ results = self.coco_api.loadRes('./temp.json')
106
+ coco_eval = COCOeval(self.coco_api, results, 'bbox')
107
+ coco_eval.params.imgIds = coco_ids # score only ids we've used
108
+ coco_eval.evaluate()
109
+ coco_eval.accumulate()
110
+ coco_eval.summarize()
111
+ metric = coco_eval.stats[0] # mAP 0.5-0.95
112
+ if self.neptune:
113
+ self.neptune.log_metric('valid/mAP/0.5-0.95IOU', metric)
114
+ self.neptune.log_metric('valid/mAP/0.5IOU', coco_eval.stats[1])
115
+ if self.distributed:
116
+ dist.broadcast(torch.tensor(metric, device=self.distributed_device), 0)
117
+ else:
118
+ metric = torch.tensor(0, device=self.distributed_device)
119
+ dist.broadcast(metric, 0)
120
+ metric = metric.item()
121
+ self.reset()
122
+ return metric
123
+
124
+
125
+ class TfmEvaluator(Evaluator):
126
+ """ Tensorflow Models Evaluator Wrapper """
127
+ def __init__(
128
+ self, dataset, neptune=None, distributed=False, pred_yxyx=False,
129
+ evaluator_cls=tfm_eval.ObjectDetectionEvaluator):
130
+ super().__init__(distributed=distributed, pred_yxyx=pred_yxyx)
131
+ self._evaluator = evaluator_cls(categories=dataset.parser.cat_dicts)
132
+ self._eval_metric_name = self._evaluator._metric_names[0]
133
+ self._dataset = dataset.parser
134
+ self.neptune = neptune
135
+
136
+ def reset(self):
137
+ self._evaluator.clear()
138
+ self.img_indices = []
139
+ self.predictions = []
140
+
141
+ def evaluate(self):
142
+ if not self.distributed or dist.get_rank() == 0:
143
+ for img_idx, img_dets in zip(self.img_indices, self.predictions):
144
+ gt = self._dataset.get_ann_info(img_idx)
145
+ self._evaluator.add_single_ground_truth_image_info(img_idx, gt)
146
+
147
+ bbox = img_dets[:, 0:4] if self.pred_yxyx else img_dets[:, [1, 0, 3, 2]]
148
+ det = dict(bbox=bbox, score=img_dets[:, 4], cls=img_dets[:, 5])
149
+ self._evaluator.add_single_detected_image_info(img_idx, det)
150
+
151
+ metrics = self._evaluator.evaluate()
152
+ _logger.info('Metrics:')
153
+ for k, v in metrics.items():
154
+ _logger.info(f'{k}: {v}')
155
+ if self.neptune:
156
+ key = 'valid/mAP/' + str(k).split('/')[-1]
157
+ self.neptune.log_metric(key, v)
158
+
159
+ map_metric = metrics[self._eval_metric_name]
160
+ if self.distributed:
161
+ dist.broadcast(torch.tensor(map_metric, device=self.distributed_device), 0)
162
+ else:
163
+ map_metric = torch.tensor(0, device=self.distributed_device)
164
+ wait = dist.broadcast(map_metric, 0, async_op=True)
165
+ while not wait.is_completed():
166
+ # wait without spinning the cpu @ 100%, no need for low latency here
167
+ time.sleep(0.5)
168
+ map_metric = map_metric.item()
169
+ self.reset()
170
+ return map_metric
171
+
172
+
173
+ class PascalEvaluator(TfmEvaluator):
174
+
175
+ def __init__(self, dataset, neptune=None, distributed=False, pred_yxyx=False):
176
+ super().__init__(
177
+ dataset, neptune, distributed=distributed, pred_yxyx=pred_yxyx, evaluator_cls=tfm_eval.PascalDetectionEvaluator)
178
+
179
+
180
+ class OpenImagesEvaluator(TfmEvaluator):
181
+
182
+ def __init__(self, dataset, distributed=False, pred_yxyx=False):
183
+ super().__init__(
184
+ dataset, distributed=distributed, pred_yxyx=pred_yxyx, evaluator_cls=tfm_eval.OpenImagesDetectionEvaluator)
185
+
186
+
187
+ def create_evaluator(name, dataset, neptune=None, distributed=False, pred_yxyx=False):
188
+ # FIXME support OpenImages Challenge2019 metric w/ image level label consideration
189
+ if 'coco' in name:
190
+ return CocoEvaluator(dataset, neptune, distributed=distributed, pred_yxyx=pred_yxyx)
191
+ elif 'openimages' in name:
192
+ return OpenImagesEvaluator(dataset, distributed=distributed, pred_yxyx=pred_yxyx)
193
+ else:
194
+ return CocoEvaluator(dataset, neptune, distributed=distributed, pred_yxyx=pred_yxyx)
195
+ #return PascalEvaluator(dataset, neptune, distributed=distributed, pred_yxyx=pred_yxyx)
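A rough usage sketch of the evaluator API above. It assumes a validation dataset and loader built with this repo's `effdet.data` helpers and a model wrapped in the prediction bench; `val_dataset`, `val_loader`, and `bench` are placeholders, and the exact bench call signature is an assumption, not part of this file.

```python
import torch
from effdet.evaluator import create_evaluator

# Detections are assumed to be the per-image [x, y, x, y, score, class] rows
# produced by the prediction bench (pred_yxyx=False), as _coco_predictions expects.
evaluator = create_evaluator('coco', val_dataset, distributed=False, pred_yxyx=False)

bench.eval()
with torch.no_grad():
    for images, targets in val_loader:
        detections = bench(images, img_info=targets)  # assumed DetBenchPredict signature
        evaluator.add_predictions(detections, targets)

mean_ap = evaluator.evaluate()  # mAP @ 0.5:0.95 for the COCO evaluator
print(mean_ap)
```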
efficientdet/effdet/factory.py ADDED
@@ -0,0 +1,54 @@
1
+ from .efficientdet import EfficientDet, HeadNet
2
+ from .bench import DetBenchTrain, DetBenchPredict
3
+ from .config import get_efficientdet_config
4
+ from .helpers import load_pretrained, load_checkpoint
5
+
6
+
7
+ def create_model(
8
+ model_name, bench_task='', num_classes=None, pretrained=False,
9
+ checkpoint_path='', checkpoint_ema=False, **kwargs):
10
+
11
+ config = get_efficientdet_config(model_name)
12
+ return create_model_from_config(
13
+ config, bench_task=bench_task, num_classes=num_classes, pretrained=pretrained,
14
+ checkpoint_path=checkpoint_path, checkpoint_ema=checkpoint_ema, **kwargs)
15
+
16
+
17
+ def create_model_from_config(
18
+ config, bench_task='', num_classes=None, pretrained=False,
19
+ checkpoint_path='', checkpoint_ema=False, **kwargs):
20
+
21
+ pretrained_backbone = kwargs.pop('pretrained_backbone', True)
22
+ if pretrained or checkpoint_path:
23
+ pretrained_backbone = False # no point in loading backbone weights
24
+
25
+ # Config overrides, override some config values via kwargs.
26
+ overrides = ('redundant_bias', 'label_smoothing', 'new_focal', 'jit_loss')
27
+ for ov in overrides:
28
+ value = kwargs.pop(ov, None)
29
+ if value is not None:
30
+ setattr(config, ov, value)
31
+
32
+ labeler = kwargs.pop('bench_labeler', False)
33
+
34
+ # create the base model
35
+ model = EfficientDet(config, pretrained_backbone=pretrained_backbone, **kwargs)
36
+
37
+ # pretrained weights are always spec'd for original config, load them before we change the model
38
+ if pretrained:
39
+ load_pretrained(model, config.url)
40
+
41
+ # reset model head if num_classes doesn't match configs
42
+ if num_classes is not None and num_classes != config.num_classes:
43
+ model.reset_head(num_classes=num_classes)
44
+
45
+ # load an argument specified training checkpoint
46
+ if checkpoint_path:
47
+ load_checkpoint(model, checkpoint_path, use_ema=checkpoint_ema)
48
+
49
+ # wrap model in task specific training/prediction bench if set
50
+ if bench_task == 'train':
51
+ model = DetBenchTrain(model, create_labeler=labeler)
52
+ elif bench_task == 'predict':
53
+ model = DetBenchPredict(model)
54
+ return model
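A small sketch of how the factory above is typically called. The config name, number of classes, and input size are assumptions (any name known to `get_efficientdet_config` would work), and the bench is assumed to accept a bare image batch when no scaling info is supplied.

```python
import torch
from effdet.factory import create_model

# Build a D0-sized model wrapped in the prediction bench; pretrained=False so the
# sketch does not depend on downloading weights (set True to pull config.url).
model = create_model('efficientdet_d0', bench_task='predict', num_classes=7, pretrained=False)
model.eval()

dummy = torch.randn(1, 3, 512, 512)  # 512x512 is the assumed input size for a D0 config
with torch.no_grad():
    detections = model(dummy)        # assumed output: [batch, max_dets, 6] -> x, y, x, y, score, class
print(detections.shape)
```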
efficientdet/effdet/helpers.py ADDED
@@ -0,0 +1,22 @@
1
+ import torch
2
+ import os
3
+ import logging
4
+ from collections import OrderedDict
5
+
6
+ from timm.models import load_checkpoint
7
+
8
+ try:
9
+ from torch.hub import load_state_dict_from_url
10
+ except ImportError:
11
+ from torch.utils.model_zoo import load_url as load_state_dict_from_url
12
+
13
+
14
+ def load_pretrained(model, url, filter_fn=None, strict=True):
15
+ if not url:
16
+ logging.warning("Pretrained model URL is empty, using random initialization. "
17
+ "Did you intend to use a `tf_` variant of the model?")
18
+ return
19
+ state_dict = load_state_dict_from_url(url, progress=False, map_location='cpu')
20
+ if filter_fn is not None:
21
+ state_dict = filter_fn(state_dict)
22
+ model.load_state_dict(state_dict, strict=strict)
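`filter_fn` lets the caller rewrite the downloaded state dict before it is loaded. A minimal sketch of such a filter; the `module.` prefix below is hypothetical (e.g. left over from `DataParallel` wrapping).

```python
from collections import OrderedDict

def strip_module_prefix(state_dict):
    """Example filter_fn: drop a hypothetical 'module.' prefix from every key."""
    return OrderedDict(
        (k[len('module.'):] if k.startswith('module.') else k, v)
        for k, v in state_dict.items())

# load_pretrained(model, url, filter_fn=strip_module_prefix) would then load the
# cleaned weights; passing strict=False additionally tolerates partial checkpoints.
```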
efficientdet/effdet/loss.py ADDED
@@ -0,0 +1,259 @@
1
+ """ EfficientDet Focal, Huber/Smooth L1 loss fns w/ jit support
2
+
3
+ Based on loss fn in Google's automl EfficientDet repository (Apache 2.0 license).
4
+ https://github.com/google/automl/tree/master/efficientdet
5
+
6
+ Copyright 2020 Ross Wightman
7
+ """
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.functional as F
11
+
12
+ from typing import Optional, List, Tuple
13
+
14
+
15
+ def focal_loss_legacy(logits, targets, alpha: float, gamma: float, normalizer):
16
+ """Compute the focal loss between `logits` and the golden `target` values.
17
+
18
+ 'Legacy' focal loss matches the loss used in the official Tensorflow impl for initial
19
+ model releases and some time after that. It eventually transitioned to the 'New' loss
20
+ defined below.
21
+
22
+ Focal loss = -(1-pt)^gamma * log(pt)
23
+ where pt is the probability of being classified to the true class.
24
+
25
+ Args:
26
+ logits: A float32 tensor of size [batch, height_in, width_in, num_predictions].
27
+
28
+ targets: A float32 tensor of size [batch, height_in, width_in, num_predictions].
29
+
30
+ alpha: A float32 scalar multiplying alpha to the loss from positive examples
31
+ and (1-alpha) to the loss from negative examples.
32
+
33
+ gamma: A float32 scalar modulating loss from hard and easy examples.
34
+
35
+ normalizer: A float32 scalar normalizes the total loss from all examples.
36
+
37
+ Returns:
38
+ loss: A float32 scalar representing normalized total loss.
39
+ """
40
+ positive_label_mask = targets == 1.0
41
+ cross_entropy = F.binary_cross_entropy_with_logits(logits, targets.to(logits.dtype), reduction='none')
42
+ neg_logits = -1.0 * logits
43
+ modulator = torch.exp(gamma * targets * neg_logits - gamma * torch.log1p(torch.exp(neg_logits)))
44
+
45
+ loss = modulator * cross_entropy
46
+ weighted_loss = torch.where(positive_label_mask, alpha * loss, (1.0 - alpha) * loss)
47
+ return weighted_loss / normalizer
48
+
49
+
50
+ def new_focal_loss(logits, targets, alpha: float, gamma: float, normalizer, label_smoothing: float = 0.01):
51
+ """Compute the focal loss between `logits` and the golden `target` values.
52
+
53
+ 'New' is not the best descriptor, but this focal loss impl matches recent versions of
54
+ the official Tensorflow impl of EfficientDet. It has support for label smoothing, however
55
+ it is a bit slower, doesn't jit optimize well, and uses more memory.
56
+
57
+ Focal loss = -(1-pt)^gamma * log(pt)
58
+ where pt is the probability of being classified to the true class.
59
+ Args:
60
+ logits: A float32 tensor of size [batch, height_in, width_in, num_predictions].
61
+ targets: A float32 tensor of size [batch, height_in, width_in, num_predictions].
62
+ alpha: A float32 scalar multiplying alpha to the loss from positive examples
63
+ and (1-alpha) to the loss from negative examples.
64
+ gamma: A float32 scalar modulating loss from hard and easy examples.
65
+ normalizer: Divide loss by this value.
66
+ label_smoothing: Float in [0, 1]. If > `0` then smooth the labels.
67
+ Returns:
68
+ loss: A float32 scalar representing normalized total loss.
69
+ """
70
+ # compute focal loss multipliers before label smoothing, such that it will not blow up the loss.
71
+ pred_prob = logits.sigmoid()
72
+ targets = targets.to(logits.dtype)
73
+ onem_targets = 1. - targets
74
+ p_t = (targets * pred_prob) + (onem_targets * (1. - pred_prob))
75
+ alpha_factor = targets * alpha + onem_targets * (1. - alpha)
76
+ modulating_factor = (1. - p_t) ** gamma
77
+
78
+ # apply label smoothing for cross_entropy for each entry.
79
+ if label_smoothing > 0.:
80
+ targets = targets * (1. - label_smoothing) + .5 * label_smoothing
81
+ ce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
82
+
83
+ # compute the final loss and return
84
+ return (1 / normalizer) * alpha_factor * modulating_factor * ce
85
+
86
+
87
+ def huber_loss(
88
+ input, target, delta: float = 1., weights: Optional[torch.Tensor] = None, size_average: bool = True):
89
+ """
90
+ """
91
+ err = input - target
92
+ abs_err = err.abs()
93
+ quadratic = torch.clamp(abs_err, max=delta)
94
+ linear = abs_err - quadratic
95
+ loss = 0.5 * quadratic.pow(2) + delta * linear
96
+ if weights is not None:
97
+ loss *= weights
98
+ if size_average:
99
+ return loss.mean()
100
+ else:
101
+ return loss.sum()
102
+
103
+
104
+ def smooth_l1_loss(
105
+ input, target, beta: float = 1. / 9, weights: Optional[torch.Tensor] = None, size_average: bool = True):
106
+ """
107
+ very similar to the smooth_l1_loss from pytorch, but with the extra beta parameter
108
+ """
109
+ if beta < 1e-5:
110
+ # if beta == 0, then torch.where will result in nan gradients when
111
+ # the chain rule is applied due to pytorch implementation details
112
+ # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of
113
+ # zeros, rather than "no gradient"). To avoid this issue, we define
114
+ # small values of beta to be exactly l1 loss.
115
+ loss = torch.abs(input - target)
116
+ else:
117
+ err = torch.abs(input - target)
118
+ loss = torch.where(err < beta, 0.5 * err.pow(2) / beta, err - 0.5 * beta)
119
+ if weights is not None:
120
+ loss *= weights
121
+ if size_average:
122
+ return loss.mean()
123
+ else:
124
+ return loss.sum()
125
+
126
+
127
+ def _box_loss(box_outputs, box_targets, num_positives, delta: float = 0.1):
128
+ """Computes box regression loss."""
129
+ # delta is typically around the mean value of regression target.
130
+ # for instance, the regression targets of 512x512 input with 6 anchors on
131
+ # P3-P7 pyramid is about [0.1, 0.1, 0.2, 0.2].
132
+ normalizer = num_positives * 4.0
133
+ mask = box_targets != 0.0
134
+ box_loss = huber_loss(box_outputs, box_targets, weights=mask, delta=delta, size_average=False)
135
+ return box_loss / normalizer
136
+
137
+
138
+ def one_hot(x, num_classes: int):
139
+ # NOTE: PyTorch one-hot does not handle -ve entries (no hot) like Tensorflow, so mask them out
140
+ x_non_neg = (x >= 0).unsqueeze(-1)
141
+ onehot = torch.zeros(x.shape + (num_classes,), device=x.device, dtype=torch.float32)
142
+ return onehot.scatter(-1, x.unsqueeze(-1) * x_non_neg, 1) * x_non_neg
143
+
144
+
145
+ def loss_fn(
146
+ cls_outputs: List[torch.Tensor],
147
+ box_outputs: List[torch.Tensor],
148
+ cls_targets: List[torch.Tensor],
149
+ box_targets: List[torch.Tensor],
150
+ num_positives: torch.Tensor,
151
+ num_classes: int,
152
+ alpha: float,
153
+ gamma: float,
154
+ delta: float,
155
+ box_loss_weight: float,
156
+ label_smoothing: float = 0.,
157
+ new_focal: bool = False,
158
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
159
+ """Computes total detection loss.
160
+ Computes total detection loss including box and class loss from all levels.
161
+ Args:
162
+ cls_outputs: a List with values representing logits in [batch_size, height, width, num_anchors].
163
+ at each feature level (index)
164
+
165
+ box_outputs: a List with values representing box regression targets in
166
+ [batch_size, height, width, num_anchors * 4] at each feature level (index)
167
+
168
+ cls_targets: groundtruth class targets.
169
+
170
+ box_targets: groundtrusth box targets.
171
+
172
+ num_positives: number of positive groundtruth anchors
173
+
174
+ Returns:
175
+ total_loss: a float tensor representing the total loss reduced from class and box losses across all levels.
176
+
177
+ cls_loss: a float tensor representing the total class loss.
178
+
179
+ box_loss: a float tensor representing the total box regression loss.
180
+ """
181
+ # Sum all positives in a batch for normalization and avoid zero
182
+ # num_positives_sum, which would lead to inf loss during training
183
+ num_positives_sum = (num_positives.sum() + 1.0).float()
184
+ levels = len(cls_outputs)
185
+
186
+ cls_losses = []
187
+ box_losses = []
188
+ for l in range(levels):
189
+ cls_targets_at_level = cls_targets[l]
190
+ box_targets_at_level = box_targets[l]
191
+
192
+ # Onehot encoding for classification labels.
193
+ cls_targets_at_level_oh = one_hot(cls_targets_at_level, num_classes)
194
+
195
+ bs, height, width, _, _ = cls_targets_at_level_oh.shape
196
+ cls_targets_at_level_oh = cls_targets_at_level_oh.view(bs, height, width, -1)
197
+ cls_outputs_at_level = cls_outputs[l].permute(0, 2, 3, 1).float()
198
+ if new_focal:
199
+ cls_loss = new_focal_loss(
200
+ cls_outputs_at_level, cls_targets_at_level_oh,
201
+ alpha=alpha, gamma=gamma, normalizer=num_positives_sum, label_smoothing=label_smoothing)
202
+ else:
203
+ cls_loss = focal_loss_legacy(
204
+ cls_outputs_at_level, cls_targets_at_level_oh,
205
+ alpha=alpha, gamma=gamma, normalizer=num_positives_sum)
206
+ cls_loss = cls_loss.view(bs, height, width, -1, num_classes)
207
+ cls_loss = cls_loss * (cls_targets_at_level != -2).unsqueeze(-1)
208
+ cls_losses.append(cls_loss.sum()) # FIXME reference code added a clamp here at some point ...clamp(0, 2))
209
+
210
+ box_losses.append(_box_loss(
211
+ box_outputs[l].permute(0, 2, 3, 1).float(),
212
+ box_targets_at_level,
213
+ num_positives_sum,
214
+ delta=delta))
215
+
216
+ # Sum per level losses to total loss.
217
+ cls_loss = torch.sum(torch.stack(cls_losses, dim=-1), dim=-1)
218
+ box_loss = torch.sum(torch.stack(box_losses, dim=-1), dim=-1)
219
+ total_loss = cls_loss + box_loss_weight * box_loss
220
+ return total_loss, cls_loss, box_loss
221
+
222
+
223
+ loss_jit = torch.jit.script(loss_fn)
224
+
225
+
226
+ class DetectionLoss(nn.Module):
227
+
228
+ __constants__ = ['num_classes']
229
+
230
+ def __init__(self, config):
231
+ super(DetectionLoss, self).__init__()
232
+ self.config = config
233
+ self.num_classes = config.num_classes
234
+ self.alpha = config.alpha
235
+ self.gamma = config.gamma
236
+ self.delta = config.delta
237
+ self.box_loss_weight = config.box_loss_weight
238
+ self.label_smoothing = config.label_smoothing
239
+ self.new_focal = config.new_focal
240
+ self.use_jit = config.jit_loss
241
+
242
+ def forward(
243
+ self,
244
+ cls_outputs: List[torch.Tensor],
245
+ box_outputs: List[torch.Tensor],
246
+ cls_targets: List[torch.Tensor],
247
+ box_targets: List[torch.Tensor],
248
+ num_positives: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
249
+
250
+ l_fn = loss_fn
251
+ if not torch.jit.is_scripting() and self.use_jit:
252
+ # This branch only active if parent / bench itself isn't being scripted
253
+ # NOTE: I haven't figured out what to do here wrt to tracing, is it an issue?
254
+ l_fn = loss_jit
255
+
256
+ return l_fn(
257
+ cls_outputs, box_outputs, cls_targets, box_targets, num_positives,
258
+ num_classes=self.num_classes, alpha=self.alpha, gamma=self.gamma, delta=self.delta,
259
+ box_loss_weight=self.box_loss_weight, label_smoothing=self.label_smoothing, new_focal=self.new_focal)
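To make the focal-loss formula above concrete, here is a tiny stand-alone sketch of the per-element computation used by `new_focal_loss` (without label smoothing); the logits, targets, and hyperparameters are arbitrary.

```python
import torch
import torch.nn.functional as F

alpha, gamma, normalizer = 0.25, 1.5, 1.0
logits = torch.tensor([2.0, -1.0, 0.5])
targets = torch.tensor([1.0, 0.0, 1.0])

p = logits.sigmoid()
p_t = targets * p + (1 - targets) * (1 - p)                   # prob of the true class
alpha_factor = targets * alpha + (1 - targets) * (1 - alpha)  # class balancing
modulating = (1 - p_t) ** gamma                               # down-weights easy examples
ce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')

loss = alpha_factor * modulating * ce / normalizer            # per-element focal loss
print(loss)
```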
efficientdet/effdet/object_detection/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # Tensorflow Object Detection
2
+
3
+ All of this code is adapted/ported/copied from https://github.com/google/automl/tree/552d0facd14f4fe9205a67fb13ecb5690a4d1c94/efficientdet/object_detection
efficientdet/effdet/object_detection/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ # Copyright 2020 Google Research. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ # Object detection data loaders and libraries are mostly based on RetinaNet:
16
+ # https://github.com/tensorflow/tpu/tree/master/models/official/retinanet
17
+ from .argmax_matcher import ArgMaxMatcher
18
+ from .box_coder import FasterRcnnBoxCoder
19
+ from .box_list import BoxList
20
+ from .matcher import Match
21
+ from .region_similarity_calculator import IouSimilarity
22
+ from .target_assigner import TargetAssigner
efficientdet/effdet/object_detection/argmax_matcher.py ADDED
@@ -0,0 +1,174 @@
1
+ # Copyright 2020 Google Research. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """Argmax matcher implementation.
16
+
17
+ This class takes a similarity matrix and matches columns to rows based on the
18
+ maximum value per column. One can specify matched_threshold to require a minimum
19
+ similarity for a column to match a row (columns below it generally result in a negative
20
+ training example) and unmatched_threshold to ignore in-between matches (generally
21
+ resulting in neither a positive nor a negative training example).
22
+
23
+ This matcher is used in Fast(er)-RCNN.
24
+
25
+ Note: matchers are used in TargetAssigners. There is a create_target_assigner
26
+ factory function for popular implementations.
27
+ """
28
+ import torch
29
+ from .matcher import Match
30
+ from typing import Optional
31
+
32
+
33
+ def one_hot_bool(x, num_classes: int):
34
+ # for improved perf over PyTorch builtin one_hot, scatter to bool
35
+ onehot = torch.zeros(x.size(0), num_classes, device=x.device, dtype=torch.bool)
36
+ return onehot.scatter_(1, x.unsqueeze(1), 1)
37
+
38
+
39
+ @torch.jit.script
40
+ class ArgMaxMatcher(object): # cannot inherit with torchscript
41
+ """Matcher based on highest value.
42
+
43
+ This class computes matches from a similarity matrix. Each column is matched
44
+ to a single row.
45
+
46
+ To support object detection target assignment this class enables setting both
47
+ matched_threshold (upper threshold) and unmatched_threshold (lower thresholds)
48
+ defining three categories of similarity which define whether examples are
49
+ positive, negative, or ignored:
50
+ (1) similarity >= matched_threshold: Highest similarity. Matched/Positive!
51
+ (2) matched_threshold > similarity >= unmatched_threshold: Medium similarity.
52
+ Depending on negatives_lower_than_unmatched, this is either
53
+ Unmatched/Negative OR Ignore.
54
+ (3) unmatched_threshold > similarity: Lowest similarity. Depending on flag
55
+ negatives_lower_than_unmatched, either Unmatched/Negative OR Ignore.
56
+ For ignored matches this class sets the values in the Match object to -2.
57
+ """
58
+
59
+ def __init__(self,
60
+ matched_threshold: float,
61
+ unmatched_threshold: Optional[float] = None,
62
+ negatives_lower_than_unmatched: bool = True,
63
+ force_match_for_each_row: bool = False):
64
+ """Construct ArgMaxMatcher.
65
+
66
+ Args:
67
+ matched_threshold: Threshold for positive matches. Positive if
68
+ sim >= matched_threshold, where sim is the maximum value of the
69
+ similarity matrix for a given column. Set to None for no threshold.
70
+ unmatched_threshold: Threshold for negative matches. Negative if
71
+ sim < unmatched_threshold. Defaults to matched_threshold
72
+ when set to None.
73
+ negatives_lower_than_unmatched: Boolean which defaults to True. If True
74
+ then negative matches are the ones below the unmatched_threshold,
75
+ whereas ignored matches are in between the matched and unmatched
76
+ threshold. If False, then negative matches are in between the matched
77
+ and unmatched threshold, and everything lower than unmatched is ignored.
78
+ force_match_for_each_row: If True, ensures that each row is matched to
79
+ at least one column (which is not guaranteed otherwise if the
80
+ matched_threshold is high). Defaults to False. See
81
+ argmax_matcher_test.testMatcherForceMatch() for an example.
82
+
83
+ Raises:
84
+ ValueError: if unmatched_threshold is set but matched_threshold is not set
85
+ or if unmatched_threshold > matched_threshold.
86
+ """
87
+ if (matched_threshold is None) and (unmatched_threshold is not None):
88
+ raise ValueError('Need to also define matched_threshold when unmatched_threshold is defined')
89
+ self._matched_threshold = matched_threshold
90
+ self._unmatched_threshold: float = 0.
91
+ if unmatched_threshold is None:
92
+ self._unmatched_threshold = matched_threshold
93
+ else:
94
+ if unmatched_threshold > matched_threshold:
95
+ raise ValueError('unmatched_threshold needs to be smaller or equal to matched_threshold')
96
+ self._unmatched_threshold = unmatched_threshold
97
+ if not negatives_lower_than_unmatched:
98
+ if self._unmatched_threshold == self._matched_threshold:
99
+ raise ValueError('When negatives are in between matched and unmatched thresholds, these '
100
+ 'cannot be of equal value. matched: %s, unmatched: %s' %
101
+ (self._matched_threshold, self._unmatched_threshold))
102
+ self._force_match_for_each_row = force_match_for_each_row
103
+ self._negatives_lower_than_unmatched = negatives_lower_than_unmatched
104
+
105
+ def _match_when_rows_are_empty(self, similarity_matrix):
106
+ """Performs matching when the rows of similarity matrix are empty.
107
+
108
+ When the rows are empty, all detections are false positives. So we return
109
+ a tensor of -1's to indicate that the columns do not match to any rows.
110
+
111
+ Returns:
112
+ matches: int32 tensor indicating the row each column matches to.
113
+ """
114
+ return -1 * torch.ones(similarity_matrix.shape[1], dtype=torch.long, device=similarity_matrix.device)
115
+
116
+ def _match_when_rows_are_non_empty(self, similarity_matrix):
117
+ """Performs matching when the rows of similarity matrix are non empty.
118
+
119
+ Returns:
120
+ matches: int32 tensor indicating the row each column matches to.
121
+ """
122
+ # Matches for each column
123
+ matched_vals, matches = torch.max(similarity_matrix, 0)
124
+
125
+ # Deal with matched and unmatched threshold
126
+ if self._matched_threshold is not None:
127
+ # Get boolean masks of unmatched and in-between (ignored) columns
128
+ below_unmatched_threshold = self._unmatched_threshold > matched_vals
129
+ between_thresholds = (matched_vals >= self._unmatched_threshold) & \
130
+ (self._matched_threshold > matched_vals)
131
+
132
+ if self._negatives_lower_than_unmatched:
133
+ matches = self._set_values_using_indicator(matches, below_unmatched_threshold, -1)
134
+ matches = self._set_values_using_indicator(matches, between_thresholds, -2)
135
+ else:
136
+ matches = self._set_values_using_indicator(matches, below_unmatched_threshold, -2)
137
+ matches = self._set_values_using_indicator(matches, between_thresholds, -1)
138
+
139
+ if self._force_match_for_each_row:
140
+ force_match_column_ids = torch.argmax(similarity_matrix, 1)
141
+ force_match_column_indicators = one_hot_bool(force_match_column_ids, similarity_matrix.shape[1])
142
+ force_match_column_mask, force_match_row_ids = torch.max(force_match_column_indicators, 0)
143
+ final_matches = torch.where(force_match_column_mask, force_match_row_ids, matches)
144
+ return final_matches
145
+ else:
146
+ return matches
147
+
148
+ def match(self, similarity_matrix):
149
+ """Tries to match each column of the similarity matrix to a row.
150
+
151
+ Args:
152
+ similarity_matrix: tensor of shape [N, M] representing any similarity metric.
153
+
154
+ Returns:
155
+ Match object with corresponding matches for each of M columns.
156
+ """
157
+ if similarity_matrix.shape[0] == 0:
158
+ return Match(self._match_when_rows_are_empty(similarity_matrix))
159
+ else:
160
+ return Match(self._match_when_rows_are_non_empty(similarity_matrix))
161
+
162
+ def _set_values_using_indicator(self, x, indicator, val: int):
163
+ """Set the indicated fields of x to val.
164
+
165
+ Args:
166
+ x: tensor.
167
+ indicator: boolean with same shape as x.
168
+ val: scalar with value to set.
169
+
170
+ Returns:
171
+ modified tensor.
172
+ """
173
+ indicator = indicator.to(dtype=x.dtype)
174
+ return x * (1 - indicator) + val * indicator
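A toy illustration of the three-way threshold rule the matcher implements (positive / ignore / negative), written directly with torch ops rather than through the class, since `Match` and `BoxList` live in files not shown here; the thresholds and similarity matrix are made up.

```python
import torch

matched_thr, unmatched_thr = 0.5, 0.4
# rows = groundtruth boxes, columns = anchors (similarity, e.g. IoU)
sim = torch.tensor([[0.7, 0.45, 0.1],
                    [0.2, 0.30, 0.3]])

matched_vals, matches = torch.max(sim, 0)  # best gt row per anchor column
below = matched_vals < unmatched_thr                                       # negatives (default flags)
between = (matched_vals >= unmatched_thr) & (matched_vals < matched_thr)   # ignored

matches[below] = -1    # negative / unmatched
matches[between] = -2  # ignored
print(matches)         # tensor([ 0, -2, -1])
```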
efficientdet/effdet/object_detection/box_coder.py ADDED
@@ -0,0 +1,172 @@
1
+ # Copyright 2020 Google Research. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """Base box coder.
16
+
17
+ Box coders convert between coordinate frames, namely image-centric
18
+ (with (0,0) on the top left of image) and anchor-centric (with (0,0) being
19
+ defined by a specific anchor).
20
+
21
+ Users of a BoxCoder can call two methods:
22
+ encode: which encodes a box with respect to a given anchor
23
+ (or rather, a tensor of boxes wrt a corresponding tensor of anchors) and
24
+ decode: which inverts this encoding with a decode operation.
25
+ In both cases, the arguments are assumed to be in 1-1 correspondence already;
26
+ it is not the job of a BoxCoder to perform matching.
27
+ """
28
+ import torch
29
+ from typing import List, Optional
30
+ from .box_list import BoxList
31
+
32
+ # Box coder types.
33
+ FASTER_RCNN = 'faster_rcnn'
34
+ KEYPOINT = 'keypoint'
35
+ MEAN_STDDEV = 'mean_stddev'
36
+ SQUARE = 'square'
37
+
38
+
39
+ """Faster RCNN box coder.
40
+
41
+ Faster RCNN box coder follows the coding schema described below:
42
+ ty = (y - ya) / ha
43
+ tx = (x - xa) / wa
44
+ th = log(h / ha)
45
+ tw = log(w / wa)
46
+ where x, y, w, h denote the box's center coordinates, width and height
47
+ respectively. Similarly, xa, ya, wa, ha denote the anchor's center
48
+ coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
49
+ center, width and height respectively.
50
+
51
+ See http://arxiv.org/abs/1506.01497 for details.
52
+ """
53
+
54
+
55
+ EPS = 1e-8
56
+
57
+
58
+ #@torch.jit.script
59
+ class FasterRcnnBoxCoder(object):
60
+ """Faster RCNN box coder."""
61
+
62
+ def __init__(self, scale_factors: Optional[List[float]] = None, eps: float = EPS):
63
+ """Constructor for FasterRcnnBoxCoder.
64
+
65
+ Args:
66
+ scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
67
+ If set to None, does not perform scaling. For Faster RCNN,
68
+ the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
69
+ """
70
+ self._scale_factors = scale_factors
71
+ if scale_factors is not None:
72
+ assert len(scale_factors) == 4
73
+ for scalar in scale_factors:
74
+ assert scalar > 0
75
+ self.eps = eps
76
+
77
+ #@property
78
+ def code_size(self):
79
+ return 4
80
+
81
+ def encode(self, boxes: BoxList, anchors: BoxList):
82
+ """Encode a box collection with respect to anchor collection.
83
+
84
+ Args:
85
+ boxes: BoxList holding N boxes to be encoded.
86
+ anchors: BoxList of anchors.
87
+
88
+ Returns:
89
+ a tensor representing N anchor-encoded boxes of the format [ty, tx, th, tw].
90
+ """
91
+ # Convert anchors to the center coordinate representation.
92
+ ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
93
+ ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
94
+ # Avoid NaN in division and log below.
95
+ ha += self.eps
96
+ wa += self.eps
97
+ h += self.eps
98
+ w += self.eps
99
+
100
+ tx = (xcenter - xcenter_a) / wa
101
+ ty = (ycenter - ycenter_a) / ha
102
+ tw = torch.log(w / wa)
103
+ th = torch.log(h / ha)
104
+ # Scales location targets as used in paper for joint training.
105
+ if self._scale_factors is not None:
106
+ ty *= self._scale_factors[0]
107
+ tx *= self._scale_factors[1]
108
+ th *= self._scale_factors[2]
109
+ tw *= self._scale_factors[3]
110
+ return torch.stack([ty, tx, th, tw]).t()
111
+
112
+ def decode(self, rel_codes, anchors: BoxList):
113
+ """Decode relative codes to boxes.
114
+
115
+ Args:
116
+ rel_codes: a tensor representing N anchor-encoded boxes.
117
+ anchors: BoxList of anchors.
118
+
119
+ Returns:
120
+ boxes: BoxList holding N bounding boxes.
121
+ """
122
+ ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
123
+
124
+ ty, tx, th, tw = rel_codes.t().unbind()
125
+ if self._scale_factors is not None:
126
+ ty /= self._scale_factors[0]
127
+ tx /= self._scale_factors[1]
128
+ th /= self._scale_factors[2]
129
+ tw /= self._scale_factors[3]
130
+ w = torch.exp(tw) * wa
131
+ h = torch.exp(th) * ha
132
+ ycenter = ty * ha + ycenter_a
133
+ xcenter = tx * wa + xcenter_a
134
+ ymin = ycenter - h / 2.
135
+ xmin = xcenter - w / 2.
136
+ ymax = ycenter + h / 2.
137
+ xmax = xcenter + w / 2.
138
+ return BoxList(torch.stack([ymin, xmin, ymax, xmax]).t())
139
+
140
+
141
+ def batch_decode(encoded_boxes, box_coder: FasterRcnnBoxCoder, anchors: BoxList):
142
+ """Decode a batch of encoded boxes.
143
+
144
+ This op takes a batch of encoded bounding boxes and transforms
145
+ them to a batch of bounding boxes specified by their corners in
146
+ the order of [y_min, x_min, y_max, x_max].
147
+
148
+ Args:
149
+ encoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
150
+ code_size] representing the location of the objects.
151
+ box_coder: a BoxCoder object.
152
+ anchors: a BoxList of anchors used to encode `encoded_boxes`.
153
+
154
+ Returns:
155
+ decoded_boxes: a float32 tensor of shape [batch_size, num_anchors, coder_size]
156
+ representing the corners of the objects in the order of [y_min, x_min, y_max, x_max].
157
+
158
+ Raises:
159
+ ValueError: if batch sizes of the inputs are inconsistent, or if
160
+ the number of anchors inferred from encoded_boxes and anchors are inconsistent.
161
+ """
162
+ assert len(encoded_boxes.shape) == 3
163
+ if encoded_boxes.shape[1] != anchors.num_boxes():
164
+ raise ValueError('The number of anchors inferred from encoded_boxes'
165
+ ' and anchors are inconsistent: shape[1] of encoded_boxes'
166
+ ' %s should be equal to the number of anchors: %s.' %
167
+ (encoded_boxes.shape[1], anchors.num_boxes()))
168
+
169
+ decoded_boxes = torch.stack([
170
+ box_coder.decode(boxes, anchors).boxes for boxes in encoded_boxes.unbind()
171
+ ])
172
+ return decoded_boxes
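A stand-alone numeric sketch of the Faster R-CNN encode/decode equations above, applied to a single box/anchor pair already in center form rather than through `BoxList` (which is defined in a file not shown here); the coordinates are arbitrary.

```python
import torch

scale = [10.0, 10.0, 5.0, 5.0]  # [ty, tx, th, tw] scale factors recommended above

# anchor and groundtruth box as (ycenter, xcenter, h, w)
ya, xa, ha, wa = 0.5, 0.5, 0.4, 0.4
y, x, h, w = torch.tensor(0.55), torch.tensor(0.45), torch.tensor(0.5), torch.tensor(0.3)

# encode: offsets relative to the anchor, log-scaled sizes
ty = (y - ya) / ha * scale[0]
tx = (x - xa) / wa * scale[1]
th = torch.log(h / ha) * scale[2]
tw = torch.log(w / wa) * scale[3]

# decode (the inverse) recovers the original box
h2 = torch.exp(th / scale[2]) * ha
w2 = torch.exp(tw / scale[3]) * wa
y2 = (ty / scale[0]) * ha + ya
x2 = (tx / scale[1]) * wa + xa
print(torch.allclose(torch.stack([y2, x2, h2, w2]), torch.stack([y, x, h, w])))  # True
```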