File size: 7,697 Bytes
eb9a9b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/bin/env python3
# Copyright 2004-present Facebook. All Rights Reserved.
import copy
import numpy as np
from typing import Dict
import torch
from scipy.optimize import linear_sum_assignment

from annotator.oneformer.detectron2.config import configurable
from annotator.oneformer.detectron2.structures import Boxes, Instances

from ..config.config import CfgNode as CfgNode_
from .base_tracker import BaseTracker


class BaseHungarianTracker(BaseTracker):
    """

    A base class for all Hungarian trackers

    """

    @configurable
    def __init__(

        self,

        video_height: int,

        video_width: int,

        max_num_instances: int = 200,

        max_lost_frame_count: int = 0,

        min_box_rel_dim: float = 0.02,

        min_instance_period: int = 1,

        **kwargs

    ):
        """

        Args:

        video_height: height the video frame

        video_width: width of the video frame

        max_num_instances: maximum number of id allowed to be tracked

        max_lost_frame_count: maximum number of frame an id can lost tracking

                              exceed this number, an id is considered as lost

                              forever

        min_box_rel_dim: a percentage, smaller than this dimension, a bbox is

                         removed from tracking

        min_instance_period: an instance will be shown after this number of period

                             since its first showing up in the video

        """
        super().__init__(**kwargs)
        self._video_height = video_height
        self._video_width = video_width
        self._max_num_instances = max_num_instances
        self._max_lost_frame_count = max_lost_frame_count
        self._min_box_rel_dim = min_box_rel_dim
        self._min_instance_period = min_instance_period

    @classmethod
    def from_config(cls, cfg: CfgNode_) -> Dict:
        raise NotImplementedError("Calling HungarianTracker::from_config")

    def build_cost_matrix(self, instances: Instances, prev_instances: Instances) -> np.ndarray:
        raise NotImplementedError("Calling HungarianTracker::build_matrix")

    def update(self, instances: Instances) -> Instances:
        if instances.has("pred_keypoints"):
            raise NotImplementedError("Need to add support for keypoints")
        instances = self._initialize_extra_fields(instances)
        if self._prev_instances is not None:
            self._untracked_prev_idx = set(range(len(self._prev_instances)))
            cost_matrix = self.build_cost_matrix(instances, self._prev_instances)
            matched_idx, matched_prev_idx = linear_sum_assignment(cost_matrix)
            instances = self._process_matched_idx(instances, matched_idx, matched_prev_idx)
            instances = self._process_unmatched_idx(instances, matched_idx)
            instances = self._process_unmatched_prev_idx(instances, matched_prev_idx)
        self._prev_instances = copy.deepcopy(instances)
        return instances

    def _initialize_extra_fields(self, instances: Instances) -> Instances:
        """

        If input instances don't have ID, ID_period, lost_frame_count fields,

        this method is used to initialize these fields.



        Args:

            instances: D2 Instances, for predictions of the current frame

        Return:

            D2 Instances with extra fields added

        """
        if not instances.has("ID"):
            instances.set("ID", [None] * len(instances))
        if not instances.has("ID_period"):
            instances.set("ID_period", [None] * len(instances))
        if not instances.has("lost_frame_count"):
            instances.set("lost_frame_count", [None] * len(instances))
        if self._prev_instances is None:
            instances.ID = list(range(len(instances)))
            self._id_count += len(instances)
            instances.ID_period = [1] * len(instances)
            instances.lost_frame_count = [0] * len(instances)
        return instances

    def _process_matched_idx(

        self, instances: Instances, matched_idx: np.ndarray, matched_prev_idx: np.ndarray

    ) -> Instances:
        assert matched_idx.size == matched_prev_idx.size
        for i in range(matched_idx.size):
            instances.ID[matched_idx[i]] = self._prev_instances.ID[matched_prev_idx[i]]
            instances.ID_period[matched_idx[i]] = (
                self._prev_instances.ID_period[matched_prev_idx[i]] + 1
            )
            instances.lost_frame_count[matched_idx[i]] = 0
        return instances

    def _process_unmatched_idx(self, instances: Instances, matched_idx: np.ndarray) -> Instances:
        untracked_idx = set(range(len(instances))).difference(set(matched_idx))
        for idx in untracked_idx:
            instances.ID[idx] = self._id_count
            self._id_count += 1
            instances.ID_period[idx] = 1
            instances.lost_frame_count[idx] = 0
        return instances

    def _process_unmatched_prev_idx(

        self, instances: Instances, matched_prev_idx: np.ndarray

    ) -> Instances:
        untracked_instances = Instances(
            image_size=instances.image_size,
            pred_boxes=[],
            pred_masks=[],
            pred_classes=[],
            scores=[],
            ID=[],
            ID_period=[],
            lost_frame_count=[],
        )
        prev_bboxes = list(self._prev_instances.pred_boxes)
        prev_classes = list(self._prev_instances.pred_classes)
        prev_scores = list(self._prev_instances.scores)
        prev_ID_period = self._prev_instances.ID_period
        if instances.has("pred_masks"):
            prev_masks = list(self._prev_instances.pred_masks)
        untracked_prev_idx = set(range(len(self._prev_instances))).difference(set(matched_prev_idx))
        for idx in untracked_prev_idx:
            x_left, y_top, x_right, y_bot = prev_bboxes[idx]
            if (
                (1.0 * (x_right - x_left) / self._video_width < self._min_box_rel_dim)
                or (1.0 * (y_bot - y_top) / self._video_height < self._min_box_rel_dim)
                or self._prev_instances.lost_frame_count[idx] >= self._max_lost_frame_count
                or prev_ID_period[idx] <= self._min_instance_period
            ):
                continue
            untracked_instances.pred_boxes.append(list(prev_bboxes[idx].numpy()))
            untracked_instances.pred_classes.append(int(prev_classes[idx]))
            untracked_instances.scores.append(float(prev_scores[idx]))
            untracked_instances.ID.append(self._prev_instances.ID[idx])
            untracked_instances.ID_period.append(self._prev_instances.ID_period[idx])
            untracked_instances.lost_frame_count.append(
                self._prev_instances.lost_frame_count[idx] + 1
            )
            if instances.has("pred_masks"):
                untracked_instances.pred_masks.append(prev_masks[idx].numpy().astype(np.uint8))

        untracked_instances.pred_boxes = Boxes(torch.FloatTensor(untracked_instances.pred_boxes))
        untracked_instances.pred_classes = torch.IntTensor(untracked_instances.pred_classes)
        untracked_instances.scores = torch.FloatTensor(untracked_instances.scores)
        if instances.has("pred_masks"):
            untracked_instances.pred_masks = torch.IntTensor(untracked_instances.pred_masks)
        else:
            untracked_instances.remove("pred_masks")

        return Instances.cat(
            [
                instances,
                untracked_instances,
            ]
        )