ZJF-Thunder
/

Swin-Transformer-Object-Detection

Model card Files Files and versions Community

Swin-Transformer-Object-Detection / mmdet /models /roi_heads /dynamic_roi_head.py

ZJF-Thunder

添加文件

e26e560 almost 2 years ago

6.61 kB

	import numpy as np
	import torch

	from mmdet.core import bbox2roi
	from mmdet.models.losses import SmoothL1Loss
	from ..builder import HEADS
	from .standard_roi_head import StandardRoIHead

	# Threshold used by ``DynamicRoIHead.update_hyperparameters``: when the
	# median of the recorded beta statistics is below this value, the SmoothL1
	# beta is left unchanged instead of being set to a zero / near-zero value.
	EPS = 1e-15


	@HEADS.register_module()
	class DynamicRoIHead(StandardRoIHead):
	    """RoI head for `Dynamic R-CNN <https://arxiv.org/abs/2004.06002>`_.

	    On top of the parent RoI head, this head records two training
	    statistics per iteration -- an IoU statistic taken from the assignment
	    result and a magnitude statistic of the positive regression targets --
	    and, every ``train_cfg.dynamic_rcnn.update_iter_interval`` recorded
	    iterations, uses them to update the IoU thresholds of the bbox assigner
	    and the ``beta`` of the SmoothL1 bbox loss.
	    """

	    def __init__(self, **kwargs):
	        """Build the head and initialise the statistic histories.

	        Args:
	            **kwargs: forwarded unchanged to the parent constructor.
	        """
	        super(DynamicRoIHead, self).__init__(**kwargs)
	        # ``update_hyperparameters`` rewrites ``loss_bbox.beta``, so the
	        # bbox regression loss has to be a SmoothL1Loss.
	        assert isinstance(self.bbox_head.loss_bbox, SmoothL1Loss)
	        # the IoU history of the past `update_iter_interval` iterations
	        self.iou_history = []
	        # the beta history of the past `update_iter_interval` iterations
	        self.beta_history = []

	    def forward_train(self,
	                      x,
	                      img_metas,
	                      proposal_list,
	                      gt_bboxes,
	                      gt_labels,
	                      gt_bboxes_ignore=None,
	                      gt_masks=None):
	        """Forward function for training.

	        Args:
	            x (list[Tensor]): list of multi-level img features.

	            img_metas (list[dict]): list of image info dict where each dict
	                has: 'img_shape', 'scale_factor', 'flip', and may also
	                contain 'filename', 'ori_shape', 'pad_shape', and
	                'img_norm_cfg'. For details on the values of these keys see
	                `mmdet/datasets/pipelines/formatting.py:Collect`.

	            proposal_list (list[Tensors]): list of region proposals.

	            gt_bboxes (list[Tensor]): each item are the truth boxes for each
	                image in [tl_x, tl_y, br_x, br_y] format.

	            gt_labels (list[Tensor]): class indices corresponding to each box

	            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
	                boxes can be ignored when computing the loss.

	            gt_masks (None | Tensor) : true segmentation masks for each box
	                used if the architecture supports a segmentation task.

	        Returns:
	            dict[str, Tensor]: a dictionary of loss components
	        """
	        dynamic_cfg = self.train_cfg.dynamic_rcnn

	        # assign gts and sample proposals
	        if self.with_bbox or self.with_mask:
	            num_imgs = len(img_metas)
	            if gt_bboxes_ignore is None:
	                gt_bboxes_ignore = [None for _ in range(num_imgs)]
	            sampling_results = []
	            cur_iou = []
	            for i in range(num_imgs):
	                assign_result = self.bbox_assigner.assign(
	                    proposal_list[i], gt_bboxes[i], gt_bboxes_ignore[i],
	                    gt_labels[i])
	                # NOTE(review): the IoU statistic below is deliberately read
	                # after sampling, as in the original statement order; the
	                # sampler may modify ``assign_result`` in place -- confirm
	                # before reordering.
	                sampling_result = self.bbox_sampler.sample(
	                    assign_result,
	                    proposal_list[i],
	                    gt_bboxes[i],
	                    gt_labels[i],
	                    feats=[lvl_feat[i][None] for lvl_feat in x])
	                # record the `iou_topk`-th largest IoU in an image; an image
	                # with no overlaps at all contributes no sample (indexing
	                # an empty top-k result would raise IndexError)
	                iou_topk = min(dynamic_cfg.iou_topk,
	                               len(assign_result.max_overlaps))
	                if iou_topk > 0:
	                    ious, _ = torch.topk(assign_result.max_overlaps,
	                                         iou_topk)
	                    cur_iou.append(ious[-1].item())
	                sampling_results.append(sampling_result)
	            # average the current IoUs over images
	            if cur_iou:
	                self.iou_history.append(np.mean(cur_iou))

	        losses = dict()
	        # bbox head forward and loss
	        if self.with_bbox:
	            bbox_results = self._bbox_forward_train(x, sampling_results,
	                                                    gt_bboxes, gt_labels,
	                                                    img_metas)
	            losses.update(bbox_results['loss_bbox'])

	        # mask head forward and loss
	        if self.with_mask:
	            mask_results = self._mask_forward_train(
	                x, sampling_results, bbox_results['bbox_feats'], gt_masks,
	                img_metas)
	            losses.update(mask_results['loss_mask'])

	        # update IoU threshold and SmoothL1 beta once a full window of
	        # statistics has been collected (never on an empty history)
	        update_iter_interval = dynamic_cfg.update_iter_interval
	        if (self.iou_history
	                and len(self.iou_history) % update_iter_interval == 0):
	            self.update_hyperparameters()

	        return losses

	    def _bbox_forward_train(self, x, sampling_results, gt_bboxes, gt_labels,
	                            img_metas):
	        """Run the bbox head, record the beta statistic and compute the loss.

	        Args:
	            x: image features passed on to ``self._bbox_forward``.
	            sampling_results (list): per-image sampling results; their
	                ``bboxes`` are converted to RoIs.
	            gt_bboxes: ground-truth boxes forwarded to
	                ``bbox_head.get_targets``.
	            gt_labels: ground-truth labels forwarded to
	                ``bbox_head.get_targets``.
	            img_metas (list): per-image meta info; only its length (the
	                number of images) is used here.

	        Returns:
	            dict: the output of ``self._bbox_forward`` extended with a
	            ``loss_bbox`` entry.
	        """
	        num_imgs = len(img_metas)
	        rois = bbox2roi([res.bboxes for res in sampling_results])
	        bbox_results = self._bbox_forward(x, rois)

	        bbox_targets = self.bbox_head.get_targets(sampling_results, gt_bboxes,
	                                                  gt_labels, self.train_cfg)
	        # record the `beta_topk`-th smallest target
	        # `bbox_targets[2]` and `bbox_targets[3]` stand for bbox_targets
	        # and bbox_weights, respectively
	        pos_inds = bbox_targets[3][:, 0].nonzero().squeeze(1)
	        num_pos = len(pos_inds)
	        # Without positive samples there is nothing to rank, and
	        # ``torch.kthvalue`` with k == 0 raises, so skip the statistic.
	        if num_pos > 0:
	            cur_target = bbox_targets[2][pos_inds, :2].abs().mean(dim=1)
	            beta_topk = min(
	                self.train_cfg.dynamic_rcnn.beta_topk * num_imgs, num_pos)
	            cur_target = torch.kthvalue(cur_target, beta_topk)[0].item()
	            self.beta_history.append(cur_target)
	        loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],
	                                        bbox_results['bbox_pred'], rois,
	                                        *bbox_targets)

	        bbox_results.update(loss_bbox=loss_bbox)
	        return bbox_results

	    def update_hyperparameters(self):
	        """Update hyperparameters like IoU thresholds for assigner and beta for
	        SmoothL1 loss based on the training statistics.

	        Both statistic histories are cleared afterwards. If no IoU statistic
	        was recorded the threshold is set to the configured ``initial_iou``;
	        if no beta statistic was recorded (or their median is below ``EPS``)
	        the current beta is kept.

	        Returns:
	            tuple[float]: the updated ``iou_thr`` and ``beta``.
	        """
	        dynamic_cfg = self.train_cfg.dynamic_rcnn

	        # The new threshold never drops below the configured initial IoU.
	        if self.iou_history:
	            new_iou_thr = max(dynamic_cfg.initial_iou,
	                              np.mean(self.iou_history))
	        else:
	            new_iou_thr = dynamic_cfg.initial_iou
	        self.iou_history = []
	        self.bbox_assigner.pos_iou_thr = new_iou_thr
	        self.bbox_assigner.neg_iou_thr = new_iou_thr
	        self.bbox_assigner.min_pos_iou = new_iou_thr

	        # ``np.median`` of an empty list is NaN, which would slip past the
	        # ``< EPS`` check, so the empty case is tested explicitly.
	        median_beta = np.median(self.beta_history) if self.beta_history \
	            else None
	        if median_beta is None or median_beta < EPS:
	            # avoid 0 or too small value for new_beta
	            new_beta = self.bbox_head.loss_bbox.beta
	        else:
	            # The new beta never exceeds the configured initial beta.
	            new_beta = min(dynamic_cfg.initial_beta, median_beta)
	        self.beta_history = []
	        self.bbox_head.loss_bbox.beta = new_beta
	        return new_iou_thr, new_beta