Source code for mmocr.models.textdet.dense_heads.head_mixin

import numpy as np

from mmdet.models.builder import HEADS
from mmocr.models.textdet.postprocess import decode
from mmocr.utils import check_argument


@HEADS.register_module()
class HeadMixin:
    """The head mixin for dbnet and pannet heads.

    The methods below read the following attributes from ``self``. This
    class does not define them, so the class it is mixed into has to:

    - ``decoding_type`` and ``text_repr_type``: forwarded to ``decode``.
    - ``downsample_ratio``: used when rescaling boundaries and forwarded
      to ``loss_module``.
    - ``loss_module``: the callable invoked by :meth:`loss`.
    """

    def resize_boundary(self, boundaries, scale_factor):
        """Rescale boundaries via scale_factor.

        The inner lists of ``boundaries`` are modified in place and the
        same outer list is returned.

        Args:
            boundaries (list[list[float]]): The boundary list. Each boundary
                with size 2k+1 with k>=4. All values except the last one
                are treated as consecutive pairs (presumably x, y) and
                scaled; the last value (presumably a score) is left
                untouched.
            scale_factor (ndarray): The scale factor of size (4,). Only the
                first two entries are used, one per member of each pair.

        Returns:
            boundaries (list[list[float]]): The scaled boundaries.

        Raises:
            AssertionError: If ``boundaries`` fails
                ``check_argument.is_2dlist``, or ``scale_factor`` is not an
                ndarray whose first dimension is 4.
            ValueError: If a boundary holds an odd number of values before
                its last element, so they cannot be grouped into pairs.
        """
        assert check_argument.is_2dlist(boundaries)
        assert isinstance(scale_factor, np.ndarray)
        assert scale_factor.shape[0] == 4

        # Loop-invariant: the per-pair multipliers.
        pair_scale = scale_factor[:2]
        for boundary in boundaries:
            # NOTE(review): the result of this call is not checked. Confirm
            # whether valid_boundary raises on bad input or only returns a
            # flag before relying on it as validation.
            check_argument.valid_boundary(boundary, True)
            # View the leading values as rows of two so that pair_scale
            # broadcasts across every row; the trailing value is excluded.
            pairs = np.array(boundary[:-1]).reshape(-1, 2)
            boundary[:-1] = (pairs * pair_scale).flatten().tolist()
        return boundaries

    def get_boundary(self, score_maps, img_metas, rescale):
        """Compute text boundaries via post processing.

        Args:
            score_maps (Tensor): The text score map.
            img_metas (list[dict]): The image meta info. Only the first
                entry is read, and only its ``'scale_factor'`` key, and
                only when ``rescale`` is true.
            rescale (bool): Rescale boundaries to the original image resolution
                if true, and keep the score_maps resolution if false.

        Returns:
            results (dict): The result dict, holding the decoded boundaries
                under the key ``boundary_result``.
        """

        assert check_argument.is_type_list(img_metas, dict)
        assert isinstance(rescale, bool)

        # squeeze() drops every dimension of size 1.
        # NOTE(review): presumably this assumes a single image per call;
        # confirm against the callers.
        score_maps = score_maps.squeeze()
        boundaries = decode(
            decoding_type=self.decoding_type,
            preds=score_maps,
            text_repr_type=self.text_repr_type)
        if rescale:
            # Undo both the head's downsampling and the image resize.
            # resize_boundary asserts that this is an ndarray whose first
            # dimension is 4.
            scale_factor = (1.0 / self.downsample_ratio /
                            img_metas[0]['scale_factor'])
            boundaries = self.resize_boundary(boundaries, scale_factor)
        return dict(boundary_result=boundaries)

    def loss(self, pred_maps, **kwargs):
        """Compute the loss for text detection.

        Args:
            pred_maps (tensor): The input score maps of NxCxHxW.
            **kwargs: Forwarded unchanged to ``self.loss_module``.

        Returns:
            losses (dict): The dict for losses.
        """
        return self.loss_module(pred_maps, self.downsample_ratio, **kwargs)