Source code for mmocr.models.textdet.dense_heads.head_mixin

import numpy as np

from mmdet.models.builder import HEADS
from mmocr.models.textdet.postprocess import decode
from mmocr.utils import check_argument


@HEADS.register_module()
class HeadMixin:
    """The head mixin for dbnet and pannet heads.

    The methods below read ``self.decoding_type``, ``self.text_repr_type``,
    ``self.downsample_ratio`` and ``self.loss_module``; none of them is set
    in this class, so the concrete head is expected to provide them.
    """

    def resize_boundary(self, boundaries, scale_factor):
        """Rescale boundaries via scale_factor.

        All entries of a boundary except the last one are multiplied by
        the first two values of ``scale_factor``, applied alternately.
        The last entry is left untouched (presumably the confidence
        score -- confirm against the decoder output). The inner lists are
        modified in place and the same outer list is returned.

        Args:
            boundaries (list[list[float]]): The boundary list. Each
                boundary with size 2k+1 with k>=4.
            scale_factor (ndarray): The scale factor of size (4,). Only
                its first two entries are used.

        Returns:
            boundaries (list[list[float]]): The scaled boundaries.
        """
        assert check_argument.is_2dlist(boundaries)
        assert isinstance(scale_factor, np.ndarray)
        assert scale_factor.shape[0] == 4

        for boundary in boundaries:
            length = len(boundary)
            # NOTE(review): the result of valid_boundary is discarded here;
            # confirm whether the helper raises on invalid input or merely
            # returns a flag.
            check_argument.valid_boundary(boundary, True)

            # Everything but the trailing entry is treated as coordinates.
            num_coords = length - 1
            coords = np.array(boundary[:num_coords])
            # Repeat the two leading scale values once per coordinate pair.
            pair_scale = np.tile(scale_factor[:2], int(num_coords / 2))
            pair_scale = pair_scale.reshape(1, num_coords)
            scaled = coords * pair_scale
            boundary[:num_coords] = scaled.flatten().tolist()

        return boundaries

    def get_boundary(self, score_maps, img_metas, rescale):
        """Compute text boundaries via post processing.

        Args:
            score_maps (Tensor): The text score map. It is squeezed
                before being passed to ``decode``.
            img_metas (list[dict]): The image meta info. Only
                ``img_metas[0]['scale_factor']`` is read, and only when
                ``rescale`` is true.
            rescale (bool): Rescale boundaries to the original image
                resolution if true, and keep the score_maps resolution
                if false.

        Returns:
            results (dict): The result dict, holding the boundaries under
                the key ``boundary_result``.
        """
        assert check_argument.is_type_list(img_metas, dict)
        assert isinstance(rescale, bool)

        squeezed_maps = score_maps.squeeze()
        boundaries = decode(
            decoding_type=self.decoding_type,
            preds=squeezed_maps,
            text_repr_type=self.text_repr_type)

        if rescale:
            # Undo both the head downsampling and the input image resize.
            inverse_scale = (
                1.0 / self.downsample_ratio / img_metas[0]['scale_factor'])
            boundaries = self.resize_boundary(boundaries, inverse_scale)

        return {'boundary_result': boundaries}

    def loss(self, pred_maps, **kwargs):
        """Compute the loss for text detection.

        Delegates to ``self.loss_module``, passing the predictions, the
        head's ``downsample_ratio`` and any extra keyword arguments.

        Args:
            pred_maps (tensor): The input score maps of NxCxHxW.
            **kwargs: Forwarded unchanged to ``self.loss_module``.

        Returns:
            losses (dict): The dict for losses.
        """
        return self.loss_module(pred_maps, self.downsample_ratio, **kwargs)