Note

You are reading the documentation for MMOCR 0.x, which will be deprecated by the end of 2022. We recommend that you upgrade to MMOCR 1.0 to enjoy the rich new features and better performance brought by OpenMMLab 2.0. Check out the maintenance plan, changelog, code and documentation of MMOCR 1.0 for more details.

Source code for mmocr.models.textrecog.necks.fpn_ocr

# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn.functional as F
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule, ModuleList

from mmocr.models.builder import NECKS


@NECKS.register_module()
class FPNOCR(BaseModule):
    """FPN-like Network for segmentation based text recognition.

    Every input scale is first projected to ``out_channels`` by a 1x1
    lateral conv. Starting from the last (coarsest) scale, the feature map is
    repeatedly upsampled by 2, concatenated with the lateral feature of the
    previous scale along the channel axis, and fused by a 1x1 conv followed
    by a 3x3 conv.

    Args:
        in_channels (list[int]): Number of input channels :math:`C_i` for each
            scale.
        out_channels (int): Number of output channels :math:`C_{out}` for each
            scale.
        last_stage_only (bool): If True, output last stage only.
        init_cfg (dict or list[dict], optional): Initialization configs.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 last_stage_only=True,
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)

        self.last_stage_only = last_stage_only

        self.lateral_convs = ModuleList()
        self.smooth_convs_1x1 = ModuleList()
        self.smooth_convs_3x3 = ModuleList()

        # One 1x1 lateral conv per input scale, mapping C_i -> C_out.
        for channels in in_channels:
            self.lateral_convs.append(
                ConvModule(
                    channels, out_channels, 1, norm_cfg=dict(type='BN')))

        # One (1x1, 3x3) fusion pair per top-down merge step, i.e. one fewer
        # than the number of input scales.
        for _ in range(self.num_ins - 1):
            # The 1x1 conv sees the channel-wise concatenation of two
            # C_out-channel maps, hence ``out_channels * 2`` inputs.
            self.smooth_convs_1x1.append(
                ConvModule(
                    out_channels * 2,
                    out_channels,
                    1,
                    norm_cfg=dict(type='BN')))
            self.smooth_convs_3x3.append(
                ConvModule(
                    out_channels,
                    out_channels,
                    3,
                    padding=1,
                    norm_cfg=dict(type='BN')))

    def _upsample_x2(self, x):
        """Upsample ``x`` by a factor of 2 with bilinear interpolation.

        ``align_corners=False`` is what ``F.interpolate`` falls back to when
        the argument is omitted; it is spelled out so that PyTorch versions
        which warn about the implicit default stay silent.
        """
        return F.interpolate(
            x, scale_factor=2, mode='bilinear', align_corners=False)

    def forward(self, inputs):
        """
        Args:
            inputs (list[Tensor]): A list of n tensors. Each tensor has the
                shape of :math:`(N, C_i, H_i, W_i)`. It usually expects 4
                tensors (C2-C5 features) from ResNet. Because each step
                upsamples by 2 before concatenating, every scale needs twice
                the spatial size of the following one.

        Returns:
            tuple(Tensor): A tuple of n-1 tensors. Each has the shape of
            :math:`(N, C_{out}, H_{n-2-i}, W_{n-2-i})`. If
            ``last_stage_only=True`` (default), the size of the
            tuple is 1 and only the last element will be returned.
            With a single input scale there is no merge step and the tuple
            is empty.
        """
        lateral_features = [
            l_conv(inputs[i]) for i, l_conv in enumerate(self.lateral_convs)
        ]

        outs = []
        # Top-down pathway: walk the merge steps from the coarsest scale to
        # the finest one (indices 3, 2, 1 when there are 4 inputs).
        for i in range(len(self.smooth_convs_3x3), 0, -1):
            # The first step starts from the coarsest lateral feature; later
            # steps continue from the previously fused output.
            last_out = outs[-1] if outs else lateral_features[-1]
            upsample = self._upsample_x2(last_out)
            upsample_cat = torch.cat((upsample, lateral_features[i - 1]),
                                     dim=1)
            smooth_1x1 = self.smooth_convs_1x1[i - 1](upsample_cat)
            smooth_3x3 = self.smooth_convs_3x3[i - 1](smooth_1x1)
            outs.append(smooth_3x3)

        return tuple(outs[-1:]) if self.last_stage_only else tuple(outs)