Note
You are reading the documentation for MMOCR 0.x, which will be deprecated by the end of 2022. We recommend that you upgrade to MMOCR 1.0 to enjoy the rich new features and better performance brought by OpenMMLab 2.0. Check out the maintenance plan, changelog, code and documentation of MMOCR 1.0 for more details.
Source code for mmocr.models.textrecog.necks.fpn_ocr
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn.functional as F
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule, ModuleList
from mmocr.models.builder import NECKS
@NECKS.register_module()
class FPNOCR(BaseModule):
    """FPN-like network for segmentation based text recognition.

    Every input scale is first projected to ``out_channels`` channels by a
    1x1 lateral convolution. Starting from the last input scale, the running
    feature map is upsampled by a factor of 2, concatenated with the lateral
    feature of the preceding scale, and fused by a 1x1 convolution followed
    by a 3x3 convolution. All convolutions are followed by batch norm.

    Args:
        in_channels (list[int]): Number of input channels :math:`C_i` for
            each scale.
        out_channels (int): Number of output channels :math:`C_{out}` for
            each scale.
        last_stage_only (bool): If True, output last stage only.
        init_cfg (dict or list[dict], optional): Initialization configs.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 last_stage_only=True,
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.last_stage_only = last_stage_only

        self.lateral_convs = ModuleList()
        self.smooth_convs_1x1 = ModuleList()
        self.smooth_convs_3x3 = ModuleList()

        # One 1x1 lateral projection per input scale.
        for i in range(self.num_ins):
            l_conv = ConvModule(
                in_channels[i], out_channels, 1, norm_cfg=dict(type='BN'))
            self.lateral_convs.append(l_conv)

        # One fusion stage (1x1 then 3x3) per pair of adjacent scales, hence
        # ``num_ins - 1`` stages. The 1x1 conv takes ``out_channels * 2``
        # channels because it consumes the concatenation of the upsampled
        # feature and the lateral feature.
        for i in range(self.num_ins - 1):
            s_conv_1x1 = ConvModule(
                out_channels * 2, out_channels, 1, norm_cfg=dict(type='BN'))
            s_conv_3x3 = ConvModule(
                out_channels,
                out_channels,
                3,
                padding=1,
                norm_cfg=dict(type='BN'))
            self.smooth_convs_1x1.append(s_conv_1x1)
            self.smooth_convs_3x3.append(s_conv_3x3)

    def _upsample_x2(self, x):
        """Upsample ``x`` by a factor of 2 using bilinear interpolation."""
        # ``align_corners=False`` is the value ``F.interpolate`` falls back
        # to when the argument is omitted, so the result is unchanged. It is
        # spelled out to pin the behaviour here and because some PyTorch
        # releases emit a warning when it is left unspecified.
        return F.interpolate(
            x, scale_factor=2, mode='bilinear', align_corners=False)

    def forward(self, inputs):
        """
        Args:
            inputs (list[Tensor]): A list of n tensors. Each tensor has the
                shape of :math:`(N, C_i, H_i, W_i)`. It usually expects 4
                tensors (C2-C5 features) from ResNet. Because features are
                upsampled by exactly 2 before concatenation, each scale
                must have twice the spatial size of the following one.

        Returns:
            tuple(Tensor): A tuple of n-1 tensors. Each has the shape of
            :math:`(N, C_{out}, H_{n-2-i}, W_{n-2-i})`. If
            ``last_stage_only=True`` (default), the size of the
            tuple is 1 and only the last element will be returned.
            With a single input scale there is no fusion stage and the
            returned tuple is empty.
        """
        lateral_features = [
            l_conv(inputs[i]) for i, l_conv in enumerate(self.lateral_convs)
        ]

        outs = []
        # Walk the fusion stages from the last one down to the first, i.e.
        # from the last input scale towards the first input scale.
        for stage in reversed(range(len(self.smooth_convs_3x3))):
            # The first iteration starts from the last lateral feature;
            # later iterations continue from the previous fused output.
            last_out = outs[-1] if outs else lateral_features[-1]
            upsample = self._upsample_x2(last_out)
            upsample_cat = torch.cat((upsample, lateral_features[stage]),
                                     dim=1)
            smooth_1x1 = self.smooth_convs_1x1[stage](upsample_cat)
            smooth_3x3 = self.smooth_convs_3x3[stage](smooth_1x1)
            outs.append(smooth_3x3)

        return tuple(outs[-1:]) if self.last_stage_only else tuple(outs)