Source code for mmocr.models.textrecog.backbones.very_deep_vgg

import torch.nn as nn
from mmcv.cnn import uniform_init, xavier_init

from mmdet.models.builder import BACKBONES


@BACKBONES.register_module()
class VeryDeepVgg(nn.Module):
    """VGG-VeryDeep style convolutional backbone for text recognition.

    Adapted from `VGG-VeryDeep <https://arxiv.org/pdf/1409.1556.pdf>`_.
    The network is a plain stack of seven convolution stages stored in
    ``self.cnn``; each stage is ``conv -> [batchnorm] -> activation`` and
    some stages are followed by a max-pooling layer.

    Args:
        leakyRelu (bool): If truthy, every activation is
            ``LeakyReLU(0.2)``; otherwise a plain ``ReLU`` is used.
        input_channels (int): Number of channels of input image tensor.
    """

    def __init__(self, leakyRelu=True, input_channels=3):
        super().__init__()

        # Per-stage convolution hyper-parameters (index == stage number).
        kernel_sizes = [3, 3, 3, 3, 3, 3, 2]
        paddings = [1, 1, 1, 1, 1, 1, 0]
        strides = [1, 1, 1, 1, 1, 1, 1]
        widths = [64, 128, 256, 256, 512, 512, 512]

        self.channels = widths

        # Stages whose convolution is followed by batch normalization.
        normalized_stages = (2, 4, 6)

        # Positional MaxPool2d arguments, keyed by the stage the pooling
        # follows. The first two poolings halve height and width. The last
        # two halve the height only: along the width they use stride 1 with
        # padding 1, which makes the width grow by one column each time.
        halving_pool = (2, 2)
        height_only_pool = ((2, 2), (2, 1), (0, 1))
        pool_after_stage = {
            0: halving_pool,
            1: halving_pool,
            3: height_only_pool,
            5: height_only_pool,
        }

        layers = nn.Sequential()
        in_width = input_channels
        pool_count = 0
        stage_specs = zip(widths, kernel_sizes, strides, paddings)
        for stage, (out_width, kernel, stride, pad) in enumerate(stage_specs):
            layers.add_module(
                'conv{}'.format(stage),
                nn.Conv2d(in_width, out_width, kernel, stride, pad))

            if stage in normalized_stages:
                layers.add_module('batchnorm{}'.format(stage),
                                  nn.BatchNorm2d(out_width))

            if leakyRelu:
                activation = nn.LeakyReLU(0.2, inplace=True)
            else:
                activation = nn.ReLU(True)
            layers.add_module('relu{}'.format(stage), activation)

            if stage in pool_after_stage:
                # Poolings are numbered in order of appearance, not by stage.
                layers.add_module('pooling{}'.format(pool_count),
                                  nn.MaxPool2d(*pool_after_stage[stage]))
                pool_count += 1

            # The next stage consumes what this one produced.
            in_width = out_width

        self.cnn = layers

    def init_weights(self, pretrained=None):
        """Re-initialize convolution and batch-norm layers.

        Every ``nn.Conv2d`` goes through mmcv's ``xavier_init`` and every
        ``nn.BatchNorm2d`` through mmcv's ``uniform_init``, both called with
        their default arguments.

        Args:
            pretrained: Accepted but not used by this method.
        """
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                uniform_init(module)
            elif isinstance(module, nn.Conv2d):
                xavier_init(module)

    def out_channels(self):
        """Return the channel count of the last convolution stage."""
        return self.channels[-1]

    def forward(self, x):
        """Run the convolutional stack on an image batch.

        Args:
            x (Tensor): Input with ``input_channels`` channels, e.g. of
                shape ``(N, input_channels, H, W)``.

        Returns:
            Tensor: Feature map with ``out_channels()`` channels. From the
            layer parameters, the output height is ``H // 16 - 1`` and the
            width is ``W // 4 + 1``, so a 32-pixel-high input yields
            height 1.
        """
        return self.cnn(x)