# Source code for mmocr.models.textrecog.backbones.very_deep_vgg

import torch.nn as nn
from mmcv.cnn import uniform_init, xavier_init

from mmdet.models.builder import BACKBONES


@BACKBONES.register_module()
class VeryDeepVgg(nn.Module):
    """Implement VGG-VeryDeep backbone for text recognition, modified from
    `VGG-VeryDeep <https://arxiv.org/pdf/1409.1556.pdf>`_.

    The network is one ``nn.Sequential`` made of seven convolution stages
    (convolution, optional batch normalization, activation) interleaved with
    four max-pooling layers.

    Args:
        leakyRelu (bool): Use leakyRelu or not. When True each convolution
            stage ends with ``nn.LeakyReLU(0.2, inplace=True)``, otherwise
            with ``nn.ReLU(True)``.
        input_channels (int): Number of channels of input image tensor.
    """

    def __init__(self, leakyRelu=True, input_channels=3):
        super().__init__()

        # Per-stage convolution hyper-parameters, indexed by stage number.
        ks = [3, 3, 3, 3, 3, 3, 2]  # kernel sizes
        ps = [1, 1, 1, 1, 1, 1, 0]  # paddings
        ss = [1, 1, 1, 1, 1, 1, 1]  # strides
        nm = [64, 128, 256, 256, 512, 512, 512]  # output channels

        self.channels = nm

        cnn = nn.Sequential()

        def convRelu(i, batchNormalization=False):
            """Append stage ``i`` (conv, optional batch norm, activation)."""
            # The first stage consumes the image; later stages consume the
            # output of the previous stage.
            nIn = input_channels if i == 0 else nm[i - 1]
            nOut = nm[i]
            cnn.add_module('conv{0}'.format(i),
                           nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
            if batchNormalization:
                cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
            if leakyRelu:
                cnn.add_module('relu{0}'.format(i),
                               nn.LeakyReLU(0.2, inplace=True))
            else:
                cnn.add_module('relu{0}'.format(i), nn.ReLU(True))

        # The CxHxW annotations below assume a 32x128 (HxW) input image,
        # which is what the first annotation (64x16x64) implies.
        convRelu(0)
        cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))  # 64x16x64
        convRelu(1)
        cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))  # 128x8x32
        convRelu(2, True)
        convRelu(3)
        # Stride (2, 1) with padding (0, 1): height is halved, while width
        # grows by one because a 2-wide window slides with stride 1 over a
        # row padded by one on each side.
        cnn.add_module('pooling{0}'.format(2),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 256x4x33
        convRelu(4, True)
        convRelu(5)
        cnn.add_module('pooling{0}'.format(3),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 512x2x34
        # 2x2 convolution without padding removes one row and one column.
        convRelu(6, True)  # 512x1x33

        self.cnn = cnn

    def init_weights(self, pretrained=None):
        """Initialize convolution and batch-norm layers.

        ``nn.Conv2d`` modules are passed to mmcv's ``xavier_init`` and
        ``nn.BatchNorm2d`` modules to mmcv's ``uniform_init``, both with
        their default arguments.

        Args:
            pretrained: Accepted for interface compatibility; this method
                does not read it.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                xavier_init(m)
            elif isinstance(m, nn.BatchNorm2d):
                uniform_init(m)

    def out_channels(self):
        """Return the channel count produced by the last convolution stage."""
        return self.channels[-1]

    def forward(self, x):
        """Run ``x`` through the sequential CNN and return the feature map.

        Args:
            x: Image tensor whose channel dimension matches the
                ``input_channels`` given at construction.
        """
        output = self.cnn(x)
        return output