Shortcuts

Source code for mmocr.models.textrecog.backbones.shallow_cnn

# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule

from mmocr.models.builder import BACKBONES


@BACKBONES.register_module()
class ShallowCNN(BaseModule):
    """Implement Shallow CNN block for SATRN.

    SATRN: `On Recognizing Texts of Arbitrary Shapes with 2D Self-Attention
    <https://arxiv.org/pdf/1910.04396.pdf>`_.

    The block is two 3x3 conv + BN + ReLU stages, each followed by the same
    2x2 / stride-2 max pooling layer.

    Args:
        input_channels (int): Number of channels of input image tensor
            :math:`D_i`.
        hidden_dim (int): Size of hidden layers of the model :math:`D_m`.
            The first conv stage outputs ``hidden_dim // 2`` channels and
            the second outputs ``hidden_dim`` channels.
        init_cfg (dict or list[dict], optional): Initialization configs.
    """

    # NOTE(review): the list default for ``init_cfg`` is a shared mutable
    # object. It is kept as-is so that an explicit ``init_cfg=None`` keeps
    # its current meaning; this presumes ``BaseModule`` does not mutate the
    # config it receives -- confirm against mmcv.
    def __init__(self,
                 input_channels=1,
                 hidden_dim=512,
                 init_cfg=[
                     dict(type='Kaiming', layer='Conv2d'),
                     dict(type='Uniform', layer='BatchNorm2d')
                 ]):
        super().__init__(init_cfg=init_cfg)
        # Kept as asserts so callers relying on AssertionError for bad
        # argument types keep working.
        assert isinstance(input_channels, int)
        assert isinstance(hidden_dim, int)

        # Attribute names and creation order are unchanged so that
        # parameter names inside checkpoints stay the same.
        self.conv1 = self._conv_bn_relu(input_channels, hidden_dim // 2)
        self.conv2 = self._conv_bn_relu(hidden_dim // 2, hidden_dim)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

    @staticmethod
    def _conv_bn_relu(in_channels, out_channels):
        """Build one 3x3, stride-1, padding-1 conv stage with BN and ReLU.

        Fresh ``norm_cfg`` / ``act_cfg`` dicts are created on every call so
        the two stages never share a config object.
        """
        return ConvModule(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm_cfg=dict(type='BN'),
            act_cfg=dict(type='ReLU'))

    def forward(self, x):
        """
        Args:
            x (Tensor): Input image feature :math:`(N, D_i, H, W)`.

        Returns:
            Tensor: A tensor of shape :math:`(N, D_m, H/4, W/4)`.
        """
        # The single pooling layer is applied after each conv stage, so the
        # spatial size is halved twice.
        x = self.conv1(x)
        x = self.pool(x)

        x = self.conv2(x)
        x = self.pool(x)

        return x
Read the Docs v: v0.4.0
Versions
latest
stable
v0.4.0
v0.3.0
v0.2.1
v0.2.0
v0.1.0
Downloads
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.