Source code for mmocr.models.textdet.data_preprocessors.data_preprocessor

# Copyright (c) OpenMMLab. All rights reserved.
from numbers import Number
from typing import Dict, List, Optional, Sequence, Union

import torch.nn as nn
from mmengine.model import ImgDataPreprocessor

from mmocr.registry import MODELS


@MODELS.register_module()
class TextDetDataPreprocessor(ImgDataPreprocessor):
    """Image pre-processor for detection tasks.

    Compared with :class:`mmengine.ImgDataPreprocessor`,

    1. It supports batch augmentations.
    2. It will additionally append ``batch_input_shape`` to data_samples
       considering the detection task.

    It provides the data pre-processing as follows:

    - Collate and move data to the target device.
    - Pad inputs to the maximum size of the current batch with the defined
      ``pad_value``. The padding size can be divisible by a defined
      ``pad_size_divisor``.
    - Stack inputs to batch_inputs.
    - Convert inputs from BGR to RGB if the shape of input is (3, H, W).
    - Normalize images with the defined std and mean.
    - Do batch augmentations during training.

    Args:
        mean (Sequence[Number], optional): The pixel mean of R, G, B
            channels. Defaults to None.
        std (Sequence[Number], optional): The pixel standard deviation of
            R, G, B channels. Defaults to None.
        pad_size_divisor (int): The size of padded image should be
            divisible by ``pad_size_divisor``. Defaults to 1.
        pad_value (Number): The padded pixel value. Defaults to 0.
        pad_mask (bool): Whether to pad instance masks. Defaults to False.
        mask_pad_value (int): The padded pixel value for instance masks.
            Defaults to 0.
        pad_seg (bool): Whether to pad semantic segmentation maps.
            Defaults to False.
        seg_pad_value (int): The padded pixel value for semantic
            segmentation maps. Defaults to 255.
        bgr_to_rgb (bool): Whether to convert images from BGR to RGB.
            Defaults to False.
        rgb_to_bgr (bool): Whether to convert images from RGB to BGR.
            Defaults to False.
        batch_augments (list[dict], optional): Batch-level augmentations.
            Defaults to None.
    """

    def __init__(self,
                 mean: Sequence[Number] = None,
                 std: Sequence[Number] = None,
                 pad_size_divisor: int = 1,
                 pad_value: Union[float, int] = 0,
                 bgr_to_rgb: bool = False,
                 rgb_to_bgr: bool = False,
                 batch_augments: Optional[List[Dict]] = None) -> None:
        super().__init__(
            mean=mean,
            std=std,
            pad_size_divisor=pad_size_divisor,
            pad_value=pad_value,
            bgr_to_rgb=bgr_to_rgb,
            rgb_to_bgr=rgb_to_bgr)
        if batch_augments is not None:
            # Build batch-level augmentations from their configs via the
            # model registry and register them as submodules.
            self.batch_augments = nn.ModuleList(
                [MODELS.build(aug) for aug in batch_augments])
        else:
            self.batch_augments = None

    def forward(self, data: Dict, training: bool = False) -> Dict:
        """Perform normalization, padding and BGR-to-RGB conversion based on
        ``BaseDataPreprocessor``.

        Args:
            data (dict): Data sampled from the dataloader.
            training (bool): Whether to enable training-time augmentation.

        Returns:
            dict: Data in the same format as the model input.
        """
        data = super().forward(data=data, training=training)
        inputs, data_samples = data['inputs'], data['data_samples']

        if data_samples is not None:
            # Record the padded batch shape so downstream modules know the
            # actual input size shared by all samples in the batch.
            batch_input_shape = tuple(inputs[0].size()[-2:])
            for data_sample in data_samples:
                data_sample.set_metainfo(
                    {'batch_input_shape': batch_input_shape})

        if training and self.batch_augments is not None:
            for batch_aug in self.batch_augments:
                inputs, data_samples = batch_aug(inputs, data_samples)

        return data
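The snippet below is a minimal usage sketch, not part of the library source. It instantiates the preprocessor directly and runs it on two dummy images of different sizes to show the normalization, padding and stacking behaviour described above. The mean/std values, image sizes and ``pad_size_divisor`` are illustrative assumptions; in a real setup the class is usually built from a ``data_preprocessor=dict(type='TextDetDataPreprocessor', ...)`` entry in the model config via the registry.

import torch

from mmocr.models.textdet.data_preprocessors.data_preprocessor import \
    TextDetDataPreprocessor

# Illustrative normalization statistics; real configs set mean/std to
# match the backbone's pretraining statistics.
preprocessor = TextDetDataPreprocessor(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_size_divisor=32)

# Two images of different sizes, as produced by pseudo-collate in the
# dataloader: a list of (3, H, W) uint8 tensors plus data samples
# (None here for brevity).
data = {
    'inputs': [
        torch.randint(0, 256, (3, 224, 300), dtype=torch.uint8),
        torch.randint(0, 256, (3, 256, 280), dtype=torch.uint8),
    ],
    'data_samples': None,
}

out = preprocessor(data, training=False)
# Both images are normalized and padded to the maximum size of the batch,
# rounded up to a multiple of pad_size_divisor, then stacked.
print(out['inputs'].shape)  # torch.Size([2, 3, 256, 320])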