Note
You are reading the documentation for MMOCR 0.x, which will soon be deprecated by the end of 2022. We recommend you upgrade to MMOCR 1.0 to enjoy fruitful new features and better performance brought by OpenMMLab 2.0. Check out the maintenance plan, changelog, code and documentation of MMOCR 1.0 for more details.
Source code for mmocr.datasets.pipelines.loading

# Copyright (c) OpenMMLab. All rights reserved.
import warnings

import lmdb
import mmcv
import numpy as np
from mmdet.core import BitmapMasks, PolygonMasks
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines.loading import LoadAnnotations, LoadImageFromFile


[docs]@PIPELINES.register_module()
class LoadTextAnnotations(LoadAnnotations):
    """Load annotations for text detection.

    Args:
        with_bbox (bool): Whether to parse and load the bbox annotation.
             Default: True.
        with_label (bool): Whether to parse and load the label annotation.
            Default: True.
        with_mask (bool): Whether to parse and load the mask annotation.
             Default: False.
        with_seg (bool): Whether to parse and load the semantic segmentation
            annotation. Default: False.
        poly2mask (bool): Whether to convert the instance masks from polygons
            to bitmaps. Default: True.
        use_img_shape (bool): Use the shape of loaded image from
            previous pipeline ``LoadImageFromFile`` to generate mask.
    """

    def __init__(self,
                 with_bbox=True,
                 with_label=True,
                 with_mask=False,
                 with_seg=False,
                 poly2mask=True,
                 use_img_shape=False):
        super().__init__(
            with_bbox=with_bbox,
            with_label=with_label,
            with_mask=with_mask,
            with_seg=with_seg,
            poly2mask=poly2mask)

        self.use_img_shape = use_img_shape

[docs]    def process_polygons(self, polygons):
        """Convert polygons to list of ndarray and filter invalid polygons.

        Args:
            polygons (list[list]): Polygons of one instance.

        Returns:
            list[numpy.ndarray]: Processed polygons.
        """

        polygons = [np.array(p).astype(np.float32) for p in polygons]
        valid_polygons = []
        for polygon in polygons:
            if len(polygon) % 2 == 0 and len(polygon) >= 6:
                valid_polygons.append(polygon)
        return valid_polygons

    def _load_masks(self, results):
        ann_info = results['ann_info']
        h, w = results['img_info']['height'], results['img_info']['width']
        if self.use_img_shape:
            if results.get('ori_shape', None):
                h, w = results['ori_shape'][:2]
                results['img_info']['height'] = h
                results['img_info']['width'] = w
            else:
                warnings.warn('"ori_shape" not in results, use the shape '
                              'in "img_info" instead.')
        gt_masks = ann_info['masks']
        if self.poly2mask:
            gt_masks = BitmapMasks(
                [self._poly2mask(mask, h, w) for mask in gt_masks], h, w)
        else:
            gt_masks = PolygonMasks(
                [self.process_polygons(polygons) for polygons in gt_masks], h,
                w)
        gt_masks_ignore = ann_info.get('masks_ignore', None)
        if gt_masks_ignore is not None:
            if self.poly2mask:
                gt_masks_ignore = BitmapMasks(
                    [self._poly2mask(mask, h, w) for mask in gt_masks_ignore],
                    h, w)
            else:
                gt_masks_ignore = PolygonMasks([
                    self.process_polygons(polygons)
                    for polygons in gt_masks_ignore
                ], h, w)
            results['gt_masks_ignore'] = gt_masks_ignore
            results['mask_fields'].append('gt_masks_ignore')

        results['gt_masks'] = gt_masks
        results['mask_fields'].append('gt_masks')
        return results


[docs]@PIPELINES.register_module()
class LoadImageFromNdarray(LoadImageFromFile):
    """Load an image from np.ndarray.

    Similar with :obj:`LoadImageFromFile`, but the image read from
    ``results['img']``, which is np.ndarray.
    """

    def __call__(self, results):
        """Call functions to add image meta information.

        Args:
            results (dict): Result dict with Webcam read image in
                ``results['img']``.

        Returns:
            dict: The dict contains loaded image and meta information.
        """
        assert results['img'].dtype == 'uint8'

        img = results['img']
        if self.color_type == 'grayscale' and img.shape[2] == 3:
            img = mmcv.bgr2gray(img, keepdim=True)
        if self.color_type == 'color' and img.shape[2] == 1:
            img = mmcv.gray2bgr(img)
        if self.to_float32:
            img = img.astype(np.float32)

        results['filename'] = None
        results['ori_filename'] = None
        results['img'] = img
        results['img_shape'] = img.shape
        results['ori_shape'] = img.shape
        results['img_fields'] = ['img']
        return results


[docs]@PIPELINES.register_module()
class LoadImageFromLMDB(object):
    """Load an image from lmdb file.

    Similar with :obj:'LoadImageFromFile', but the image read from
    "results['img_info']['filename']", which is a data index of lmdb file.
    """

    def __init__(self, color_type='color'):
        self.color_type = color_type
        self.env = None
        self.txn = None

    def __call__(self, results):
        img_key = results['img_info']['filename']
        lmdb_path = results['img_prefix']

        # lmdb env
        if self.env is None:
            self.env = lmdb.open(
                lmdb_path,
                max_readers=1,
                readonly=True,
                lock=False,
                readahead=False,
                meminit=False,
            )
        # read image
        with self.env.begin(write=False) as txn:
            imgbuf = txn.get(img_key.encode('utf-8'))
            try:
                img = mmcv.imfrombytes(imgbuf, flag=self.color_type)
            except IOError:
                print('Corrupted image for {}'.format(img_key))
                return None

            results['filename'] = img_key
            results['ori_filename'] = img_key
            results['img'] = img
            results['img_shape'] = img.shape
            results['ori_shape'] = img.shape
            results['img_fields'] = ['img']
            return results

    def __repr__(self):
        return '{} (color_type={})'.format(self.__class__.__name__,
                                           self.color_type)

    def __del__(self):
        if self.env is not None:
            self.env.close()