

You are reading the documentation for MMOCR 0.x, which will be deprecated by the end of 2022. We recommend that you upgrade to MMOCR 1.0 to enjoy the rich new features and better performance brought by OpenMMLab 2.0. Check out the maintenance plan, changelog, code and documentation of MMOCR 1.0 for more details.

Source code for mmocr.datasets.pipelines.loading

# Copyright (c) OpenMMLab. All rights reserved.
import warnings

import lmdb
import mmcv
import numpy as np
from mmdet.core import BitmapMasks, PolygonMasks
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines.loading import LoadAnnotations, LoadImageFromFile

@PIPELINES.register_module()
class LoadTextAnnotations(LoadAnnotations):
    """Annotation loader for text detection pipelines.

    Args:
        with_bbox (bool): Whether to parse and load the bbox annotation.
            Default: True.
        with_label (bool): Whether to parse and load the label annotation.
            Default: True.
        with_mask (bool): Whether to parse and load the mask annotation.
            Default: False.
        with_seg (bool): Whether to parse and load the semantic segmentation
            annotation. Default: False.
        poly2mask (bool): Whether to convert the instance masks from polygons
            to bitmaps. Default: True.
        use_img_shape (bool): If True, take the mask height/width from
            ``results['ori_shape']`` (written by an earlier loading step such
            as ``LoadImageFromFile``) instead of ``results['img_info']``.
            Default: False.
    """

    def __init__(self,
                 with_bbox=True,
                 with_label=True,
                 with_mask=False,
                 with_seg=False,
                 poly2mask=True,
                 use_img_shape=False):
        parent_options = dict(
            with_bbox=with_bbox,
            with_label=with_label,
            with_mask=with_mask,
            with_seg=with_seg,
            poly2mask=poly2mask)
        super().__init__(**parent_options)
        self.use_img_shape = use_img_shape

    def process_polygons(self, polygons):
        """Turn raw polygons into float32 arrays and drop invalid ones.

        A polygon is kept only if its length is even and at least 6.

        Args:
            polygons (list[list]): Polygons of one instance.

        Returns:
            list[numpy.ndarray]: The polygons that passed the check.
        """
        as_arrays = [np.array(raw).astype(np.float32) for raw in polygons]
        return [
            arr for arr in as_arrays
            if not (len(arr) % 2 != 0 or len(arr) < 6)
        ]

    def _load_masks(self, results):
        """Build ``gt_masks`` (and ``gt_masks_ignore`` when present).

        Args:
            results (dict): Pipeline dict. Reads ``ann_info`` and
                ``img_info`` (plus ``ori_shape`` when ``use_img_shape`` is
                set) and appends to ``mask_fields``.

        Returns:
            dict: The same dict with the mask entries added.
        """
        ann_info = results['ann_info']
        img_info = results['img_info']
        height, width = img_info['height'], img_info['width']

        if self.use_img_shape:
            ori_shape = results.get('ori_shape', None)
            if not ori_shape:
                warnings.warn('"ori_shape" not in results, use the shape '
                              'in "img_info" instead.')
            else:
                # Prefer the real image size and keep img_info in sync.
                height, width = ori_shape[:2]
                img_info['height'] = height
                img_info['width'] = width

        def to_masks(raw_masks):
            # Same conversion is used for regular and ignored masks.
            if self.poly2mask:
                bitmaps = [
                    self._poly2mask(item, height, width) for item in raw_masks
                ]
                return BitmapMasks(bitmaps, height, width)
            cleaned = [self.process_polygons(item) for item in raw_masks]
            return PolygonMasks(cleaned, height, width)

        gt_masks = to_masks(ann_info['masks'])

        ignored = ann_info.get('masks_ignore', None)
        if ignored is not None:
            results['gt_masks_ignore'] = to_masks(ignored)
            results['mask_fields'].append('gt_masks_ignore')

        results['gt_masks'] = gt_masks
        results['mask_fields'].append('gt_masks')
        return results
@PIPELINES.register_module()
class LoadImageFromNdarray(LoadImageFromFile):
    """Load an image that is already held in memory as ``np.ndarray``.

    Similar to :obj:`LoadImageFromFile`, but the image is taken from
    ``results['img']`` instead of being read from a file.
    """

    def __call__(self, results):
        """Normalize the in-memory image and add image meta information.

        Args:
            results (dict): Result dict holding a ``uint8`` image array in
                ``results['img']`` (e.g. a frame read from a webcam).

        Returns:
            dict: The dict with the (possibly converted) image and its meta
            information.

        Raises:
            AssertionError: If the image dtype is not ``uint8``.
        """
        img = results['img']
        assert img.dtype == 'uint8', \
            f'expected a uint8 image, got dtype {img.dtype}'

        # Arrays without a channel axis (e.g. plain (H, W) grayscale) are
        # treated as single-channel; indexing shape[2] on them would raise
        # IndexError.
        num_channels = img.shape[2] if img.ndim >= 3 else 1

        if self.color_type == 'grayscale' and num_channels == 3:
            img = mmcv.bgr2gray(img, keepdim=True)
        elif self.color_type == 'color' and num_channels == 1:
            # NOTE(review): relies on mmcv.gray2bgr accepting both (H, W)
            # and (H, W, 1) inputs - confirm against the pinned mmcv version.
            img = mmcv.gray2bgr(img)

        if self.to_float32:
            img = img.astype(np.float32)

        # No file is involved, so the filename fields are left empty.
        results['filename'] = None
        results['ori_filename'] = None
        results['img'] = img
        results['img_shape'] = img.shape
        results['ori_shape'] = img.shape
        results['img_fields'] = ['img']
        return results
@PIPELINES.register_module()
class LoadImageFromLMDB:
    """Load an image from an lmdb file.

    Similar to :obj:`LoadImageFromFile`, but the image is read from the lmdb
    environment located at ``results['img_prefix']``, using
    ``results['img_info']['filename']`` as the record key.

    Args:
        color_type (str): Flag passed to ``mmcv.imfrombytes`` when decoding
            the stored bytes. Default: 'color'.
    """

    def __init__(self, color_type='color'):
        self.color_type = color_type
        # The environment is opened lazily on the first call.
        self.env = None
        # Not used by this class; kept so existing attribute access still
        # works.
        self.txn = None

    def __call__(self, results):
        """Read and decode one image from the lmdb environment.

        Args:
            results (dict): Result dict providing ``img_info['filename']``
                (the lmdb key) and ``img_prefix`` (the lmdb path).

        Returns:
            dict | None: The dict with the decoded image and its meta
            information, or None if decoding raised ``IOError``.
        """
        img_key = results['img_info']['filename']
        lmdb_path = results['img_prefix']

        # Open the environment once (read-only, no locking) and reuse it.
        if self.env is None:
            self.env = lmdb.open(
                lmdb_path,
                max_readers=1,
                readonly=True,
                lock=False,
                readahead=False,
                meminit=False,
            )

        with self.env.begin(write=False) as txn:
            # NOTE(review): txn.get presumably yields None for a missing
            # key, which is then handed to the decoder as-is - confirm
            # whether that case needs explicit handling.
            imgbuf = txn.get(img_key.encode('utf-8'))
            try:
                img = mmcv.imfrombytes(imgbuf, flag=self.color_type)
            except IOError:
                print('Corrupted image for {}'.format(img_key))
                return None

        results['filename'] = img_key
        results['ori_filename'] = img_key
        results['img'] = img
        results['img_shape'] = img.shape
        results['ori_shape'] = img.shape
        results['img_fields'] = ['img']
        return results

    def __repr__(self):
        return '{} (color_type={})'.format(self.__class__.__name__,
                                           self.color_type)

    def __del__(self):
        # getattr guards against __init__ never having set the attribute.
        env = getattr(self, 'env', None)
        if env is not None:
            env.close()
            self.env = None
Read the Docs v: v0.6.3
On Read the Docs
Project Home

Free document hosting provided by Read the Docs.