Shortcuts

Note

You are reading the documentation for MMOCR 0.x, which will be deprecated by the end of 2022. We recommend that you upgrade to MMOCR 1.0 to benefit from the new features and better performance brought by OpenMMLab 2.0. See the maintenance plan, changelog, code and documentation of MMOCR 1.0 for more details.

Source code for mmocr.datasets.utils.loader

# Copyright (c) OpenMMLab. All rights reserved.
import warnings

from mmocr.datasets.builder import LOADERS, build_parser
from .backend import (HardDiskAnnFileBackend, HTTPAnnFileBackend,
                      PetrelAnnFileBackend)


@LOADERS.register_module()
class AnnFileLoader:
    """Load an annotation file and serve its entries as parsed dicts.

    The annotation file is read through a storage backend chosen by
    ``file_storage_backend``; each raw entry is converted to dict format
    by the parser built from ``parser`` when it is accessed.

    Args:
        ann_file (str): Annotation file path.
        parser (dict): Config dict used to build the parser that parses
            the original annotation infos.
        repeat (int|float): Repeated times of dataset. Must be positive.
        file_storage_backend (str): The storage backend type for the
            annotation file. Options are "disk", "http" and "petrel".
            Default: "disk".
        file_format (str): The format of the annotation file. Options are
            "txt" and "lmdb". Default: "txt".
        **kwargs: Extra keyword arguments forwarded to the backend
            constructor.

    Raises:
        ValueError: If ``file_format`` is "lmdb" and the parser type is
            "LineStrParser".
    """

    # Maps the ``file_storage_backend`` option to its backend class.
    _backends = {
        'disk': HardDiskAnnFileBackend,
        'petrel': PetrelAnnFileBackend,
        'http': HTTPAnnFileBackend
    }

    def __init__(self,
                 ann_file,
                 parser,
                 repeat=1,
                 file_storage_backend='disk',
                 file_format='txt',
                 **kwargs):
        # Argument checks; the type check on ``repeat`` must precede the
        # positivity check so the comparison is only done on numbers.
        assert isinstance(ann_file, str)
        assert isinstance(repeat, (int, float))
        assert isinstance(parser, dict)
        assert repeat > 0
        assert file_storage_backend in ('disk', 'http', 'petrel')
        assert file_format in ('txt', 'lmdb')

        # The parser type is only inspected for lmdb annotation files.
        if file_format == 'lmdb':
            if parser['type'] == 'LineStrParser':
                raise ValueError(
                    'We only support using LineJsonParser to parse lmdb '
                    'file. Please use LineJsonParser in the dataset config')

        self.parser = build_parser(parser)
        self.repeat = repeat

        backend_cls = self._backends[file_storage_backend]
        self.ann_file_backend = backend_cls(file_format, **kwargs)
        self.ori_data_infos = self._load(ann_file)

    def __len__(self):
        """Return the number of loaded entries scaled by ``repeat``."""
        scaled_size = len(self.ori_data_infos) * self.repeat
        return int(scaled_size)

    def _load(self, ann_file):
        """Read the annotation file by calling the storage backend."""
        return self.ann_file_backend(ann_file)

    def __getitem__(self, index):
        """Return the annotation info at ``index`` as parsed by the parser."""
        return self.parser.get_item(self.ori_data_infos, index)

    def __iter__(self):
        """Reset the iteration cursor and return the loader itself."""
        self._n = 0
        return self

    def __next__(self):
        """Return the next parsed annotation, or stop once exhausted."""
        if self._n >= len(self):
            raise StopIteration
        item = self[self._n]
        self._n += 1
        return item

    def close(self):
        """For ann_file with lmdb format only."""
        self.ori_data_infos.close()
@LOADERS.register_module()
class HardDiskLoader(AnnFileLoader):
    """Deprecated loader for txt format annotation files on hard disks.

    Emits a ``UserWarning`` on construction and then initialises
    ``AnnFileLoader`` with the "disk" storage backend and "txt" format.

    Args:
        ann_file (str): Annotation file path.
        parser (dict): Config dict used to build the annotation parser.
        repeat (int|float): Repeated times of dataset.
    """

    def __init__(self, ann_file, parser, repeat=1):
        deprecation_msg = ('HardDiskLoader is deprecated, '
                           'please use AnnFileLoader instead.')
        warnings.warn(deprecation_msg, UserWarning)
        super().__init__(
            ann_file=ann_file,
            parser=parser,
            repeat=repeat,
            file_storage_backend='disk',
            file_format='txt')
@LOADERS.register_module()
class LmdbLoader(AnnFileLoader):
    """Deprecated loader for lmdb format annotation files on hard disks.

    Emits a ``UserWarning`` on construction and then initialises
    ``AnnFileLoader`` with the "disk" storage backend and "lmdb" format.

    Args:
        ann_file (str): Annotation file path.
        parser (dict): Config dict used to build the annotation parser.
        repeat (int|float): Repeated times of dataset.
    """

    def __init__(self, ann_file, parser, repeat=1):
        deprecation_msg = ('LmdbLoader is deprecated, '
                           'please use AnnFileLoader instead.')
        warnings.warn(deprecation_msg, UserWarning)
        super().__init__(
            ann_file=ann_file,
            parser=parser,
            repeat=repeat,
            file_storage_backend='disk',
            file_format='lmdb')
Read the Docs v: v0.6.3
Versions
latest
stable
v0.6.3
v0.6.2
v0.6.1
v0.6.0
v0.5.0
v0.4.1
v0.4.0
v0.3.0
v0.2.1
v0.2.0
v0.1.0
dev-1.x
Downloads
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.