Shortcuts

Source code for mmocr.datasets.utils.loader

# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp

from mmocr.datasets.builder import LOADERS, build_parser
from mmocr.utils import list_from_file


@LOADERS.register_module()
class Loader:
    """Load annotation from annotation file, and parse instance information to
    dict format with parser.

    Subclasses supply the storage backend by implementing :meth:`_load`.
    The loader acts as its own iterator (``__iter__`` returns ``self`` and
    rewinds a single shared cursor), so only one iteration over a given
    instance can be in progress at a time.

    Args:
        ann_file (str): Annotation file path.
        parser (dict): Dictionary to construct parser
            to parse original annotation infos.
        repeat (int): Repeated times of annotations.
    """

    def __init__(self, ann_file, parser, repeat=1):
        # Validation is kept as assertions so that bad arguments keep
        # raising AssertionError exactly as before.
        assert isinstance(ann_file, str)
        assert isinstance(repeat, int)
        assert isinstance(parser, dict)
        assert repeat > 0
        assert osp.exists(ann_file), f'{ann_file} is not exist'

        # Iteration cursor read by __next__. It is initialised here, not
        # only in __iter__, so that next(loader) works even when
        # iter(loader) has not been called first.
        self._n = 0
        self.ori_data_infos = self._load(ann_file)
        self.parser = build_parser(parser)
        self.repeat = repeat

    def __len__(self):
        """Return the number of loaded annotations times ``repeat``."""
        return len(self.ori_data_infos) * self.repeat

    def _load(self, ann_file):
        """Load annotation file.

        Must be overridden by subclasses; the returned object is stored as
        ``ori_data_infos`` and has to support ``len()``.
        """
        raise NotImplementedError

    def __getitem__(self, index):
        """Retrieve anno info of one instance with dict format.

        The lookup is delegated to ``parser.get_item``, which receives the
        raw annotations and ``index`` unchanged.
        """
        # NOTE(review): ``len(self)`` includes the ``repeat`` factor, so the
        # parser presumably maps indices beyond the raw annotation count
        # back into range - confirm against the parser implementation.
        return self.parser.get_item(self.ori_data_infos, index)

    def __iter__(self):
        """Rewind the cursor and return ``self`` as the iterator."""
        self._n = 0
        return self

    def __next__(self):
        """Return the item at the cursor and advance it.

        Raises:
            StopIteration: When the cursor has reached ``len(self)``.
        """
        if self._n < len(self):
            data = self[self._n]
            self._n += 1
            return data
        raise StopIteration


@LOADERS.register_module()
class HardDiskLoader(Loader):
    """Loader that reads the annotation file from hard disk into RAM.

    Args:
        ann_file (str): Annotation file path.
    """

    def _load(self, ann_file):
        """Return what ``list_from_file`` produces for ``ann_file``."""
        annotations = list_from_file(ann_file)
        return annotations
@LOADERS.register_module()
class LmdbLoader(Loader):
    """Loader whose annotation file uses the lmdb storage backend."""

    def _load(self, ann_file):
        """Wrap ``ann_file`` in an ``LmdbAnnFileBackend`` and return it."""
        return LmdbAnnFileBackend(ann_file)
class LmdbAnnFileBackend:
    """Lmdb storage backend for annotation file.

    The database is read through two kinds of keys: ``total_number``, whose
    value is parsed as the number of annotation lines, and the stringified
    integer index of each line.

    Args:
        lmdb_path (str): Lmdb file path.
        coding (str): Codec used to encode keys and decode values.
    """

    def __init__(self, lmdb_path, coding='utf8'):
        self.lmdb_path = lmdb_path
        self.coding = coding
        # This environment is only needed to read the entry count. It is
        # deliberately not stored on ``self`` (see __getitem__) and is closed
        # as soon as the count has been read so the handle is not leaked.
        env = self._get_env()
        try:
            with env.begin(write=False) as txn:
                self.total_number = int(
                    txn.get('total_number'.encode(self.coding)).decode(
                        self.coding))
        finally:
            env.close()

    def __getitem__(self, index):
        """Retrieve one line from the lmdb file by index.

        Args:
            index (int): Position of the annotation line.

        Returns:
            str: The decoded annotation line.

        Raises:
            IndexError: If nothing is stored under ``index``.
        """
        # only attach env to self when __getitem__ is called
        # because env object cannot be pickled
        if not hasattr(self, 'env'):
            self.env = self._get_env()

        with self.env.begin(write=False) as txn:
            raw = txn.get(str(index).encode(self.coding))
            if raw is None:
                # A missing key used to surface as an AttributeError from
                # ``None.decode``; report it as an out-of-range index.
                raise IndexError(
                    f'index {index} is not found in {self.lmdb_path}')
            line = raw.decode(self.coding)
        return line

    def __len__(self):
        """Return the entry count read from ``total_number``."""
        return self.total_number

    def _get_env(self):
        """Open ``lmdb_path`` as a read-only, lock-free lmdb environment."""
        # Imported lazily so lmdb is only needed when this backend is used.
        import lmdb
        return lmdb.open(
            self.lmdb_path,
            max_readers=1,
            readonly=True,
            lock=False,
            readahead=False,
            meminit=False,
        )
Read the Docs v: v0.4.0
Versions
latest
stable
v0.4.0
v0.3.0
v0.2.1
v0.2.0
v0.1.0
Downloads
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.