
Source code for mmocr.datasets.dataset_wrapper

# Copyright (c) OpenMMLab. All rights reserved.
from typing import Callable, List, Sequence, Union

from mmengine.dataset import BaseDataset, Compose
from mmengine.dataset import ConcatDataset as MMENGINE_CONCATDATASET

from mmocr.registry import DATASETS


@DATASETS.register_module()
class ConcatDataset(MMENGINE_CONCATDATASET):
    """A wrapper of concatenated dataset.

    Same as ``torch.utils.data.dataset.ConcatDataset`` and support lazy_init.

    Note:
        ``ConcatDataset`` should not inherit from ``BaseDataset`` since
        ``get_subset`` and ``get_subset_`` could produce ambiguous meaning
        sub-dataset which conflicts with original dataset. If you want to use
        a sub-dataset of ``ConcatDataset``, you should set ``indices``
        arguments for wrapped dataset which inherit from ``BaseDataset``.

    Args:
        datasets (Sequence[BaseDataset] or Sequence[dict]): A list of datasets
            which will be concatenated.
        pipeline (list, optional): Processing pipeline to be applied to all
            of the concatenated datasets. Defaults to [].
        verify_meta (bool): Whether to verify the consistency of meta
            information of the concatenated datasets. Defaults to True.
        force_apply (bool): Whether to force apply pipeline to all datasets
            if any of them already has the pipeline configured. Defaults to
            False.
        lazy_init (bool, optional): Whether to load annotation during
            instantiation. Defaults to False.
    """

    def __init__(self,
                 datasets: Sequence[Union[BaseDataset, dict]],
                 pipeline: List[Union[dict, Callable]] = [],
                 verify_meta: bool = True,
                 force_apply: bool = False,
                 lazy_init: bool = False):
        self.datasets: List[BaseDataset] = []

        # Compose dataset
        pipeline = Compose(pipeline)

        for i, dataset in enumerate(datasets):
            if isinstance(dataset, dict):
                self.datasets.append(DATASETS.build(dataset))
            elif isinstance(dataset, BaseDataset):
                self.datasets.append(dataset)
            else:
                raise TypeError(
                    'elements in datasets sequence should be config or '
                    f'`BaseDataset` instance, but got {type(dataset)}')
            if len(pipeline.transforms) > 0:
                if len(self.datasets[-1].pipeline.transforms
                       ) > 0 and not force_apply:
                    raise ValueError(
                        f'The pipeline of dataset {i} is not empty, '
                        'please set `force_apply` to True.')
                self.datasets[-1].pipeline = pipeline

        self._metainfo = self.datasets[0].metainfo

        if verify_meta:
            # Only use metainfo of first dataset.
            for i, dataset in enumerate(self.datasets, 1):
                if self._metainfo != dataset.metainfo:
                    raise ValueError(
                        f'The meta information of the {i}-th dataset does '
                        'not match meta information of the first dataset')

        self._fully_initialized = False
        if not lazy_init:
            self.full_init()

        self._metainfo.update(dict(cumulative_sizes=self.cumulative_sizes))
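
As a usage sketch: the ``ConcatDataset`` arguments below are the ones defined
above, while the inner dataset configs, paths, annotation files, and pipeline
transforms are illustrative placeholders, not part of this module.

# A minimal sketch, assuming two hypothetical OCRDataset configs with an
# empty per-dataset pipeline so the shared pipeline below is applied to both.
concat_dataset = ConcatDataset(
    datasets=[
        dict(
            type='OCRDataset',
            data_root='data/det/dataset_a',  # hypothetical path
            ann_file='textdet_train.json',   # hypothetical annotation file
            pipeline=[]),
        dict(
            type='OCRDataset',
            data_root='data/det/dataset_b',  # hypothetical path
            ann_file='textdet_train.json',
            pipeline=[]),
    ],
    # Shared pipeline applied to every wrapped dataset (illustrative).
    pipeline=[
        dict(type='LoadImageFromFile'),
        dict(type='LoadOCRAnnotations', with_bbox=True, with_polygon=True),
    ],
    verify_meta=True,   # require consistent metainfo across the datasets
    lazy_init=False)    # load annotations immediately via full_init()

If any wrapped dataset already has a non-empty pipeline, pass
``force_apply=True`` to overwrite it with the shared pipeline; otherwise the
constructor raises a ``ValueError``.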