Source code for mmocr.datasets.uniform_concat_dataset
# Copyright (c) OpenMMLab. All rights reserved.
import copy
from collections import defaultdict

import numpy as np
from mmcv.utils import print_log
from mmdet.datasets import DATASETS, ConcatDataset, build_dataset

from mmocr.utils import is_2dlist, is_type_list


@DATASETS.register_module()
class UniformConcatDataset(ConcatDataset):
"""A wrapper of ConcatDataset which support dataset pipeline assignment and
replacement.
Args:
datasets (list[dict] | list[list[dict]]): A list of datasets cfgs.
separate_eval (bool): Whether to evaluate the results
separately if it is used as validation dataset.
Defaults to True.
show_mean_scores (str | bool): Whether to compute the mean evaluation
results, only applicable when ``separate_eval=True``. Options are
[True, False, ``auto``]. If ``True``, mean results will be added to
the result dictionary with keys in the form of
``mean_{metric_name}``. If 'auto', mean results will be shown only
when more than 1 dataset is wrapped.
pipeline (None | list[dict] | list[list[dict]]): If ``None``,
each dataset in datasets use its own pipeline;
If ``list[dict]``, it will be assigned to the dataset whose
pipeline is None in datasets;
If ``list[list[dict]]``, pipeline of dataset which is None
in datasets will be replaced by the corresponding pipeline
in the list.
force_apply (bool): If True, apply pipeline above to each dataset
even if it have its own pipeline. Default: False.
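
    Example:
        A minimal sketch of a config that shares one pipeline across two
        datasets. The annotation files and the single pipeline step below
        are illustrative placeholders, not taken from a real config:

        >>> train = dict(
        ...     type='UniformConcatDataset',
        ...     datasets=[
        ...         dict(type='IcdarDataset', ann_file='a.json',
        ...              pipeline=None),
        ...         dict(type='IcdarDataset', ann_file='b.json',
        ...              pipeline=None),
        ...     ],
        ...     pipeline=[dict(type='LoadImageFromFile')])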
"""

    def __init__(self,
                 datasets,
                 separate_eval=True,
                 show_mean_scores='auto',
                 pipeline=None,
                 force_apply=False,
                 **kwargs):
        new_datasets = []
        if pipeline is not None:
            assert isinstance(
                pipeline,
                list), 'pipeline must be list[dict] or list[list[dict]].'
            if is_type_list(pipeline, dict):
                # A single pipeline is shared by all dataset configs.
                self._apply_pipeline(datasets, pipeline, force_apply)
                new_datasets = datasets
            elif is_2dlist(pipeline):
                # One pipeline per group of dataset configs.
                assert is_2dlist(datasets)
                assert len(datasets) == len(pipeline)
                for sub_datasets, tmp_pipeline in zip(datasets, pipeline):
                    self._apply_pipeline(sub_datasets, tmp_pipeline,
                                         force_apply)
                    new_datasets.extend(sub_datasets)
        else:
            if is_2dlist(datasets):
                for sub_datasets in datasets:
                    new_datasets.extend(sub_datasets)
            else:
                new_datasets = datasets
        # Remaining kwargs are forwarded to build_dataset as default args.
        datasets = [build_dataset(c, kwargs) for c in new_datasets]
        super().__init__(datasets, separate_eval)

        if not separate_eval:
            raise NotImplementedError(
                'Evaluating datasets as a whole is not'
                ' supported yet. Please use "separate_eval=True"')

        assert isinstance(show_mean_scores, bool) or show_mean_scores == 'auto'
        if show_mean_scores == 'auto':
            show_mean_scores = len(self.datasets) > 1
        self.show_mean_scores = show_mean_scores
        # show_mean_scores has been resolved to a bool at this point.
        if show_mean_scores:
            if len({type(ds) for ds in self.datasets}) != 1:
                raise NotImplementedError(
                    'To compute mean evaluation scores, all datasets '
                    'must have the same type')

    @staticmethod
    def _apply_pipeline(datasets, pipeline, force_apply=False):
        """Assign ``pipeline`` to every dataset config whose pipeline is
        None, or to every dataset config if ``force_apply=True``."""
        from_cfg = all(isinstance(x, dict) for x in datasets)
        assert from_cfg, 'datasets should be config dicts'
        assert all(isinstance(x, dict) for x in pipeline)
        for dataset in datasets:
            if dataset['pipeline'] is None or force_apply:
                dataset['pipeline'] = copy.deepcopy(pipeline)

    def evaluate(self, results, logger=None, **kwargs):
        """Evaluate the results.

        Args:
            results (list[list | tuple]): Testing results of the dataset.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.

        Returns:
            dict[str: float]: Results of each separate
            dataset if `self.separate_eval=True`.
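
        Note:
            For illustration, assuming two wrapped datasets and a metric
            key named ``hmean`` (a hypothetical name), the returned dict
            would contain ``0_hmean`` and ``1_hmean``, plus ``mean_hmean``
            when ``show_mean_scores`` is enabled.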
"""
        assert len(results) == self.cumulative_sizes[-1], \
            ('Dataset and results have different sizes: '
             f'{self.cumulative_sizes[-1]} vs. {len(results)}')

        # Check whether all the datasets support evaluation
        for dataset in self.datasets:
            assert hasattr(dataset, 'evaluate'), \
                f'{type(dataset)} does not implement evaluate function'

        if self.separate_eval:
            dataset_idx = -1
            total_eval_results = dict()
            if self.show_mean_scores:
                mean_eval_results = defaultdict(list)
            for dataset in self.datasets:
                # Slice out this dataset's share of the concatenated results.
                start_idx = 0 if dataset_idx == -1 else \
                    self.cumulative_sizes[dataset_idx]
                end_idx = self.cumulative_sizes[dataset_idx + 1]

                results_per_dataset = results[start_idx:end_idx]
                print_log(
                    f'\nEvaluating {dataset.ann_file} with '
                    f'{len(results_per_dataset)} images now',
                    logger=logger)

                eval_results_per_dataset = dataset.evaluate(
                    results_per_dataset, logger=logger, **kwargs)
                dataset_idx += 1
                # Prefix each metric key with the dataset index so results
                # from different datasets stay distinguishable.
                for k, v in eval_results_per_dataset.items():
                    total_eval_results.update({f'{dataset_idx}_{k}': v})
                    if self.show_mean_scores:
                        mean_eval_results[k].append(v)

            if self.show_mean_scores:
                for k, v in mean_eval_results.items():
                    total_eval_results[f'mean_{k}'] = np.mean(v)

            return total_eval_results
        else:
            raise NotImplementedError(
                'Evaluating datasets as a whole is not'
                ' supported yet. Please use "separate_eval=True"')
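
For reference, a minimal sketch of how this class is typically instantiated
through the registry, assuming MMOCR 0.x is installed. The annotation paths
and the single-step pipeline are placeholder values for illustration, not a
tested config:

from mmdet.datasets import build_dataset

# Hypothetical dataset configs: both leave `pipeline=None` so that the
# shared pipeline below is assigned to each of them.
dataset_cfgs = [
    dict(type='IcdarDataset', ann_file='data/a.json', img_prefix='data/',
         pipeline=None),
    dict(type='IcdarDataset', ann_file='data/b.json', img_prefix='data/',
         pipeline=None),
]

concat_cfg = dict(
    type='UniformConcatDataset',
    datasets=dataset_cfgs,
    # A list[dict] pipeline: shared by every dataset whose pipeline is None.
    pipeline=[dict(type='LoadImageFromFile')],
    # 'auto' resolves to True here since more than one dataset is wrapped,
    # so evaluate() will also report mean_* keys.
    show_mean_scores='auto')

dataset = build_dataset(concat_cfg)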