Shortcuts

Note

You are reading the documentation for MMOCR 0.x, which will be deprecated at the end of 2022. We recommend upgrading to MMOCR 1.0 to benefit from the many new features and better performance brought by OpenMMLab 2.0. See the maintenance plan, changelog, code and documentation of MMOCR 1.0 for more details.

Source code for mmocr.datasets.utils.parser

# Copyright (c) OpenMMLab. All rights reserved.
import json
import warnings

from mmocr.datasets.builder import PARSERS
from mmocr.utils import StringStrip


@PARSERS.register_module()
class LineStrParser:
    """Parse string of one line in annotation file to dict format.

    The line is stripped, split on ``separator``, and the sub-string at
    position ``keys_idx[i]`` is stored under ``keys[i]`` in the result.

    Args:
        keys (list[str]): Keys in result dict.
        keys_idx (list[int]): Value index in sub-string list for each key
            above. Must have the same length as ``keys``.
        separator (str): Separator to separate string to list of sub-string.
        **kwargs: Forwarded unchanged to ``StringStrip``.
    """

    def __init__(self,
                 keys=['filename', 'text'],
                 keys_idx=[0, 1],
                 separator=' ',
                 **kwargs):
        assert isinstance(keys, list)
        assert isinstance(keys_idx, list)
        assert isinstance(separator, str)
        assert len(keys) > 0
        assert len(keys) == len(keys_idx)
        # Store private copies: the default arguments are single list objects
        # shared by every call, and a caller-supplied list may be mutated
        # later. Aliasing either would let one parser's state leak into
        # another.
        self.keys = list(keys)
        self.keys_idx = list(keys_idx)
        self.separator = separator
        self.strip_cls = StringStrip(**kwargs)

    def get_item(self, data_ret, index):
        """Parse the annotation line at ``index`` into a dict.

        Args:
            data_ret: Indexable collection of annotation lines; it must
                support ``len()`` and integer indexing.
            index (int): Requested position. It is wrapped modulo
                ``len(data_ret)``, so out-of-range values cycle.

        Returns:
            dict: Maps each entry of ``self.keys`` to the sub-string found at
            the matching position of ``self.keys_idx``.

        Raises:
            Exception: If the split line has too few sub-strings to reach the
                largest index in ``self.keys_idx``.
        """
        map_index = index % len(data_ret)
        line_str = self.strip_cls(data_ret[map_index])

        # The blank check always splits on a single space, regardless of the
        # configured separator: it warns about text that itself contains
        # blanks.
        if len(line_str.split(' ')) > 2:
            msg = ('More than two blank spaces were detected. '
                   'Please use LineJsonParser to handle '
                   'annotations with blanks. '
                   'Check Doc '
                   'https://mmocr.readthedocs.io/en/latest/'
                   'tutorials/blank_recog.html '
                   'for details.')
            warnings.warn(msg)

        fields = line_str.split(self.separator)
        max_idx = max(self.keys_idx)
        if len(fields) <= max_idx:
            raise Exception(f'key index: {max_idx} out of range: {fields}')

        return {
            key: fields[idx]
            for key, idx in zip(self.keys, self.keys_idx)
        }
@PARSERS.register_module()
class LineJsonParser:
    """Parse json-string of one line in annotation file to dict format.

    Args:
        keys (list[str]): Keys in both json-string and result dict. Must be
            a non-empty list; the empty default is rejected by the assertion
            in ``__init__``, so callers always have to supply it.
    """

    def __init__(self, keys=[]):
        assert isinstance(keys, list)
        assert len(keys) > 0
        # Keep a private copy so later mutation of the caller's list cannot
        # change which keys this parser extracts.
        self.keys = list(keys)

    def get_item(self, data_ret, index):
        """Decode the JSON line at ``index`` and pick out the wanted keys.

        Args:
            data_ret: Indexable collection of JSON strings; it must support
                ``len()`` and integer indexing.
            index (int): Requested position. It is wrapped modulo
                ``len(data_ret)``, so out-of-range values cycle.

        Returns:
            dict: Maps each entry of ``self.keys`` to its value in the
            decoded JSON object.

        Raises:
            Exception: If any of ``self.keys`` is missing from the decoded
                JSON object.
        """
        map_index = index % len(data_ret)
        line_json_obj = json.loads(data_ret[map_index])

        line_info = {}
        for key in self.keys:
            if key not in line_json_obj:
                raise Exception(f'key {key} not in line json {line_json_obj}')
            line_info[key] = line_json_obj[key]
        return line_info
Read the Docs v: v0.6.3
Versions
latest
stable
v0.6.3
v0.6.2
v0.6.1
v0.6.0
v0.5.0
v0.4.1
v0.4.0
v0.3.0
v0.2.1
v0.2.0
v0.1.0
dev-1.x
Downloads
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.