Shortcuts

Note

You are reading the documentation for MMOCR 0.x, which will be deprecated by the end of 2022. We recommend upgrading to MMOCR 1.0 to benefit from the new features and better performance brought by OpenMMLab 2.0. Check out the maintenance plan, changelog, code and documentation of MMOCR 1.0 for more details.

Source code for mmocr.models.textrecog.convertors.seg

# Copyright (c) OpenMMLab. All rights reserved.
import cv2
import numpy as np
import torch

import mmocr.utils as utils
from mmocr.models.builder import CONVERTORS
from .base import BaseConvertor


@CONVERTORS.register_module()
class SegConvertor(BaseConvertor):
    """Convert between text, index and tensor for segmentation based pipeline.

    Args:
        dict_type (str): Type of dict, should be either 'DICT36' or 'DICT90'.
        dict_file (None|str): Character dict file path. If not none, the
            file is of higher priority than dict_type.
        dict_list (None|list[str]): Character list. If not none, the list
            is of higher priority than dict_type, but lower than dict_file.
        with_unknown (bool): If True, add `UKN` token to class.
        lower (bool): If True, convert original string to lower case.
            NOTE(review): this class only stores the flag; the conversion
            is presumably applied by the base class - confirm.
        **kwargs: Accepted for config compatibility and ignored.
    """

    # A connected component is decoded into a character only when its
    # majority class covers at least this many pixels; smaller blobs are
    # treated as noise.
    _MIN_CHAR_PIXELS = 20

    def __init__(self,
                 dict_type='DICT36',
                 dict_file=None,
                 dict_list=None,
                 with_unknown=True,
                 lower=False,
                 **kwargs):
        super().__init__(dict_type, dict_file, dict_list)
        assert isinstance(with_unknown, bool), 'with_unknown must be a bool'
        assert isinstance(lower, bool), 'lower must be a bool'

        self.with_unknown = with_unknown
        self.lower = lower
        self.update_dict()

    def update_dict(self):
        """Add the special tokens to the dictionary and rebuild the lookup.

        ``<BG>`` (background) is inserted at index 0, shifting every
        existing character up by one. When ``with_unknown`` is set,
        ``<UKN>`` is appended as the last class and its index is stored in
        ``self.unknown_idx`` (``None`` otherwise). ``self.char2idx`` is then
        rebuilt from ``self.idx2char``.
        """
        # background
        self.idx2char.insert(0, '<BG>')

        # unknown
        self.unknown_idx = None
        if self.with_unknown:
            self.idx2char.append('<UKN>')
            self.unknown_idx = len(self.idx2char) - 1

        # update char2idx
        self.char2idx = {
            char: idx
            for idx, char in enumerate(self.idx2char)
        }

    def tensor2str(self, output, img_metas=None):
        """Convert model output tensor to string labels.

        Each image is decoded by taking the per-pixel argmax class,
        grouping the non-background pixels into connected components,
        assigning every component its majority class, and reading the
        components from left to right.

        Args:
            output (tensor): Model outputs with size: N * C * H * W
            img_metas (list[dict]): Each dict contains one image info.
                The ``'valid_ratio'`` entry is read to crop the padded
                width. Despite the ``None`` default, a list of dicts with
                one entry per image is required.

        Returns:
            texts (list[str]): Decoded text labels.
            scores (list[list[float]]): Decoded chars scores. A score is
                the fraction of the component's counted pixels that belong
                to its majority class. A component decoded as the unknown
                class adds a score but no character to the text.
        """
        assert utils.is_type_list(img_metas, dict), \
            'img_metas must be a list of dict'
        assert len(img_metas) == output.size(0), \
            'img_metas must contain one entry per image in output'

        num_classes = len(self.idx2char)
        texts, scores = [], []
        for b in range(output.size(0)):
            seg_pred = output[b].detach()
            # Only the un-padded part of the width carries text.
            valid_width = int(
                output.size(-1) * img_metas[b]['valid_ratio'] + 1)
            seg_res = torch.argmax(
                seg_pred[:, :, :valid_width],
                dim=0).cpu().numpy().astype(np.int32)
            # Binary foreground mask: every non-background class -> 255.
            seg_thr = np.where(seg_res == 0, 0, 255).astype(np.uint8)
            _, labels, stats, centroids = cv2.connectedComponentsWithStats(
                seg_thr)

            all_res = []
            # Label 0 is the background component of the mask; it consists
            # of class-0 pixels only (or of no pixels at all), so it never
            # yields a character and is skipped.
            for i in range(1, stats.shape[0]):
                # One pass over the component's pixels instead of one pass
                # per class. Classes outside the dictionary are not counted.
                class_counts = np.bincount(
                    seg_res[labels == i],
                    minlength=num_classes)[:num_classes]
                total_num = class_counts.sum()
                if total_num == 0:
                    # Nothing countable: skip rather than divide by zero.
                    continue
                # argmax returns the lowest class index among ties. Class 0
                # cannot win because foreground pixels are never class 0.
                max_cls = int(class_counts.argmax())
                max_num = class_counts[max_cls]
                max_score = 1.0 * max_num / total_num
                all_res.append([max_cls, centroids[i], max_num, max_score])

            # Read characters left to right by the centroid x coordinate.
            all_res.sort(key=lambda res: res[1][0])

            chars, char_scores = [], []
            for char_index, _, pixel_num, score in all_res:
                if pixel_num < self._MIN_CHAR_PIXELS:
                    continue
                if char_index == self.unknown_idx:
                    # Unknown characters contribute no text but their
                    # score is still reported.
                    chars.append('')
                else:
                    chars.append(self.idx2char[char_index])
                char_scores.append(score)

            texts.append(''.join(chars))
            scores.append(char_scores)

        return texts, scores
Read the Docs v: v0.6.3
Versions
latest
stable
v0.6.3
v0.6.2
v0.6.1
v0.6.0
v0.5.0
v0.4.1
v0.4.0
v0.3.0
v0.2.1
v0.2.0
v0.1.0
dev-1.x
Downloads
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.