Shortcuts

Note

You are reading the documentation for MMOCR 0.x, which will soon be deprecated by the end of 2022. We recommend you upgrade to MMOCR 1.0 to enjoy fruitful new features and better performance brought by OpenMMLab 2.0. Check out the maintenance plan, changelog, code and documentation of MMOCR 1.0 for more details.

Source code for mmocr.datasets.pipelines.textdet_targets.fcenet_targets

# Copyright (c) OpenMMLab. All rights reserved.
import cv2
import numpy as np
from mmdet.datasets.builder import PIPELINES
from numpy.fft import fft
from numpy.linalg import norm

import mmocr.utils.check_argument as check_argument
from .textsnake_targets import TextSnakeTargets


[docs]@PIPELINES.register_module() class FCENetTargets(TextSnakeTargets): """Generate the ground truth targets of FCENet: Fourier Contour Embedding for Arbitrary-Shaped Text Detection. [https://arxiv.org/abs/2104.10442] Args: fourier_degree (int): The maximum Fourier transform degree k. resample_step (float): The step size for resampling the text center line (TCL). It's better not to exceed half of the minimum width. center_region_shrink_ratio (float): The shrink ratio of text center region. level_size_divisors (tuple(int)): The downsample ratio on each level. level_proportion_range (tuple(tuple(int))): The range of text sizes assigned to each level. """ def __init__(self, fourier_degree=5, resample_step=4.0, center_region_shrink_ratio=0.3, level_size_divisors=(8, 16, 32), level_proportion_range=((0, 0.4), (0.3, 0.7), (0.6, 1.0))): super().__init__() assert isinstance(level_size_divisors, tuple) assert isinstance(level_proportion_range, tuple) assert len(level_size_divisors) == len(level_proportion_range) self.fourier_degree = fourier_degree self.resample_step = resample_step self.center_region_shrink_ratio = center_region_shrink_ratio self.level_size_divisors = level_size_divisors self.level_proportion_range = level_proportion_range
[docs] def generate_center_region_mask(self, img_size, text_polys): """Generate text center region mask. Args: img_size (tuple): The image size of (height, width). text_polys (list[list[ndarray]]): The list of text polygons. Returns: center_region_mask (ndarray): The text center region mask. """ assert isinstance(img_size, tuple) assert check_argument.is_2dlist(text_polys) h, w = img_size center_region_mask = np.zeros((h, w), np.uint8) center_region_boxes = [] for poly in text_polys: assert len(poly) == 1 polygon_points = poly[0].reshape(-1, 2) _, _, top_line, bot_line = self.reorder_poly_edge(polygon_points) resampled_top_line, resampled_bot_line = self.resample_sidelines( top_line, bot_line, self.resample_step) resampled_bot_line = resampled_bot_line[::-1] center_line = (resampled_top_line + resampled_bot_line) / 2 line_head_shrink_len = norm(resampled_top_line[0] - resampled_bot_line[0]) / 4.0 line_tail_shrink_len = norm(resampled_top_line[-1] - resampled_bot_line[-1]) / 4.0 head_shrink_num = int(line_head_shrink_len // self.resample_step) tail_shrink_num = int(line_tail_shrink_len // self.resample_step) if len(center_line) > head_shrink_num + tail_shrink_num + 2: center_line = center_line[head_shrink_num:len(center_line) - tail_shrink_num] resampled_top_line = resampled_top_line[ head_shrink_num:len(resampled_top_line) - tail_shrink_num] resampled_bot_line = resampled_bot_line[ head_shrink_num:len(resampled_bot_line) - tail_shrink_num] for i in range(0, len(center_line) - 1): tl = center_line[i] + (resampled_top_line[i] - center_line[i] ) * self.center_region_shrink_ratio tr = center_line[i + 1] + ( resampled_top_line[i + 1] - center_line[i + 1]) * self.center_region_shrink_ratio br = center_line[i + 1] + ( resampled_bot_line[i + 1] - center_line[i + 1]) * self.center_region_shrink_ratio bl = center_line[i] + (resampled_bot_line[i] - center_line[i] ) * self.center_region_shrink_ratio current_center_box = np.vstack([tl, tr, br, bl]).astype(np.int32) center_region_boxes.append(current_center_box) cv2.fillPoly(center_region_mask, center_region_boxes, 1) return center_region_mask
[docs] def resample_polygon(self, polygon, n=400): """Resample one polygon with n points on its boundary. Args: polygon (list[float]): The input polygon. n (int): The number of resampled points. Returns: resampled_polygon (list[float]): The resampled polygon. """ length = [] for i in range(len(polygon)): p1 = polygon[i] if i == len(polygon) - 1: p2 = polygon[0] else: p2 = polygon[i + 1] length.append(((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)**0.5) total_length = sum(length) n_on_each_line = (np.array(length) / (total_length + 1e-8)) * n n_on_each_line = n_on_each_line.astype(np.int32) new_polygon = [] for i in range(len(polygon)): num = n_on_each_line[i] p1 = polygon[i] if i == len(polygon) - 1: p2 = polygon[0] else: p2 = polygon[i + 1] if num == 0: continue dxdy = (p2 - p1) / num for j in range(num): point = p1 + dxdy * j new_polygon.append(point) return np.array(new_polygon)
[docs] def normalize_polygon(self, polygon): """Normalize one polygon so that its start point is at right most. Args: polygon (list[float]): The origin polygon. Returns: new_polygon (lost[float]): The polygon with start point at right. """ temp_polygon = polygon - polygon.mean(axis=0) x = np.abs(temp_polygon[:, 0]) y = temp_polygon[:, 1] index_x = np.argsort(x) index_y = np.argmin(y[index_x[:8]]) index = index_x[index_y] new_polygon = np.concatenate([polygon[index:], polygon[:index]]) return new_polygon
[docs] def poly2fourier(self, polygon, fourier_degree): """Perform Fourier transformation to generate Fourier coefficients ck from polygon. Args: polygon (ndarray): An input polygon. fourier_degree (int): The maximum Fourier degree K. Returns: c (ndarray(complex)): Fourier coefficients. """ points = polygon[:, 0] + polygon[:, 1] * 1j c_fft = fft(points) / len(points) c = np.hstack((c_fft[-fourier_degree:], c_fft[:fourier_degree + 1])) return c
[docs] def clockwise(self, c, fourier_degree): """Make sure the polygon reconstructed from Fourier coefficients c in the clockwise direction. Args: polygon (list[float]): The origin polygon. Returns: new_polygon (lost[float]): The polygon in clockwise point order. """ if np.abs(c[fourier_degree + 1]) > np.abs(c[fourier_degree - 1]): return c elif np.abs(c[fourier_degree + 1]) < np.abs(c[fourier_degree - 1]): return c[::-1] else: if np.abs(c[fourier_degree + 2]) > np.abs(c[fourier_degree - 2]): return c else: return c[::-1]
[docs] def cal_fourier_signature(self, polygon, fourier_degree): """Calculate Fourier signature from input polygon. Args: polygon (ndarray): The input polygon. fourier_degree (int): The maximum Fourier degree K. Returns: fourier_signature (ndarray): An array shaped (2k+1, 2) containing real part and image part of 2k+1 Fourier coefficients. """ resampled_polygon = self.resample_polygon(polygon) resampled_polygon = self.normalize_polygon(resampled_polygon) fourier_coeff = self.poly2fourier(resampled_polygon, fourier_degree) fourier_coeff = self.clockwise(fourier_coeff, fourier_degree) real_part = np.real(fourier_coeff).reshape((-1, 1)) image_part = np.imag(fourier_coeff).reshape((-1, 1)) fourier_signature = np.hstack([real_part, image_part]) return fourier_signature
[docs] def generate_fourier_maps(self, img_size, text_polys): """Generate Fourier coefficient maps. Args: img_size (tuple): The image size of (height, width). text_polys (list[list[ndarray]]): The list of text polygons. Returns: fourier_real_map (ndarray): The Fourier coefficient real part maps. fourier_image_map (ndarray): The Fourier coefficient image part maps. """ assert isinstance(img_size, tuple) assert check_argument.is_2dlist(text_polys) h, w = img_size k = self.fourier_degree real_map = np.zeros((k * 2 + 1, h, w), dtype=np.float32) imag_map = np.zeros((k * 2 + 1, h, w), dtype=np.float32) for poly in text_polys: assert len(poly) == 1 text_instance = [[poly[0][i], poly[0][i + 1]] for i in range(0, len(poly[0]), 2)] mask = np.zeros((h, w), dtype=np.uint8) polygon = np.array(text_instance).reshape((1, -1, 2)) cv2.fillPoly(mask, polygon.astype(np.int32), 1) fourier_coeff = self.cal_fourier_signature(polygon[0], k) for i in range(-k, k + 1): if i != 0: real_map[i + k, :, :] = mask * fourier_coeff[i + k, 0] + ( 1 - mask) * real_map[i + k, :, :] imag_map[i + k, :, :] = mask * fourier_coeff[i + k, 1] + ( 1 - mask) * imag_map[i + k, :, :] else: yx = np.argwhere(mask > 0.5) k_ind = np.ones((len(yx)), dtype=np.int64) * k y, x = yx[:, 0], yx[:, 1] real_map[k_ind, y, x] = fourier_coeff[k, 0] - x imag_map[k_ind, y, x] = fourier_coeff[k, 1] - y return real_map, imag_map
[docs] def generate_level_targets(self, img_size, text_polys, ignore_polys): """Generate ground truth target on each level. Args: img_size (list[int]): Shape of input image. text_polys (list[list[ndarray]]): A list of ground truth polygons. ignore_polys (list[list[ndarray]]): A list of ignored polygons. Returns: level_maps (list(ndarray)): A list of ground target on each level. """ h, w = img_size lv_size_divs = self.level_size_divisors lv_proportion_range = self.level_proportion_range lv_text_polys = [[] for i in range(len(lv_size_divs))] lv_ignore_polys = [[] for i in range(len(lv_size_divs))] level_maps = [] for poly in text_polys: assert len(poly) == 1 text_instance = [[poly[0][i], poly[0][i + 1]] for i in range(0, len(poly[0]), 2)] polygon = np.array(text_instance, dtype=np.int).reshape((1, -1, 2)) _, _, box_w, box_h = cv2.boundingRect(polygon) proportion = max(box_h, box_w) / (h + 1e-8) for ind, proportion_range in enumerate(lv_proportion_range): if proportion_range[0] < proportion < proportion_range[1]: lv_text_polys[ind].append([poly[0] / lv_size_divs[ind]]) for ignore_poly in ignore_polys: assert len(ignore_poly) == 1 text_instance = [[ignore_poly[0][i], ignore_poly[0][i + 1]] for i in range(0, len(ignore_poly[0]), 2)] polygon = np.array(text_instance, dtype=np.int).reshape((1, -1, 2)) _, _, box_w, box_h = cv2.boundingRect(polygon) proportion = max(box_h, box_w) / (h + 1e-8) for ind, proportion_range in enumerate(lv_proportion_range): if proportion_range[0] < proportion < proportion_range[1]: lv_ignore_polys[ind].append( [ignore_poly[0] / lv_size_divs[ind]]) for ind, size_divisor in enumerate(lv_size_divs): current_level_maps = [] level_img_size = (h // size_divisor, w // size_divisor) text_region = self.generate_text_region_mask( level_img_size, lv_text_polys[ind])[None] current_level_maps.append(text_region) center_region = self.generate_center_region_mask( level_img_size, lv_text_polys[ind])[None] current_level_maps.append(center_region) effective_mask = self.generate_effective_mask( level_img_size, lv_ignore_polys[ind])[None] current_level_maps.append(effective_mask) fourier_real_map, fourier_image_maps = self.generate_fourier_maps( level_img_size, lv_text_polys[ind]) current_level_maps.append(fourier_real_map) current_level_maps.append(fourier_image_maps) level_maps.append(np.concatenate(current_level_maps)) return level_maps
[docs] def generate_targets(self, results): """Generate the ground truth targets for FCENet. Args: results (dict): The input result dictionary. Returns: results (dict): The output result dictionary. """ assert isinstance(results, dict) polygon_masks = results['gt_masks'].masks polygon_masks_ignore = results['gt_masks_ignore'].masks h, w, _ = results['img_shape'] level_maps = self.generate_level_targets((h, w), polygon_masks, polygon_masks_ignore) results['mask_fields'].clear() # rm gt_masks encoded by polygons mapping = { 'p3_maps': level_maps[0], 'p4_maps': level_maps[1], 'p5_maps': level_maps[2] } for key, value in mapping.items(): results[key] = value return results
Read the Docs v: v0.6.3
Versions
latest
stable
v0.6.3
v0.6.2
v0.6.1
v0.6.0
v0.5.0
v0.4.1
v0.4.0
v0.3.0
v0.2.1
v0.2.0
v0.1.0
dev-1.x
Downloads
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.