Shortcuts

Source code for mmocr.utils.bbox_utils

# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Tuple

import numpy as np
from shapely.geometry import LineString, Point

from mmocr.utils.check_argument import is_type_list
from mmocr.utils.point_utils import point_distance, points_center
from mmocr.utils.typing_utils import ArrayLike


def rescale_bbox(bbox: np.ndarray,
                 scale_factor: Tuple[int, int],
                 mode: str = 'mul') -> np.ndarray:
    """Scale the coordinates of a bounding box by ``scale_factor``.

    With ``mode='mul'`` every x is multiplied by ``w_scale`` and every y by
    ``h_scale``; this is the direction typically needed by preprocessing
    transforms such as :func:`Resize`. With ``mode='div'`` the coordinates
    are multiplied by the reciprocal of the factors instead, which lets a
    postprocessor map a bbox back to the original image size.

    Args:
        bbox (ndarray): A bounding box [x1, y1, x2, y2]. Any input whose
            flattened coordinates alternate x, y is handled, and the
            original shape is restored on return.
        scale_factor (tuple(int, int)): (w_scale, h_scale).
        mode (str): Rescale mode. Can be 'mul' or 'div'. Defaults to 'mul'.

    Returns:
        np.ndarray: Rescaled bbox, in the same shape as the input.
    """
    assert mode in ['mul', 'div']
    coords = np.array(bbox, dtype=np.float32)
    # View the coordinates as (x, y) pairs so a length-2 factor broadcasts.
    xy_pairs = coords.reshape(-1, 2)
    factors = np.array(scale_factor, dtype=float)
    if mode == 'div':
        # Division is implemented as multiplication by the reciprocal.
        factors = 1 / factors
    scaled_pairs = xy_pairs * factors[None]
    return scaled_pairs.reshape(coords.shape)


def rescale_bboxes(bboxes: np.ndarray,
                   scale_factor: Tuple[int, int],
                   mode: str = 'mul') -> np.ndarray:
    """Scale a batch of bounding boxes by ``scale_factor``.

    This simply forwards to :func:`rescale_bbox`. With ``mode='mul'`` the
    coordinates are multiplied by the factors (usually needed by
    preprocessing transforms such as :func:`Resize`); with ``mode='div'``
    they are divided, which recovers bboxes in the original image size in
    postprocessors.

    Args:
        bboxes (np.ndarray): Bounding bboxes in shape (N, 4).
        scale_factor (tuple(int, int)): (w_scale, h_scale).
        mode (str): Rescale mode. Can be 'mul' or 'div'. Defaults to 'mul'.

    Returns:
        np.ndarray: Rescaled bboxes.
    """
    return rescale_bbox(bboxes, scale_factor, mode)
def bbox2poly(bbox: ArrayLike, mode: str = 'xyxy') -> np.array:
    """Convert a bounding box into a 4-point polygon.

    Args:
        bbox (ArrayLike): A bbox of four values, in any form that supports
            ``len()`` and unpacking, e.g. list[float], np.ndarray, or
            torch.Tensor. Interpreted as [x1, y1, x2, y2] when ``mode`` is
            'xyxy' and as [x, y, w, h] when ``mode`` is 'xywh'.
        mode (str): Specify the format of bbox. Can be 'xyxy' or 'xywh'.
            Defaults to 'xyxy'.

    Returns:
        np.array: The converted polygon [x1, y1, x2, y1, x2, y2, x1, y2].

    Raises:
        NotImplementedError: If ``mode`` is neither 'xyxy' nor 'xywh'.
    """
    assert len(bbox) == 4
    if mode not in ('xyxy', 'xywh'):
        raise NotImplementedError('Not supported mode.')

    left, top, third, fourth = bbox
    if mode == 'xyxy':
        right, bottom = third, fourth
    else:
        # 'xywh': the last two values are extents, not coordinates.
        right, bottom = left + third, top + fourth
    return np.array([left, top, right, top, right, bottom, left, bottom])
def is_on_same_line(box_a, box_b, min_y_overlap_ratio=0.8):
    # TODO Check if it should be deleted after ocr.py refactored
    """Decide from y-coordinates whether two boxes lie on the same line.

    The boxes are on the same line when their vertical extents overlap and
    the overlapping length is at least ``min_y_overlap_ratio`` times the
    height of either one of the boxes.

    Args:
        box_a (list), box_b (list): Two bounding boxes to be checked. The
            y-coordinates are read from the odd indices (``box[1::2]``).
        min_y_overlap_ratio (float): The minimum vertical overlapping ratio
            allowed for boxes in the same line. If None, any vertical
            overlap is enough.

    Returns:
        The bool flag indicating if they are on the same line
    """
    ys_a, ys_b = box_a[1::2], box_b[1::2]
    upper = (np.min(ys_a), np.max(ys_a))
    lower = (np.min(ys_b), np.max(ys_b))

    # Make sure `upper` is the box whose top edge comes first.
    if upper[0] > lower[0]:
        upper, lower = lower, upper
    upper_top, upper_bottom = upper
    lower_top, lower_bottom = lower

    # The lower box starts below the upper box's bottom: no overlap at all.
    if not lower_top <= upper_bottom:
        return False
    if min_y_overlap_ratio is None:
        return True

    # The overlap starts at the lower box's top and ends at whichever
    # bottom edge is reached first.
    overlap = min(lower_bottom, upper_bottom) - lower_top
    required_upper = (upper_bottom - upper_top) * min_y_overlap_ratio
    required_lower = (lower_bottom - lower_top) * min_y_overlap_ratio
    return overlap >= required_upper or overlap >= required_lower
def stitch_boxes_into_lines(boxes, max_x_dist=10, min_y_overlap_ratio=0.8):
    # TODO Check if it should be deleted after ocr.py refactored
    """Stitch fragmented boxes of words into lines.

    Note: part of its logic is inspired by @Johndirr
    (https://github.com/faustomorales/keras-ocr/issues/22)

    Args:
        boxes (list): List of ocr results to be stitched. Each item is
            accessed through its ``'box'`` (flat x, y coordinate sequence)
            and ``'text'`` keys.
        max_x_dist (int): The maximum horizontal distance between the closest
            edges of neighboring boxes in the same line
        min_y_overlap_ratio (float): The minimum vertical overlapping ratio
            allowed for any pairs of neighboring boxes in the same line

    Returns:
        merged_boxes(list[dict]): List of merged boxes and texts. When
        ``boxes`` holds at most one item it is returned unchanged.
    """
    if len(boxes) <= 1:
        return boxes

    # Process boxes from left to right, ordered by their leftmost x.
    ordered = sorted(boxes, key=lambda item: np.min(item['box'][::2]))
    num_boxes = len(ordered)
    # Indexes of boxes that were already absorbed into an earlier line.
    consumed = set()
    stitched = []

    for start in range(num_boxes):
        if start in consumed:
            continue

        # Grow a chain of vertically-aligned boxes. Each candidate is
        # compared with the most recently appended box, not with the seed.
        chain = [start]
        tail = start
        for candidate in range(start + 1, num_boxes):
            if candidate in consumed:
                continue
            if is_on_same_line(ordered[tail]['box'],
                               ordered[candidate]['box'],
                               min_y_overlap_ratio):
                chain.append(candidate)
                consumed.add(candidate)
                tail = candidate

        # Cut the chain wherever the horizontal gap to the running right
        # edge exceeds max_x_dist.
        groups = [[chain[0]]]
        right_edge = np.max(ordered[chain[0]]['box'][::2])
        for member in chain[1:]:
            member_xs = ordered[member]['box'][::2]
            gap = np.min(member_xs) - right_edge
            if gap > max_x_dist:
                groups.append([])
            groups[-1].append(member)
            right_edge = max(right_edge, np.max(member_xs))

        # Merge every group into one axis-aligned box with joined text.
        for group in groups:
            text = ' '.join([ordered[member]['text'] for member in group])
            x_min, y_min = float('inf'), float('inf')
            x_max, y_max = float('-inf'), float('-inf')
            for member in group:
                coords = ordered[member]['box']
                x_max = max(np.max(coords[::2]), x_max)
                x_min = min(np.min(coords[::2]), x_min)
                y_max = max(np.max(coords[1::2]), y_max)
                y_min = min(np.min(coords[1::2]), y_min)
            stitched.append({
                'text': text,
                'box': [
                    x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max
                ],
            })

    return stitched
def bezier2polygon(bezier_points: np.ndarray,
                   num_sample: int = 20) -> List[np.ndarray]:
    # TODO check test later
    """Sample points from the boundary of a polygon enclosed by two Bezier
    curves, which are controlled by ``bezier_points``.

    Args:
        bezier_points (ndarray): A :math:`(2, 4, 2)` array of 8 Bezier
            points or its equivalent (anything holding 16 values). The
            first 4 points control the curve at one side and the last four
            control the other side.
        num_sample (int): The number of sample points at each Bezier curve.
            Defaults to 20.

    Returns:
        list[ndarray]: A list of 2*num_sample points representing the
        polygon extracted from Bezier curves. The samples of the first
        curve come first, followed by those of the second curve; each point
        is an ``[x, y]`` pair produced by ``ndarray.tolist()``.

    Warning:
        The points are not guaranteed to be ordered. Please use
        :func:`mmocr.utils.sort_points` to sort points if necessary.
    """
    assert num_sample > 0, 'The sampling number should greater than 0'

    control = np.asarray(bezier_points)
    assert np.prod(
        control.shape) == 16, 'Need 8 Bezier control points to continue!'

    # Rows become [x of curve 0, y of curve 0, x of curve 1, y of curve 1];
    # column k holds the k-th control point of both curves.
    control = control.reshape(2, 4, 2).transpose(0, 2, 1).reshape(4, 4)
    p0, p1, p2, p3 = control.T

    t = np.linspace(0, 1, num_sample)
    # Cubic Bernstein basis evaluated at every sample parameter.
    w0 = (1 - t) ** 3
    w1 = 3 * t * ((1 - t) ** 2)
    w2 = 3 * (t ** 2) * (1 - t)
    w3 = t ** 3

    # Shape (num_sample, 4): columns are x0, y0, x1, y1.
    sampled = np.outer(w0, p0) + np.outer(w1, p1) \
        + np.outer(w2, p2) + np.outer(w3, p3)

    # Stack the (x, y) samples of the second curve below the first one.
    polygon = np.concatenate((sampled[:, :2], sampled[:, 2:]), axis=0)
    return polygon.tolist()
def sort_vertex(points_x, points_y):
    # TODO Add typehints & docstring & test
    """Sort box vertices in clockwise order from left-top first.

    Args:
        points_x (list[float]): x of four vertices.
        points_y (list[float]): y of four vertices.

    Returns:
        sorted_points_x (list[float]): x of sorted four vertices.
        sorted_points_y (list[float]): y of sorted four vertices.
    """
    assert is_type_list(points_x, (float, int))
    assert is_type_list(points_y, (float, int))
    assert len(points_x) == 4
    assert len(points_y) == 4

    corners = np.stack((points_x, points_y), axis=-1).astype(np.float32)
    ordered = _sort_vertex(corners)
    return list(ordered[:, 0]), list(ordered[:, 1])


def _sort_vertex(vertices):
    # TODO Add typehints & docstring & test
    """Order ``(N, 2)`` vertices by angle and start from the left-top one.

    The vertices are sorted by ascending ``arctan2`` angle around their
    mean point, then cyclically shifted so that the vertex closest to the
    (min x, min y) corner of their bounding box comes first.

    Args:
        vertices (ndarray): Array of shape (N, 2).

    Returns:
        ndarray: The reordered vertices, in shape (N, 2). An empty input
        is returned as is.
    """
    assert vertices.ndim == 2
    assert vertices.shape[-1] == 2
    if vertices.shape[0] == 0:
        return vertices

    offsets = vertices - np.mean(vertices, axis=0)
    angle_order = np.argsort(np.arctan2(offsets[:, 1], offsets[:, 0]))
    by_angle = vertices[angle_order]

    # Pick the vertex nearest to the bounding box's (min x, min y) corner
    # as the starting point and rotate the sequence to begin there.
    corner = np.min(by_angle, axis=0)
    start = np.argmin(np.linalg.norm(corner - by_angle, axis=-1, ord=2))
    return np.roll(by_angle, -start, axis=0)


def sort_vertex8(points):
    # TODO Add typehints & docstring & test
    """Sort vertex with 8 points [x1 y1 x2 y2 x3 y3 x4 y4]"""
    assert len(points) == 8
    corners = np.array(points, dtype=np.float32).reshape(-1, 2)
    return list(_sort_vertex(corners).flatten())
def bbox_center_distance(box1: ArrayLike, box2: ArrayLike) -> float:
    """Calculate the distance between the center points of two bounding
    boxes.

    The centers are obtained with ``points_center`` and their distance with
    ``point_distance``.

    Args:
        box1 (ArrayLike): The first bounding box
            represented in [x1, y1, x2, y2].
        box2 (ArrayLike): The second bounding box
            represented in [x1, y1, x2, y2].

    Returns:
        float: The distance between the center points of two bounding boxes.
    """
    center1 = points_center(box1)
    center2 = points_center(box2)
    return point_distance(center1, center2)
def bbox_diag_distance(box: ArrayLike) -> float:
    """Calculate the diagonal length of a bounding box (distance between the
    top-left and bottom-right).

    Args:
        box (ArrayLike): The bounding box represented in
            [x1, y1, x2, y2, x3, y3, x4, y4] or [x1, y1, x2, y2].

    Returns:
        float: The diagonal length of the bounding box.
    """
    box = np.array(box, dtype=np.float32)
    num_values = box.size
    assert (num_values == 8 or num_values == 4)

    # The corner opposite to the first point is the third point of an
    # 8-value box and the second point of a 4-value box.
    if num_values == 8:
        opposite = box[4:6]
    elif num_values == 4:
        opposite = box[2:4]
    return point_distance(box[0:2], opposite)
def bbox_jitter(points_x, points_y, jitter_ratio_x=0.5, jitter_ratio_y=0.1):
    """Jitter on the coordinates of bounding box.

    Every vertex is shifted in place by an independent random offset; the
    function modifies ``points_x`` and ``points_y`` and returns nothing.
    Offsets are drawn with ``np.random.rand`` from
    ``[-ratio * h, ratio * h)``, where ``h`` is the longer of the edges
    joining vertices 1-2 and 3-0 (presumably the box height when the
    vertices are given clockwise from the top-left - confirm with callers).

    Args:
        points_x (list[float | int]): List of x for four vertices.
        points_y (list[float | int]): List of y for four vertices.
        jitter_ratio_x (float): Horizontal jitter ratio relative to the
            height. Must be a float in [0, 1).
        jitter_ratio_y (float): Vertical jitter ratio relative to the
            height. Must be a float in [0, 1).
    """
    assert len(points_x) == 4
    assert len(points_y) == 4
    assert isinstance(jitter_ratio_x, float)
    assert isinstance(jitter_ratio_y, float)
    assert 0 <= jitter_ratio_x < 1
    assert 0 <= jitter_ratio_y < 1

    corners = [Point(x, y) for x, y in zip(points_x, points_y)]
    # Edge i joins corner i with the next corner, wrapping back to corner 0.
    edges = [
        LineString([corners[idx], corners[(idx + 1) % 4]])
        for idx in range(4)
    ]
    ref_height = max(edges[1].length, edges[3].length)

    for idx in range(4):
        # Draw x before y so the random stream is consumed in a fixed order.
        shift_x = (np.random.rand() - 0.5) * 2 * jitter_ratio_x * ref_height
        shift_y = (np.random.rand() - 0.5) * 2 * jitter_ratio_y * ref_height
        points_x[idx] += shift_x
        points_y[idx] += shift_y
Read the Docs v: dev-1.x
Versions
latest
stable
v1.0.1
v1.0.0
0.x
v0.6.3
v0.6.2
v0.6.1
v0.6.0
v0.5.0
v0.4.1
v0.4.0
v0.3.0
v0.2.1
v0.2.0
v0.1.0
dev-1.x
Downloads
pdf
html
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.