File size: 1,176 Bytes
d9028d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from .constants import MODEL_PATH
from rapidocr_onnxruntime import RapidOCR
from numpy.typing import NDArray
from os import PathLike
from shapely import Polygon
from shapely.geometry import box, MultiPolygon
from shapely.ops import unary_union
from typing import cast

_rapid_ocr = RapidOCR(rec_model_path=MODEL_PATH)


def ocr(image: NDArray | PathLike, esp: float = 0.5) -> list[tuple[Polygon, str]]:
    result, _ = _rapid_ocr(image)
    result.sort(key=lambda x: x[0][0][1])
    texts: list[str] = []
    shapes: list[Polygon] = []
    for points, text, _ in result:
        x, y, xmax, ymax = Polygon(points).bounds
        ymax += (ymax - y) * esp
        shapes.append(box(x, y, xmax, ymax))
        texts.append(text)
    merged_shapes = cast(MultiPolygon, unary_union(shapes)).geoms
    merged_texts = [""] * len(merged_shapes)
    visited = [False] * len(texts)
    for i, shape in enumerate(merged_shapes):
        for j, text in enumerate(texts):
            if not visited[j]:
                if shapes[j].intersects(shape):
                    merged_texts[i] += f" {text}"
                    visited[j] = True
    return list(zip(merged_shapes, merged_texts))