gingdev's picture
chore: wip
d9028d2
from .constants import MODEL_PATH
from rapidocr_onnxruntime import RapidOCR
from numpy.typing import NDArray
from os import PathLike
from shapely import Polygon
from shapely.geometry import box, MultiPolygon
from shapely.ops import unary_union
from typing import cast
# Shared RapidOCR engine, constructed once at import time with
# rec_model_path set to MODEL_PATH and reused by every ocr() call.
_rapid_ocr = RapidOCR(rec_model_path=MODEL_PATH)
def ocr(image: NDArray | PathLike, esp: float = 0.5) -> list[tuple[Polygon, str]]:
    """Run OCR on *image* and group overlapping detections into regions.

    Each detection is reduced to its axis-aligned bounding box, whose
    maximum-y edge is pushed out by ``esp`` times the box height so that
    boxes lying close together along the y axis overlap. The boxes are then
    unioned, and the texts of all detections intersecting a merged region
    are concatenated in ascending order of the y coordinate of each
    detection's first corner point.

    Args:
        image: Image passed unchanged to the RapidOCR engine.
        esp: Fraction of a box's height by which its maximum-y edge is
            extended before merging.

    Returns:
        One ``(region, text)`` pair per merged region. Every text fragment
        is prefixed with a single space, so a non-empty ``text`` starts with
        a space. An empty list is returned when the engine yields no
        detections.
    """
    result, _ = _rapid_ocr(image)
    # The engine yields None (not an empty list) when nothing is detected.
    if not result:
        return []

    detections = sorted(result, key=lambda det: det[0][0][1])

    texts: list[str] = []
    shapes: list[Polygon] = []
    for points, text, _ in detections:
        x, y, xmax, ymax = Polygon(points).bounds
        ymax += (ymax - y) * esp
        shapes.append(box(x, y, xmax, ymax))
        texts.append(text)

    # unary_union collapses to a single Polygon (which has no ``.geoms``)
    # when there is one box or every box ends up connected; normalise both
    # outcomes to a list of polygons.
    merged = unary_union(shapes)
    if isinstance(merged, Polygon):
        merged_shapes = [merged]
    else:
        merged_shapes = list(cast(MultiPolygon, merged).geoms)

    fragments: list[list[str]] = [[] for _ in merged_shapes]
    # A box that merely touches a neighbouring region (e.g. at a corner)
    # intersects more than one region; ``visited`` assigns it to the first.
    visited = [False] * len(texts)
    for parts, shape in zip(fragments, merged_shapes):
        for j, text in enumerate(texts):
            if not visited[j] and shapes[j].intersects(shape):
                # Leading space kept so the returned text format is unchanged.
                parts.append(f" {text}")
                visited[j] = True

    return [
        (shape, "".join(parts)) for shape, parts in zip(merged_shapes, fragments)
    ]