diff --git a/OCR/ocr/services/image_ocr.py b/OCR/ocr/services/image_ocr.py
index 74336bd7..c483bdb4 100644
--- a/OCR/ocr/services/image_ocr.py
+++ b/OCR/ocr/services/image_ocr.py
@@ -79,7 +79,7 @@ def deskew_image_text(self, image: np.ndarray, line_length_prop=0.5, max_skew_an
         rotation_mat = cv.getRotationMatrix2D((image.shape[1] / 2, image.shape[0] / 2), skew_angle, 1)
         return cv.warpAffine(np.array(image, dtype=np.uint8), rotation_mat, (image.shape[1], image.shape[0]))
 
-    def split_text_blocks(self, image: np.ndarray, line_length_prop=0.5) -> np.ndarray:
+    def split_text_blocks(self, image: np.ndarray, line_length_prop=0.5) -> list[np.ndarray]:
         """
         Splits an image with text in it into possibly multiple images, one for each line.
 
@@ -101,6 +101,7 @@ def split_text_blocks(self, image: np.ndarray, line_length_prop=0.5) -> np.ndarr
         # Simplify each contour into a bounding box
         bbox = [cv.boundingRect(contour) for contour in contours]
 
+        acc = []
         # Merge overlapping bounding boxes, then sort the bounding boxes by y-position (top to bottom)
         for x, y, w, h in sorted(self.merge_bounding_boxes(bbox), key=lambda x: x[1]):
             # Filter lines that are too tiny and probably invalid
@@ -108,7 +109,13 @@ def split_text_blocks(self, image: np.ndarray, line_length_prop=0.5) -> np.ndarr
                 continue
 
             res = rotated[y : (y + h), x : (x + w)]
-            yield res
+            acc.append(res)
+
+        # If we skipped all potential text blocks due to filtering conditions, return the
+        # original image anyway.
+        if len(acc) == 0:
+            return [image]
+        return acc
 
     def image_to_text(self, segments: dict[str, np.ndarray]) -> dict[str, tuple[str, float]]:
         digitized: dict[str, tuple[str, float]] = {}
@@ -119,7 +126,7 @@ def image_to_text(self, segments: dict[str, np.ndarray]) -> dict[str, tuple[str,
             generated_text = []
             confidence = []
 
-            text_blocks = list(self.split_text_blocks(image))
+            text_blocks = self.split_text_blocks(image)
 
             # Ignore output from `split_text_blocks` algorithm if only one text block is detected
             if len(text_blocks) == 1:
diff --git a/OCR/tests/ocr_test.py b/OCR/tests/ocr_test.py
index b41a01e7..d8cd5277 100644
--- a/OCR/tests/ocr_test.py
+++ b/OCR/tests/ocr_test.py
@@ -1,5 +1,6 @@
 import os
 
+import numpy as np
 import cv2 as cv
 
 from ocr.services.image_segmenter import (
@@ -18,6 +19,12 @@
 
 
 class TestOCR:
+    def test_split_text_blocks(self):
+        ocr = ImageOCR()
+        img = np.ones([10, 10, 3], np.uint8)
+        result = ocr.split_text_blocks(img)
+        assert np.array_equiv(result, img)
+
     def test_ocr_printed(self):
         segmenter = ImageSegmenter(
             segmentation_function=segment_by_color_bounding_box,