Skip to content

Commit

Permalink
improve "phrase" kernel
Browse files: browse the repository at this point in the history
  • Loading branch information
jonchang committed Oct 18, 2024
1 parent 692cbdb commit 2d5bf9a
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
14 changes: 9 additions & 5 deletions OCR/ocr/services/image_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def merge_bounding_boxes(boxes: list) -> Iterator[list]:

def identify_blocks(self, input_image: np.ndarray, kernel: np.ndarray):
"""
Given an input image and a morphological operation kernel, return bounding boxes of
Given an input image and a morphological operation kernel, returns unique (non-overlapping)
bounding boxes of potential text regions.
"""
# Invert threshold `input_image` and dilate using `kernel` to "expand" the size of text blocks
_, thresh = cv.threshold(cv.cvtColor(input_image, cv.COLOR_BGR2GRAY), 128, 255, cv.THRESH_BINARY_INV)
Expand Down Expand Up @@ -93,7 +94,6 @@ def deskew_image_text(self, image: np.ndarray, line_length_prop=0.5, max_skew_an
rotation_mat = cv.getRotationMatrix2D((image.shape[1] / 2, image.shape[0] / 2), skew_angle, 1)
return cv.warpAffine(np.array(image, dtype=np.uint8), rotation_mat, (image.shape[1], image.shape[0]))


def split_text_blocks(self, image: np.ndarray, line_length_prop=0.5) -> list[np.ndarray]:
"""
Splits an image with text in it into possibly multiple images, one for each line.
Expand All @@ -106,18 +106,22 @@ def split_text_blocks(self, image: np.ndarray, line_length_prop=0.5) -> list[np.
# Kernels for morphological operations.
# Kernel height of 1 implies a minimum separation between lines of 1px
line_kernel = np.ones([1, int(line_length)], np.uint8)
# 3x3 cross-shaped kernel to help identify words in blank space.
word_kernel = cv.getStructuringElement(cv.MORPH_CROSS, (3, 3))
# 11x5 (width x height) cross-shaped kernel to help identify words or phrases separated by blank space.
word_kernel = cv.getStructuringElement(cv.MORPH_CROSS, (11, 5))

acc = []

# Sort identified lines by y-position (top to bottom)
for x, y, w, h in sorted(self.identify_blocks(rotated, line_kernel), key=lambda x: x[1]):
# Filter lines that are too tiny and probably invalid
if h < 5:
continue

res = rotated[y : (y + h), x : (x + w)]

# Sort identified text blocks (putative words or phrases) by x-position (left to right)
for x, y, w, h in sorted(self.identify_blocks(res, word_kernel), key=lambda x: x[0]):
acc.append(res[y:(y+h), x:(x+w)])
acc.append(res[y : (y + h), x : (x + w)])

# If we skipped all potential text blocks due to filtering conditions, return the
# original image anyway.
Expand Down
2 changes: 1 addition & 1 deletion OCR/tests/ocr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def test_ocr_paragraph(self):
text, confidence = results["text"]
assert (
text
== "THIS TEST WAS DEVELOPED AND ITS ANALYTICAL PERFORMANCE CHARACTERISTICS HAVE BEEN DETERMINED BY QUEST DIAGNOSTICS NICHOLS INSTITUTE SAN JUAN CAPISTRAND. IT HAS NOT BEEN CLEARED OR APPROVED BY FDA. THIS ASSAY HAS BEEN VALIDATED PURSUANT TO THE CLIA REGULATIONS AND IS USED FOR CLINICAL PURPOSES."
== "THIS TEST WAS DEVELOPED AND ITS ANALYTICAL PERFORMANCE CHARACTERISTICS HAVE BEEN DETERMINED BY QUEST DIAGNOSTICS NICHOLS INSTITUTE SAN JUAN CAPISTRANO. IT HAS NOT BEEN CLEARED OR APPROVED BY FDA. THIS ASSAY HAS BEEN VALIDATED PURSUANT TO THE CLIA REGULATIONS AND IS USED FOR CLINICAL PURPOSES."
)
assert confidence > 50

Expand Down

0 comments on commit 2d5bf9a

Please sign in to comment.