CDCgov
Showing with 75 additions and 6 deletions.

+68 −5 OCR/ocr/api.py

+7 −1 OCR/pyproject.toml
diff --git a/OCR/ocr/api.py b/OCR/ocr/api.py
@@ -1,3 +1,4 @@
+"""Module for FastAPI interface functions."""
 import base64
 
 import uvicorn
@@ -34,7 +35,18 @@
 ocr = TesseractOCR()
 
 
-def data_uri_to_image(data_uri: str):
+def data_uri_to_image(data_uri: str) -> np.ndarray:
+    """Converts a base64 encoded data URI to an image, represented as a NumPy array.
+
+    Args:
+        data_uri (str): The base64 encoded image in data URI format.
+
+    Returns:
+        np.ndarray: The decoded image in NumPy array format.
+
+    Raises:
+        HTTPException: If the image decoding fails.
+    """
     try:
         base64_data = data_uri.split(",")[1]
         image_data = base64.b64decode(base64_data)
@@ -48,18 +60,40 @@ def data_uri_to_image(data_uri: str):
         )
 
 
-def image_to_data_uri(image: np.ndarray):
+def image_to_data_uri(image: np.ndarray) -> bytes:
+    """Converts an image to a base64 encoded data URI.
+
+    Args:
+        image (np.ndarray): The input image in NumPy array format.
+
+    Returns:
+        bytes: The base64 encoded data URI representation of the image.
+    """
     _, encoded = cv.imencode(".png", image)
     return b"data:image/png;base64," + base64.b64encode(encoded)
 
 
 @app.get("/")
 async def health_check():
+    """Health check endpoint to verify the API is running.
+
+    Returns:
+        dict: A dictionary with the status of the service.
+    """
     return {"status": "UP"}
 
 
 @app.post("/image_alignment/")
-async def image_alignment(source_image: str = Form(), segmentation_template: str = Form()):
+async def image_alignment(source_image: str = Form(), segmentation_template: str = Form()) -> dict:
+    """Aligns a source image to a segmentation template.
+
+    Args:
+        source_image (str): The base64 encoded source image.
+        segmentation_template (str): The base64 encoded segmentation template image.
+
+    Returns:
+        dict: A dictionary containing the aligned image as a base64 encoded data URI.
+    """
     source_image_img = data_uri_to_image(source_image)
     segmentation_template_img = data_uri_to_image(segmentation_template)
 
@@ -70,6 +104,19 @@ async def image_alignment(source_image: str = Form(), segmentation_template: str
 
 @app.post("/image_file_to_text/")
 async def image_file_to_text(source_image: UploadFile, segmentation_template: UploadFile, labels: str = Form()):
+    """Extracts text from an image file based on a segmentation template, using OCR.
+
+    Args:
+        source_image (UploadFile): The uploaded source image file.
+        segmentation_template (UploadFile): The uploaded segmentation template file.
+        labels (str): The JSON-encoded labels defining segmentation templates.
+
+    Returns:
+        dict: A dictionary containing the OCR results for the segmented regions.
+
+    Raises:
+        HTTPException: If there are issues with file decoding, parsing, segmentation, or OCR.
+    """
     try:
         source_image_np = np.frombuffer(await source_image.read(), np.uint8)
         source_image_img = cv.imdecode(source_image_np, cv.IMREAD_COLOR)
@@ -117,7 +164,20 @@ async def image_file_to_text(source_image: UploadFile, segmentation_template: Up
 @app.post("/image_to_text")
 async def image_to_text(
     source_image: str = Form(...), segmentation_template: str = Form(...), labels: str = Form(...)
-):
+) -> dict:
+    """Extracts text from an image based on a segmentation template, using OCR.
+
+    Args:
+        source_image (str): The base64-encoded source image.
+        segmentation_template (str): The base64-encoded segmentation template.
+        labels (str): The JSON-encoded labels defining segmentation templates.
+
+    Returns:
+        dict: A dictionary containing the OCR results for the segmented regions.
+
+    Raises:
+        HTTPException: If there are issues with file decoding, parsing, segmentation, or OCR.
+    """
     try:
         source_image_img = data_uri_to_image(source_image)
         segmentation_template_img = data_uri_to_image(segmentation_template)
@@ -149,5 +209,8 @@ async def image_to_text(
 
 
 def start():
-    """Launched with `poetry run start` at root level."""
+    """Starts the FastAPI app.
+
+    Launched with `poetry run start` at root level.
+    """
     uvicorn.run(app, host="0.0.0.0", port=8000, reload=False)
diff --git a/OCR/pyproject.toml b/OCR/pyproject.toml
@@ -46,9 +46,15 @@ select = ["D"]
 convention = "google"
 
 [tool.ruff.lint.per-file-ignores]
-# Ignore test directories, init.py, and CLI interfaces
+# Ignore test directories
 "tests/**" = ["D"]
+
+# Ignore package-level __init__.py files
 "__init__.py" = ["D"]
+
+# Ignore CLI entry points
 "main.py" = ["D"]
 "**_main.py" = ["D"]
 
+# Ignore phdc_converter (already documented in a different style)
+"ocr/services/phdc_converter/**" = ["D"]