Merge branch 'main' into fix/117-install-assets

CDCgov · Nov 6, 2024 · e3e8d6b · e3e8d6b
2 parents 15c7636 + 3e83d55
commit e3e8d6b
Show file tree

Hide file tree

Showing 16 changed files with 175 additions and 78 deletions.
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -4,6 +4,7 @@ name: "release"
 #   - creation of a draft release
 # 2. Pushing a version tag:
 #   - building and pushing of a new Docker image to ghcr.io
+#   - building and uploading of the public documentation
 #   - creation of a published release
 
 on:
@@ -20,16 +21,14 @@ jobs:
       contents: "write"
       id-token: "write"
       packages: "write"
+      pages: "write"
 
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
         with:
           fetch-depth: 0 # Fetch all history for tags
 
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
       - name: Next tag
         run: |
           # Get the tag that triggered the workflow
@@ -44,6 +43,10 @@ jobs:
           echo "Next tag: $next_tag"
           echo "NEXT_TAG=$next_tag" >> $GITHUB_ENV
 
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        if: startsWith(github.ref, 'refs/tags/v')
+
       - name: Log in to GitHub Container Registry
         uses: docker/login-action@v2
         if: startsWith(github.ref, 'refs/tags/v')
@@ -68,6 +71,32 @@ jobs:
             ghcr.io/${{ env.PACKAGE_NAME }}:latest
             ghcr.io/${{ env.PACKAGE_NAME }}:${{ env.NEXT_TAG }}
 
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        if: startsWith(github.ref, 'refs/tags/v')
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+
+      - name: Build public documentation
+        if: startsWith(github.ref, 'refs/tags/v')
+        run: |
+          python -m pip install --upgrade pip
+          pip install '.[dev]'
+          export INITIAL_ALGORITHMS=""
+          # NEXT_TAG was written to $GITHUB_ENV by an earlier step, so it is already
+          # in this step's environment; read it as a quoted shell variable rather
+          # than interpolating an expression into the script text
+          export VERSION="${NEXT_TAG}"
+          ./scripts/build_docs.sh _site
+
+      - name: Upload public documentation
+        uses: actions/upload-pages-artifact@v3
+        if: startsWith(github.ref, 'refs/tags/v')
+        with:
+          path: _site/
+
+      - name: Deploy to GitHub Pages
+        uses: actions/deploy-pages@v4
+        if: startsWith(github.ref, 'refs/tags/v')
+
       - name: Optionally delete the existing draft release
         run: |
           # Get existing draft release (if any)

diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
@@ -1,4 +1,4 @@
-site_name: "Record Linker Documentation"
+site_name: !ENV [SITE_NAME, 'RecordLinker Documentation']
 theme:
   name: "material"
   icon:
@@ -24,7 +24,6 @@ repo_name: CDCgov/RecordLinker
 repo_url: https://github.com/CDCgov/RecordLinker
 edit_uri: edit/main/docs/
 docs_dir: "site"
-site_dir: "../_site"
 nav:
   - "Home": "index.md"
   - "Getting Started":
@@ -33,3 +32,4 @@ nav:
   - "User Guide":
     - Design: "design.md"
     - Reference: "reference.md"
+  - "API Docs": "api-docs.html"
diff --git a/docs/site/reference.md b/docs/site/reference.md
@@ -111,7 +111,7 @@ patient data and used during query retrieval. The following blocking key types a
 These are the functions that can be used to evaluate the matching results as a collection, thus
 determining if the incoming payload is a match or not to an existing Patient record.
 
-`func:recordlinker.linking.matchers.exact_percent_match`
+`func:recordlinker.linking.matchers.eval_perfect_match`
 
 :   Determines whether a given set of feature comparisons represent a 'perfect' match
     (i.e. all features that were compared match in whatever criteria was specified).
@@ -152,8 +152,9 @@ existing Patient with the FIRST_NAME of ["John", "D"].
 
 `func:recordlinker.linking.matchers.feature_match_log_odds_fuzzy_compare`
 
-:   Similar to the above function, but uses a log-odds ratio to determine if the features are a match.
-    This is useful when comparing features that have a high cardinality and are not easily compared
-    using a string comparison. Use the `kwargs` parameter to specify the desired log-odds threshold,
-    including the fuzzy matching thresholds as well.
-    Example: `{"kwargs": {"thresholds": {"FIRST_NAME": 0.8}, "log_odds": {"FIRST_NAME": 6.8}}}`
+:   Similar to the above function, but uses a log-odds ratio to determine if the features are a match
+    probabilistically. This is useful when wanting to more robustly compare features by incorporating
+    their predictive power (i.e., the log-odds ratio for a feature represents how powerful a predictor
+    that feature is in determining whether two patient records are a true match, as opposed to a match
+    by random chance). Use the `kwargs` parameter to specify the fuzzy match threshold and log-odds ratio
+    based on training. Example: `{"kwargs": {"thresholds": {"FIRST_NAME": 0.8}, "log_odds": {"FIRST_NAME": 6.8}}}`
diff --git a/scripts/build_docs.sh b/scripts/build_docs.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+# This script builds the public documentation for this repository.
+#
+# Usage: build_docs.sh
+# Requires: npx
+
+set -e
+
+cd "$(dirname "$0")/.."
+
+OUT=${1:-_site}
+VERSION=${VERSION:-$(python -c "from recordlinker._version import __version__; print(f'v{__version__}');")}
+SITE_NAME="RecordLinker Documentation (${VERSION})"
+
+SITE_NAME=${SITE_NAME} mkdocs build --config-file docs/mkdocs.yml -d "../${OUT}"
+python -m recordlinker.utils.openapi_schema > ${OUT}/openapi.json
+npx  @redocly/cli build-docs -o "${OUT}/api-docs.html" "${OUT}/openapi.json"
diff --git a/src/recordlinker/linking/link.py b/src/recordlinker/linking/link.py
@@ -24,7 +24,7 @@
     TRACER = trace.get_tracer(__name__)
 except ImportError:
     # OpenTelemetry is an optional dependency, if it's not installed use a mock tracer
-    from recordlinker.utils import MockTracer
+    from recordlinker.utils.mock import MockTracer
 
     TRACER = MockTracer()
 

diff --git a/src/recordlinker/models/algorithm.py b/src/recordlinker/models/algorithm.py
@@ -9,6 +9,7 @@
 from recordlinker import utils
 from recordlinker.config import ConfigurationError
 from recordlinker.config import settings
+from recordlinker.utils import functools as func_utils
 
 from .base import Base
 
@@ -105,7 +106,7 @@ def bound_evaluators(self) -> dict[str, typing.Callable]:
         Get the evaluators for this algorithm pass, bound to the algorithm.
         """
         if not hasattr(self, "_bound_evaluators"):
-            self._bound_evaluators = utils.bind_functions(self.evaluators)
+            self._bound_evaluators = func_utils.bind_functions(self.evaluators)
         return self._bound_evaluators
 
     @property
@@ -129,7 +130,7 @@ def bound_rule(self) -> typing.Callable:
         Get the rule for this algorithm pass, bound to the algorithm.
         """
         if not hasattr(self, "_bound_rule"):
-            self._bound_rule = utils.str_to_callable(self.rule)
+            self._bound_rule = func_utils.str_to_callable(self.rule)
         return self._bound_rule
 
 

diff --git a/src/recordlinker/routes/link_router.py b/src/recordlinker/routes/link_router.py
@@ -28,7 +28,7 @@ async def link_piirecord(
     db_session: orm.Session = fastapi.Depends(get_session),
 ) -> schemas.LinkResponse:
     """
-    Compare a PII Reocrd with records in the Master Patient Index (MPI) to
+    Compare a PII Record with records in the Master Patient Index (MPI) to
     check for matches with existing patient records. If matches are found,
     returns the patient and person reference IDs.
     """
@@ -187,4 +187,4 @@ async def link_fhir(
 
     except ValueError:
         response.status_code = fastapi.status.HTTP_400_BAD_REQUEST
-        raise fastapi.HTTPException(status_code=400, detail="Error: Bad request")
+        raise fastapi.HTTPException(status_code=400, detail="Error: Bad request")
diff --git a/src/recordlinker/schemas/algorithm.py b/src/recordlinker/schemas/algorithm.py
@@ -10,16 +10,17 @@
 
 import pydantic
 
-from recordlinker import utils
 from recordlinker.linking import matchers
 from recordlinker.models.mpi import BlockingKey
 from recordlinker.schemas.pii import Feature
+from recordlinker.utils import functools as utils
 
 
 class AlgorithmPass(pydantic.BaseModel):
     """
     The schema for an algorithm pass record.
     """
+
     model_config = pydantic.ConfigDict(from_attributes=True)
 
     blocking_keys: list[str]
@@ -88,7 +89,7 @@ class AlgorithmSummary(Algorithm):
     passes: typing.Sequence[AlgorithmPass] = pydantic.Field(exclude=True)
 
     # mypy doesn't support decorators on properties; https://github.com/python/mypy/issues/1362
-    @pydantic.computed_field # type: ignore[misc]
+    @pydantic.computed_field  # type: ignore[misc]
     @property
     def pass_count(self) -> int:
         """

diff --git a/src/recordlinker/utils/__init__.py b/src/recordlinker/utils/__init__.py
@@ -0,0 +1,25 @@
+import json
+import pathlib
+
+
+def project_root() -> pathlib.Path:
+    """
+    Returns the path to the project root directory.
+
+    Starting from this file's resolved path, walks up through the parent
+    directories and returns the first path whose final component is
+    named "recordlinker".
+
+    Raises FileNotFoundError if the top of the filesystem is reached
+    without finding such a directory.
+    """
+    root = pathlib.Path(__file__).resolve()
+    while root.name != "recordlinker":
+        # a path that is its own parent is the filesystem root; nothing left to search
+        if root.parent == root:
+            raise FileNotFoundError("recordlinker project root not found.")
+        root = root.parent
+    return root
+
+
+def read_json(path: str) -> dict:
+    """
+    Loads a JSON file.
+
+    A relative `path` is resolved against the directory returned by
+    `project_root()`; an absolute `path` is used as given. Returns whatever
+    `json.load` produces for the file's contents.
+    """
+    if not pathlib.Path(path).is_absolute():
+        # if path is relative, append to the project root
+        path = str(pathlib.Path(project_root(), path))
+    # NOTE(review): open() is called without an explicit encoding, so the
+    # platform default applies -- consider passing encoding="utf-8"
+    with open(path, "r") as fobj:
+        return json.load(fobj)
diff --git a/src/recordlinker/utils.py → src/recordlinker/utils/functools.py b/src/recordlinker/utils.py → src/recordlinker/utils/functools.py
@@ -1,34 +1,9 @@
 import copy
 import importlib
 import inspect
-import json
-import pathlib
 import typing
 
 
-def project_root() -> pathlib.Path:
-    """
-    Returns the path to the project root directory.
-    """
-    root = pathlib.Path(__file__).resolve()
-    while root.name != "recordlinker":
-        if root.parent == root:
-            raise FileNotFoundError("recordlinker project root not found.")
-        root = root.parent
-    return root
-
-
-def read_json(path: str) -> dict:
-    """
-    Loads a JSON file.
-    """
-    if not pathlib.Path(path).is_absolute():
-        # if path is relative, append to the project root
-        path = str(pathlib.Path(project_root(), path))
-    with open(path, "r") as fobj:
-        return json.load(fobj)
-
-
 def bind_functions(data: dict) -> dict:
     """
     Binds the functions in the data to the functions in the module.
@@ -125,25 +100,3 @@ def _compare_types(actual_type, expected_type):
 
     # Compare return type
     return _compare_types(fn_signature.return_annotation, expected_return)
-
-
-class MockTracer:
-    """
-    A no-op OTel tracer that can be used in place of a real tracer. This is useful
-    for situations where users decide to not install the otelemetry package.
-    """
-    def start_as_current_span(self, name, **kwargs):
-        """Returns a no-op span"""
-        return self
-
-    def __enter__(self):
-        """No-op for context manager entry"""
-        pass
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        """No-op for context manager exit"""
-        pass
-
-    def start_span(self, name, **kwargs):
-        """Returns a no-op span"""
-        return self
diff --git a/src/recordlinker/utils/mock.py b/src/recordlinker/utils/mock.py
@@ -0,0 +1,21 @@
+class MockTracer:
+    """
+    A no-op OTel tracer that can be used in place of a real tracer. This is useful
+    for situations where users decide to not install the OpenTelemetry package.
+
+    Both span-starting methods return the tracer itself, and the tracer also
+    implements the context manager protocol, so `with tracer.start_span(...)`
+    works without doing anything.
+    """
+
+    def start_as_current_span(self, name, **kwargs):
+        """Returns a no-op span (this tracer itself); `name` and `kwargs` are ignored"""
+        return self
+
+    def __enter__(self):
+        """No-op for context manager entry; returns None, so `with ... as span` binds None"""
+        pass
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """No-op for context manager exit; returns None, so exceptions are not suppressed"""
+        pass
+
+    def start_span(self, name, **kwargs):
+        """Returns a no-op span (this tracer itself); `name` and `kwargs` are ignored"""
+        return self
diff --git a/src/recordlinker/utils/openapi_schema.py b/src/recordlinker/utils/openapi_schema.py
@@ -0,0 +1,36 @@
+"""
+recordlinker.openapi_schema
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This module exports the OpenAPI schema for the Record Linker service.
+"""
+
+import json
+import sys
+import typing
+
+from fastapi.openapi.utils import get_openapi
+
+from recordlinker import main
+
+
+def export_json(file: typing.TextIO) -> None:
+    """
+    Export the OpenAPI schema to a JSON file.
+
+    The schema is generated by `get_openapi` from the title, version, OpenAPI
+    version, description, routes, license info and contact of `main.app`, and
+    is serialized to `file` with `json.dump`.
+
+    `file` is a writable text stream; this function does not close it.
+    """
+    json.dump(
+        get_openapi(
+            title=main.app.title,
+            version=main.app.version,
+            openapi_version=main.app.openapi_version,
+            description=main.app.description,
+            routes=main.app.routes,
+            license_info=main.app.license_info,
+            contact=main.app.contact,
+        ),
+        file,
+    )
+
+
+if __name__ == "__main__":
+    export_json(sys.stdout)
diff --git a/tests/unit/utils/__init__.py b/tests/unit/utils/__init__.py
diff --git a/tests/unit/test_utils.py → tests/unit/utils/test_functools.py b/tests/unit/test_utils.py → tests/unit/utils/test_functools.py
@@ -5,8 +5,8 @@
 
 import pytest
 
-from recordlinker import utils
 from recordlinker.linking import matchers
+from recordlinker.utils import functools as utils
 
 
 def test_project_root():
@@ -113,15 +113,3 @@ def test_check_signature(self):
         assert not utils.check_signature(self.func2, typing.Callable[[int, list[int]], None])
         assert utils.check_signature(self.func2, typing.Callable[[int, list[int]], float])
         assert not utils.check_signature("a", typing.Callable[[str], None])
-
-
-class TestMockTracer:
-    def test_start_span(self):
-        tracer = utils.MockTracer()
-        with tracer.start_span("test_span") as span:
-            assert span is None
-
-    def test_start_as_current_span(self):
-        tracer = utils.MockTracer()
-        with tracer.start_as_current_span("test.span") as span:
-            assert span is None
diff --git a/tests/unit/utils/test_mock.py b/tests/unit/utils/test_mock.py
@@ -0,0 +1,13 @@
+from recordlinker.utils import mock as utils
+
+
+class TestMockTracer:
+    """
+    Checks that MockTracer can be used as a context manager through both of its
+    span-starting methods.
+    """
+
+    def test_start_span(self):
+        """The value bound by `with tracer.start_span(...) as span` is None."""
+        tracer = utils.MockTracer()
+        with tracer.start_span("test_span") as span:
+            assert span is None
+
+    def test_start_as_current_span(self):
+        """The value bound by `with tracer.start_as_current_span(...) as span` is None."""
+        tracer = utils.MockTracer()
+        with tracer.start_as_current_span("test.span") as span:
+            assert span is None