diff --git a/.github/workflows/check_smoke_tests.yml b/.github/workflows/check_smoke_tests.yml index 5cd009d4..d1d33b9c 100644 --- a/.github/workflows/check_smoke_tests.yml +++ b/.github/workflows/check_smoke_tests.yml @@ -62,7 +62,7 @@ jobs: -H "Content-Type: application/json") echo "Response: $RESPONSE_1" - echo "$RESPONSE_1" | jq -e '.is_match == false' + echo "$RESPONSE_1" | jq -e '.prediction == "no_match"' PERSON_REFERENCE_ID=$(echo "$RESPONSE_1" | jq -r '.person_reference_id') @@ -71,7 +71,7 @@ jobs: -H "Content-Type: application/json") echo "Response: $RESPONSE_2" - echo "$RESPONSE_2" | jq -e '.is_match == true' + echo "$RESPONSE_2" | jq -e '.prediction == "match"' echo "$RESPONSE_2" | jq -e --arg id "$PERSON_REFERENCE_ID" '.person_reference_id == $id' #enhanced tests @@ -80,7 +80,7 @@ jobs: -H "Content-Type: application/json") echo "Response: $RESPONSE_3" - echo "$RESPONSE_3" | jq -e '.is_match == false' + echo "$RESPONSE_3" | jq -e '.prediction == "no_match"' PERSON_REFERENCE_ID=$(echo "$RESPONSE_3" | jq -r '.person_reference_id') @@ -89,7 +89,7 @@ jobs: -H "Content-Type: application/json") echo "Response: $RESPONSE_4" - echo "$RESPONSE_4" | jq -e '.is_match == true' + echo "$RESPONSE_4" | jq -e '.prediction == "match"' echo "$RESPONSE_4" | jq -e --arg id "$PERSON_REFERENCE_ID" '.person_reference_id == $id' #invalid tests diff --git a/src/recordlinker/assets/initial_algorithms.json b/src/recordlinker/assets/initial_algorithms.json index 51c40876..2b2ec3d9 100644 --- a/src/recordlinker/assets/initial_algorithms.json +++ b/src/recordlinker/assets/initial_algorithms.json @@ -3,6 +3,8 @@ "label": "dibbs-basic", "description": "The DIBBs Default Algorithm. 
Based on field experimentation and statistical analysis, this deterministic two-pass algorithm combines geographical and personal information to maximize linkage quality while minimizing false positives", "is_default": true, + "include_multiple_matches": true, + "belongingness_ratio": [0.75, 0.9], "passes": [ { "blocking_keys": [ @@ -15,7 +17,6 @@ "LAST_NAME": "func:recordlinker.linking.matchers.feature_match_exact" }, "rule": "func:recordlinker.linking.matchers.eval_perfect_match", - "cluster_ratio": 0.9, "kwargs": { "thresholds": { "FIRST_NAME": 0.9, @@ -39,7 +40,6 @@ "BIRTHDATE": "func:recordlinker.linking.matchers.feature_match_exact" }, "rule": "func:recordlinker.linking.matchers.eval_perfect_match", - "cluster_ratio": 0.9, "kwargs": { "thresholds": { "FIRST_NAME": 0.9, @@ -57,6 +57,8 @@ "label": "dibbs-enhanced", "description": "The DIBBs Log-Odds Algorithm. This optional algorithm uses statistical correction to adjust the links between incoming records and previously processed patients (it does so by taking advantage of the fact that some fields are more informative than others—e.g., two records matching on MRN is stronger evidence that they should be linked than if the records matched on zip code). It can be used if additional granularity in matching links is desired. However, while the DIBBs Log-Odds Algorithm can create higher-quality links, it is dependent on statistical updating and pre-calculated population analysis, which requires some work on the part of the user. 
For those cases where additional precision or stronger matching criteria are required, the Log-Odds algorithm is detailed below.", "is_default": false, + "include_multiple_matches": true, + "belongingness_ratio": [0.75, 0.9], "passes": [ { "blocking_keys": [ @@ -69,7 +71,6 @@ "LAST_NAME": "func:recordlinker.linking.matchers.feature_match_log_odds_fuzzy_compare" }, "rule": "func:recordlinker.linking.matchers.eval_log_odds_cutoff", - "cluster_ratio": 0.9, "kwargs": { "similarity_measure": "JaroWinkler", "thresholds": { @@ -106,7 +107,6 @@ "BIRTHDATE": "func:recordlinker.linking.matchers.feature_match_log_odds_fuzzy_compare" }, "rule": "func:recordlinker.linking.matchers.eval_log_odds_cutoff", - "cluster_ratio": 0.9, "kwargs": { "similarity_measure": "JaroWinkler", "thresholds": { diff --git a/src/recordlinker/database/mpi_service.py b/src/recordlinker/database/mpi_service.py index 52922949..863431e1 100644 --- a/src/recordlinker/database/mpi_service.py +++ b/src/recordlinker/database/mpi_service.py @@ -82,8 +82,6 @@ def insert_patient( :returns: The inserted Patient record """ - # create a new Person record if one isn't provided - person = person or models.Person() patient = models.Patient(person=person, record=record, external_patient_id=external_patient_id) @@ -127,12 +125,13 @@ def bulk_insert_patients( if not records: return [] - person = person or models.Person() - session.add(person) - session.flush() + if person: + session.add(person) + session.flush() + pat_data = [ { - "person_id": person.id, + "person_id": person and person.id, "_data": record.to_json(prune_empty=True), "external_patient_id": record.external_id, "external_person_id": external_person_id, diff --git a/src/recordlinker/linking/link.py b/src/recordlinker/linking/link.py index 62633e00..b0848a22 100644 --- a/src/recordlinker/linking/link.py +++ b/src/recordlinker/linking/link.py @@ -6,8 +6,8 @@ """ import collections +import dataclasses import typing -import uuid from sqlalchemy import orm @@ 
-29,6 +29,12 @@ TRACER = MockTracer() +@dataclasses.dataclass +class LinkResult: + person: models.Person + belongingness_ratio: float + + def compare( record: schemas.PIIRecord, patient: models.Patient, algorithm_pass: models.AlgorithmPass ) -> bool: @@ -59,7 +65,7 @@ def link_record_against_mpi( session: orm.Session, algorithm: models.Algorithm, external_person_id: typing.Optional[str] = None, -) -> tuple[bool, uuid.UUID, uuid.UUID]: +) -> tuple[models.Patient, models.Person | None, list[LinkResult]]: """ Runs record linkage on a single incoming record (extracted from a FHIR bundle) using an existing database as an MPI. Uses a flexible algorithm @@ -82,10 +88,10 @@ def link_record_against_mpi( # Membership scores need to persist across linkage passes so that we can # find the highest scoring match across all passes scores: dict[models.Person, float] = collections.defaultdict(float) + # the minimum ratio of matches needed to be considered a cluster member + belongingness_ratio_lower_bound, belongingness_ratio_upper_bound = algorithm.belongingness_ratio for algorithm_pass in algorithm.passes: with TRACER.start_as_current_span("link.pass"): - # the minimum ratio of matches needed to be considered a cluster member - cluster_ratio = algorithm_pass.cluster_ratio # initialize a dictionary to hold the clusters of patients for each person clusters: dict[models.Person, list[models.Patient]] = collections.defaultdict(list) # block on the pii_record and the algorithm's blocking criteria, then @@ -108,17 +114,33 @@ def link_record_against_mpi( if compare(record, patient, algorithm_pass): matched_count += 1 # calculate the match ratio for this person cluster - match_ratio = matched_count / len(patients) - if match_ratio >= cluster_ratio: + belongingness_ratio = matched_count / len(patients) + if belongingness_ratio >= belongingness_ratio_lower_bound: # The match ratio is larger than the minimum cluster threshold, # optionally update the max score for this person - 
scores[person] = max(scores[person], match_ratio) + scores[person] = max(scores[person], belongingness_ratio) matched_person: typing.Optional[models.Person] = None if scores: # Find the person with the highest matching score matched_person, _ = max(scores.items(), key=lambda i: i[1]) + sorted_scores: list[LinkResult] = [LinkResult(k, v) for k, v in sorted(scores.items(), reverse=True, key=lambda item: item[1])] + if not scores: + # No match + matched_person = models.Person() # Create new Person Cluster + results = [] + elif sorted_scores[0].belongingness_ratio >= belongingness_ratio_upper_bound: + # Match (1 or many) + matched_person = sorted_scores[0].person + results = [x for x in sorted_scores if x.belongingness_ratio >= belongingness_ratio_upper_bound] # Multiple matches + if not algorithm.include_multiple_matches: + results = [results[0]] # 1 Match (highest Belongingness Ratio) + else: + # Possible match + matched_person = None + results = sorted_scores + with TRACER.start_as_current_span("insert"): patient = mpi_service.insert_patient( session, @@ -130,4 +152,4 @@ def link_record_against_mpi( ) # return a tuple indicating whether a match was found and the person ID - return (bool(matched_person), patient.person.reference_id, patient.reference_id) + return (patient, patient.person, results) diff --git a/src/recordlinker/models/algorithm.py b/src/recordlinker/models/algorithm.py index 1f6c99f2..5b5f70f9 100644 --- a/src/recordlinker/models/algorithm.py +++ b/src/recordlinker/models/algorithm.py @@ -23,10 +23,27 @@ class Algorithm(Base): is_default: orm.Mapped[bool] = orm.mapped_column(default=False, index=True) label: orm.Mapped[str] = orm.mapped_column(sqltypes.String(255), unique=True) description: orm.Mapped[str] = orm.mapped_column(sqltypes.Text(), nullable=True) + include_multiple_matches: orm.Mapped[bool] = orm.mapped_column(sqltypes.Boolean, default=True) + belongingness_ratio_lower_bound: orm.Mapped[float] = orm.mapped_column(sqltypes.Float, 
default=1.0) + belongingness_ratio_upper_bound: orm.Mapped[float] = orm.mapped_column(sqltypes.Float, default=1.0) passes: orm.Mapped[list["AlgorithmPass"]] = orm.relationship( back_populates="algorithm", cascade="all, delete-orphan" ) + @property + def belongingness_ratio(self) -> tuple[float, float]: + """ + Get the Belongingness Ratio Threshold Range for this algorithm pass. + """ + return (self.belongingness_ratio_lower_bound, self.belongingness_ratio_upper_bound) + + @belongingness_ratio.setter # type: ignore + def belongingness_ratio(self, value: tuple[float, float]): + """ + Set the Belongingess Ratio for this algorithm pass. + """ + self.belongingness_ratio_lower_bound, self.belongingness_ratio_upper_bound = value + @classmethod def from_dict(cls, **data: dict) -> "Algorithm": """ @@ -82,7 +99,6 @@ class AlgorithmPass(Base): blocking_keys: orm.Mapped[list[str]] = orm.mapped_column(sqltypes.JSON) _evaluators: orm.Mapped[dict[str, str]] = orm.mapped_column("evaluators", sqltypes.JSON) _rule: orm.Mapped[str] = orm.mapped_column("rule", sqltypes.String(255)) - cluster_ratio: orm.Mapped[float] = orm.mapped_column(sqltypes.Float) kwargs: orm.Mapped[dict] = orm.mapped_column(sqltypes.JSON, default=dict) @property diff --git a/src/recordlinker/models/mpi.py b/src/recordlinker/models/mpi.py index 8cbfb535..d9738ac1 100644 --- a/src/recordlinker/models/mpi.py +++ b/src/recordlinker/models/mpi.py @@ -39,7 +39,7 @@ class Patient(Base): __tablename__ = "mpi_patient" id: orm.Mapped[int] = orm.mapped_column(get_bigint_pk(), autoincrement=True, primary_key=True) - person_id: orm.Mapped[int] = orm.mapped_column(schema.ForeignKey(f"{Person.__tablename__}.id")) + person_id: orm.Mapped[int] = orm.mapped_column(schema.ForeignKey(f"{Person.__tablename__}.id"), nullable=True) person: orm.Mapped["Person"] = orm.relationship(back_populates="patients") # NOTE: We're using a protected attribute here to store the data string, as we # want getter/setter access to the data dictionary to 
trigger updating the diff --git a/src/recordlinker/routes/link_router.py b/src/recordlinker/routes/link_router.py index 9f56e7f3..95511816 100644 --- a/src/recordlinker/routes/link_router.py +++ b/src/recordlinker/routes/link_router.py @@ -46,16 +46,16 @@ async def link_piirecord( # link the record try: # Make a copy of record_to_link so we don't modify the original - (found_match, new_person_id, patient_reference_id) = link.link_record_against_mpi( + (patient, person, results) = link.link_record_against_mpi( record=input.record, session=db_session, algorithm=algorithm, external_person_id=input.external_person_id, ) return schemas.LinkResponse( - is_match=found_match, - patient_reference_id=patient_reference_id, - person_reference_id=new_person_id, + patient_reference_id=patient.reference_id, + person_reference_id=(person and person.reference_id), + results=[schemas.LinkResult(**r.__dict__) for r in results] ) except ValueError: @@ -86,12 +86,10 @@ async def link_dibbs( algorithm = algorithm_service.default_algorithm(db_session) if not algorithm: - response.status_code = fastapi.status.HTTP_422_UNPROCESSABLE_ENTITY - return schemas.LinkFhirResponse( - found_match=False, - updated_bundle=input_bundle, - message="Error: No algorithm found", - ) + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Error: Invalid algorithm specified" + ) # Now extract the patient record we want to link try: @@ -101,36 +99,40 @@ async def link_dibbs( if entry.get("resource", {}).get("resourceType", "") == "Patient" ][0] except IndexError: - response.status_code = fastapi.status.HTTP_400_BAD_REQUEST - return schemas.LinkFhirResponse( - found_match=False, - updated_bundle=input_bundle, - message="Supplied bundle contains no Patient resource to link on.", - ) + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_400_BAD_REQUEST, + detail="Supplied bundle contains no Patient resource to link on." 
+ ) + # convert record to PII pii_record: schemas.PIIRecord = fhir.fhir_record_to_pii_record(record_to_link) # Now link the record try: - (found_match, new_person_id, _) = link.link_record_against_mpi( + (patient, person, results) = link.link_record_against_mpi( record=pii_record, session=db_session, algorithm=algorithm, external_person_id=external_id, ) - updated_bundle = fhir.add_person_resource( - str(new_person_id), pii_record.external_id, input_bundle + updated_bundle: dict | None = None + if person: + updated_bundle = fhir.add_person_resource( + str(person.reference_id), pii_record.external_id, input_bundle + ) + return schemas.LinkFhirResponse( + patient_reference_id=patient.reference_id, + person_reference_id=(person and person.reference_id), + results=[schemas.LinkResult(**r.__dict__) for r in results], + updated_bundle=updated_bundle ) - return schemas.LinkFhirResponse(found_match=found_match, updated_bundle=updated_bundle) except ValueError as err: - response.status_code = fastapi.status.HTTP_400_BAD_REQUEST - return schemas.LinkFhirResponse( - found_match=False, - updated_bundle=input_bundle, - message=f"Could not connect to database: {err}", - ) + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_400_BAD_REQUEST, + detail=f"Could not connect to database: {err}" + ) @router.post("/fhir", summary="Link FHIR") @@ -177,18 +179,21 @@ async def link_fhir( # link the record try: # Make a copy of pii_record so we don't modify the original - (found_match, new_person_id, patient_reference_id) = link.link_record_against_mpi( + (patient, person, results) = link.link_record_against_mpi( record=pii_record, session=db_session, algorithm=algorithm, external_person_id=external_id, ) return schemas.LinkResponse( - is_match=found_match, - patient_reference_id=patient_reference_id, - person_reference_id=new_person_id, + patient_reference_id=patient.reference_id, + person_reference_id=(person and person.reference_id), + 
results=[schemas.LinkResult(**r.__dict__) for r in results] ) except ValueError: response.status_code = fastapi.status.HTTP_400_BAD_REQUEST - raise fastapi.HTTPException(status_code=400, detail="Error: Bad request") + raise fastapi.HTTPException( + status_code=400, + detail="Error: Bad request" + ) diff --git a/src/recordlinker/schemas/__init__.py b/src/recordlinker/schemas/__init__.py index f41d5cc6..dbebdafd 100644 --- a/src/recordlinker/schemas/__init__.py +++ b/src/recordlinker/schemas/__init__.py @@ -5,6 +5,7 @@ from .link import LinkFhirResponse from .link import LinkInput from .link import LinkResponse +from .link import LinkResult from .mpi import PatientPersonRef from .mpi import PatientRef from .mpi import PersonRef @@ -23,6 +24,7 @@ "PIIRecord", "LinkInput", "LinkResponse", + "LinkResult", "LinkFhirInput", "LinkFhirResponse", "PersonRef", diff --git a/src/recordlinker/schemas/algorithm.py b/src/recordlinker/schemas/algorithm.py index b9710bea..3b9edce8 100644 --- a/src/recordlinker/schemas/algorithm.py +++ b/src/recordlinker/schemas/algorithm.py @@ -9,6 +9,7 @@ import typing import pydantic +from typing_extensions import Annotated from recordlinker.linking import matchers from recordlinker.models.mpi import BlockingKey @@ -26,7 +27,6 @@ class AlgorithmPass(pydantic.BaseModel): blocking_keys: list[str] evaluators: dict[str, str] rule: str - cluster_ratio: float kwargs: dict[str, typing.Any] = {} @pydantic.field_validator("blocking_keys", mode="before") @@ -44,7 +44,7 @@ def validate_blocking_keys(cls, value): @pydantic.field_validator("evaluators", mode="before") def validate_evaluators(cls, value): """ - Validated the evaluators into a list of feature comparison functions. + Validate the evaluators into a list of feature comparison functions. 
""" for k, v in value.items(): try: @@ -78,8 +78,20 @@ class Algorithm(pydantic.BaseModel): label: str = pydantic.Field(pattern=r"^[a-z0-9]+(?:-[a-z0-9]+)*$") description: typing.Optional[str] = None is_default: bool = False + include_multiple_matches: bool = True + belongingness_ratio: tuple[Annotated[float, pydantic.Field(ge=0, le=1)], Annotated[float, pydantic.Field(ge=0, le=1)]] passes: typing.Sequence[AlgorithmPass] + @pydantic.field_validator("belongingness_ratio", mode="before") + def validate_belongingness_ratio(cls, value): + """ + Validate the Belongingness Ratio Threshold Range. + """ + lower_bound, upper_bound = value + if lower_bound > upper_bound: + raise ValueError(f"Invalid range. Lower bound must be less than upper bound: {value}") + return (lower_bound, upper_bound) + class AlgorithmSummary(Algorithm): """ diff --git a/src/recordlinker/schemas/link.py b/src/recordlinker/schemas/link.py index 8e27479d..e1da5142 100644 --- a/src/recordlinker/schemas/link.py +++ b/src/recordlinker/schemas/link.py @@ -31,21 +31,66 @@ class LinkInput(pydantic.BaseModel): ) +class LinkResult(pydantic.BaseModel): + """ + Schema for linkage results to a person cluster. + """ + + person_reference_id: uuid.UUID = pydantic.Field( + description="The identifier for a person that the patient may be linked to." + ) + + belongingness_ratio: typing.Annotated[float, pydantic.Field(ge=0, le=1)] = pydantic.Field( + description="The proportion of patient records matched in this person cluster (" + "between 0 and 1.0)." + ) + + + @pydantic.model_validator(mode="before") + @classmethod + def extract_person_reference_id(cls, data: typing.Any) -> typing.Any: + """ + Extract the person_reference_id from the person_reference_id field. + """ + person = data.pop("person", None) + if person: + data["person_reference_id"] = person.reference_id + return data + + class LinkResponse(pydantic.BaseModel): """ Schema for responses from the link endpoint. 
""" - is_match: bool = pydantic.Field( - description="A true value indicates that one or more existing records " - "matched with the provided record, and these results have been linked." - ) + patient_reference_id: uuid.UUID = pydantic.Field( - description="The unique identifier for the patient that has been linked" + description="The unique identifier for the patient that has been linked." ) - person_reference_id: uuid.UUID = pydantic.Field( - description="The identifier for the person that the patient record has " "been linked to.", + person_reference_id: uuid.UUID | None = pydantic.Field( + description="The identifier for the person that the patient record has been linked to." + " If prediction=\"possible_match\", this value will be null." ) + results: list[LinkResult] = pydantic.Field( + description="A list of (possibly) matched Persons. If prediction='match', either the single" + "(include_multiple_matches=False) or multiple (include_multiple_matches=True) " + "Persons with which the Patient record matches. If prediction='possible_match'," + "all Persons with which the Patient record possibly matches." + ) + + # mypy doesn't support decorators on properties; https://github.com/python/mypy/issues/1362 + @pydantic.computed_field # type: ignore[misc] + @property + def prediction(self) -> typing.Literal["match", "possible_match", "no_match"]: + """ + Record Linkage algorithm prediction. + """ + if self.person_reference_id and self.results: + return "match" + elif not self.results: + return "no_match" + else: + return "possible_match" class LinkFhirInput(pydantic.BaseModel): @@ -69,23 +114,14 @@ class LinkFhirInput(pydantic.BaseModel): ) -class LinkFhirResponse(pydantic.BaseModel): +class LinkFhirResponse(LinkResponse): """ The schema for responses from the link FHIR endpoint. """ - found_match: bool = pydantic.Field( - description="A true value indicates that one or more existing records " - "matched with the provided record, and these results have been linked." 
- ) - updated_bundle: dict = pydantic.Field( - description="If link_found is true, returns the FHIR bundle with updated" - " references to existing Personresource. If link_found is false, " + updated_bundle: dict | None = pydantic.Field( + description="If 'prediction' is 'match', returns the FHIR bundle with updated" + " references to existing Person resource. If 'prediction' is 'no_match', " "returns the FHIR bundle with a reference to a newly created " - "Person resource." - ) - message: typing.Optional[str] = pydantic.Field( - description="An optional message in the case that the linkage endpoint did " - "not run successfully containing a description of the error that happened.", - default="", + "Person resource. If 'prediction' is 'possible_match', returns null." ) diff --git a/tests/unit/assets/multiple_matches_patient_bundle.json b/tests/unit/assets/multiple_matches_patient_bundle.json new file mode 100644 index 00000000..9fe3eff8 --- /dev/null +++ b/tests/unit/assets/multiple_matches_patient_bundle.json @@ -0,0 +1,214 @@ +{ + "resourceType": "Bundle", + "identifier": { + "value": "a very contrived FHIR bundle" + }, + "entry": [ + { + "resource": { + "resourceType": "Patient", + "id": "f6a16ff7-4a31-11eb-be7b-8344edc8f36b", + "identifier": [ + { + "value": "123456789", + "type": { + "coding": [ + { + "code": "MR", + "system": "http://terminology.hl7.org/CodeSystem/v2-0203", + "display": "Medical record number" + } + ] + } + } + ], + "name": [ + { + "family": "Smith", + "given": [ + "John" + ], + "use": "official" + } + ], + "birthDate": "1980-01-01", + "gender": "male", + "address": [ + { + "line": [ + "1234 Silversun Strip" + ], + "buildingNumber": "1234", + "city": "Boston", + "state": "Massachusetts", + "postalCode": "99999", + "use": "home" + } + ], + "telecom": [ + { + "use": "home", + "system": "phone", + "value": "123-456-7890" + } + ] + } + }, + { + "resource": { + "resourceType": "Patient", + "id": "2fdd0b8b-4a70-11eb-99fd-ad786a821574", + 
"identifier": [ + { + "value": "123456789", + "type": { + "coding": [ + { + "code": "MR", + "system": "http://terminology.hl7.org/CodeSystem/v2-0203", + "display": "Medical record number" + } + ] + } + } + ], + "name": [ + { + "family": "Smith", + "given": [ + "Jonathan" + ], + "use": "official" + } + ], + "birthDate": "1980-01-01", + "gender": "male", + "address": [ + { + "line": [ + "1234 Silversun Strip" + ], + "buildingNumber": "1234", + "city": "Boston", + "state": "Massachusetts", + "postalCode": "99999", + "use": "home" + } + ], + "telecom": [ + { + "use": "home", + "system": "phone", + "value": "123-456-7890" + } + ] + } + }, + { + "resource": { + "resourceType": "Patient", + "id": "2fdd0b8b-4a70-11eb-99fd-ad786a821574", + "identifier": [ + { + "value": "123456789", + "type": { + "coding": [ + { + "code": "MR", + "system": "http://terminology.hl7.org/CodeSystem/v2-0203", + "display": "Medical record number" + } + ] + } + } + ], + "name": [ + { + "family": "Smith", + "given": [ + "Johna" + ], + "use": "official" + } + ], + "birthDate": "1980-01-01", + "gender": "male", + "address": [ + { + "line": [ + "1234 Silversun Strip" + ], + "buildingNumber": "1234", + "city": "Boston", + "state": "Massachusetts", + "postalCode": "99999", + "use": "home" + } + ], + "telecom": [ + { + "use": "home", + "system": "phone", + "value": "123-456-7890" + } + ] + } + }, + { + "resource": { + "resourceType": "Patient", + "id": "2fdd0b8b-4a70-11eb-99fd-ad786a821574", + "identifier": [ + { + "value": "123456789", + "type": { + "coding": [ + { + "code": "MR", + "system": "http://terminology.hl7.org/CodeSystem/v2-0203", + "display": "Medical record number" + } + ] + } + } + ], + "name": [ + { + "family": "Smith", + "given": [ + "Jona" + ], + "use": "official" + } + ], + "birthDate": "1980-01-01", + "gender": "male", + "address": [ + { + "line": [ + "1234 Silversun Strip" + ], + "buildingNumber": "1234", + "city": "Boston", + "state": "Massachusetts", + "postalCode": "99999", + 
"use": "home" + } + ], + "telecom": [ + { + "use": "home", + "system": "phone", + "value": "123-456-7890" + } + ] + } + }, + { + "request": { + "method": "GET", + "url": "testing for entry with no resource" + } + } + ] +} diff --git a/tests/unit/assets/possible_match_basic_patient_bundle.json b/tests/unit/assets/possible_match_basic_patient_bundle.json new file mode 100644 index 00000000..7d5d70d3 --- /dev/null +++ b/tests/unit/assets/possible_match_basic_patient_bundle.json @@ -0,0 +1,164 @@ +{ + "resourceType": "Bundle", + "identifier": { + "value": "a very contrived FHIR bundle" + }, + "entry": [ + { + "resource": { + "resourceType": "Patient", + "id": "f6a16ff7-4a31-11eb-be7b-8344edc8f36b", + "identifier": [ + { + "value": "123456789", + "type": { + "coding": [ + { + "code": "MR", + "system": "http://terminology.hl7.org/CodeSystem/v2-0203", + "display": "Medical record number" + } + ] + } + } + ], + "name": [ + { + "family": "Smith", + "given": [ + "John" + ], + "use": "official" + } + ], + "birthDate": "1980-01-01", + "gender": "male", + "address": [ + { + "line": [ + "1234 Silversun Strip" + ], + "buildingNumber": "1234", + "city": "Boston", + "state": "Massachusetts", + "postalCode": "99999", + "use": "home" + } + ], + "telecom": [ + { + "use": "home", + "system": "phone", + "value": "123-456-7890" + } + ] + } + }, + { + "resource": { + "resourceType": "Patient", + "id": "2fdd0b8b-4a70-11eb-99fd-ad786a821574", + "identifier": [ + { + "value": "123456789", + "type": { + "coding": [ + { + "code": "MR", + "system": "http://terminology.hl7.org/CodeSystem/v2-0203", + "display": "Medical record number" + } + ] + } + } + ], + "name": [ + { + "family": "Smith", + "given": [ + "Johnny" + ], + "use": "official" + } + ], + "birthDate": "1980-01-01", + "gender": "male", + "address": [ + { + "line": [ + "1234 Silversun Strip" + ], + "buildingNumber": "1234", + "city": "Boston", + "state": "Massachusetts", + "postalCode": "99999", + "use": "home" + } + ], + "telecom": [ + 
{ + "use": "home", + "system": "phone", + "value": "123-456-7890" + } + ] + } + }, + { + "resource": { + "resourceType": "Patient", + "id": "2fdd0b8b-4a70-11eb-99fd-ad786a821574", + "identifier": [ + { + "value": "123456789", + "type": { + "coding": [ + { + "code": "MR", + "system": "http://terminology.hl7.org/CodeSystem/v2-0203", + "display": "Medical record number" + } + ] + } + } + ], + "name": [ + { + "family": "Smith", + "given": [ + "Jon" + ], + "use": "official" + } + ], + "birthDate": "1980-01-01", + "gender": "male", + "address": [ + { + "line": [ + "Bay 16", + "Ward Sector 24" + ], + "city": "Brooklyn", + "state": "New York", + "postalCode": "54321", + "use": "home" + } + ], + "telecom": [ + { + "use": "home", + "system": "phone", + "value": "123-456-7890" + } + ] + } + }, + { + "request": { + "method": "GET", + "url": "testing for entry with no resource" + } + } + ] +} diff --git a/tests/unit/assets/possible_match_enhanced_patient_bundle.json b/tests/unit/assets/possible_match_enhanced_patient_bundle.json new file mode 100644 index 00000000..78d3aca9 --- /dev/null +++ b/tests/unit/assets/possible_match_enhanced_patient_bundle.json @@ -0,0 +1,164 @@ +{ + "resourceType": "Bundle", + "identifier": { + "value": "a very contrived FHIR bundle" + }, + "entry": [ + { + "resource": { + "resourceType": "Patient", + "id": "f6a16ff7-4a31-11eb-be7b-8344edc8f36b", + "identifier": [ + { + "value": "123456789", + "type": { + "coding": [ + { + "code": "MR", + "system": "http://terminology.hl7.org/CodeSystem/v2-0203", + "display": "Medical record number" + } + ] + } + } + ], + "name": [ + { + "family": "Smith", + "given": [ + "John" + ], + "use": "official" + } + ], + "birthDate": "1980-01-01", + "gender": "male", + "address": [ + { + "line": [ + "1234 Silversun Strip" + ], + "buildingNumber": "1234", + "city": "Boston", + "state": "Massachusetts", + "postalCode": "99999", + "use": "home" + } + ], + "telecom": [ + { + "use": "home", + "system": "phone", + "value": 
"123-456-7890" + } + ] + } + }, + { + "resource": { + "resourceType": "Patient", + "id": "2fdd0b8b-4a70-11eb-99fd-ad786a821574", + "identifier": [ + { + "value": "123456789", + "type": { + "coding": [ + { + "code": "MR", + "system": "http://terminology.hl7.org/CodeSystem/v2-0203", + "display": "Medical record number" + } + ] + } + } + ], + "name": [ + { + "family": "Smith-Cole", + "given": [ + "John" + ], + "use": "official" + } + ], + "birthDate": "1980-01-01", + "gender": "male", + "address": [ + { + "line": [ + "Bay 16", + "Ward Sector 24" + ], + "city": "Brooklyn", + "state": "New York", + "postalCode": "54321", + "use": "home" + } + ], + "telecom": [ + { + "use": "home", + "system": "phone", + "value": "123-456-7890" + } + ] + } + }, + { + "resource": { + "resourceType": "Patient", + "id": "2fdd0b8b-4a70-11eb-99fd-ad786a821574", + "identifier": [ + { + "value": "123456789", + "type": { + "coding": [ + { + "code": "MR", + "system": "http://terminology.hl7.org/CodeSystem/v2-0203", + "display": "Medical record number" + } + ] + } + } + ], + "name": [ + { + "family": "Smith", + "given": [ + "Johnny" + ], + "use": "official" + } + ], + "birthDate": "1980-01-01", + "gender": "male", + "address": [ + { + "line": [ + "1234 Silversun Strip" + ], + "buildingNumber": "1234", + "city": "Boston", + "state": "Massachusetts", + "postalCode": "99999", + "use": "home" + } + ], + "telecom": [ + { + "use": "home", + "system": "phone", + "value": "123-456-7890" + } + ] + } + }, + { + "request": { + "method": "GET", + "url": "testing for entry with no resource" + } + } + ] +} diff --git a/tests/unit/database/test_algorithm_service.py b/tests/unit/database/test_algorithm_service.py index 405193d8..16a8be4a 100644 --- a/tests/unit/database/test_algorithm_service.py +++ b/tests/unit/database/test_algorithm_service.py @@ -66,12 +66,12 @@ def test_load_algorithm_created(self, session): data = schemas.Algorithm( label="dibss-basic", description="First algorithm", + 
belongingness_ratio=(0.75, 0.8), passes=[ schemas.AlgorithmPass( blocking_keys=["FIRST_NAME"], evaluators={"ZIP": "func:recordlinker.linking.matchers.feature_match_any"}, rule="func:recordlinker.linking.matchers.eval_perfect_match", - cluster_ratio=0.8, ) ], ) @@ -81,6 +81,7 @@ def test_load_algorithm_created(self, session): assert obj.id == 1 assert obj.label == "dibss-basic" assert obj.description == "First algorithm" + assert obj.belongingness_ratio == (0.75, 0.8) assert len(obj.passes) == 1 assert obj.passes[0].algorithm_id == 1 assert obj.passes[0].blocking_keys == ["FIRST_NAME"] @@ -88,18 +89,18 @@ def test_load_algorithm_created(self, session): "ZIP": "func:recordlinker.linking.matchers.feature_match_any" } assert obj.passes[0].rule == "func:recordlinker.linking.matchers.eval_perfect_match" - assert obj.passes[0].cluster_ratio == 0.8 + def test_load_algorithm_updated(self, session): data = schemas.Algorithm( label="dibss-basic", description="First algorithm", + belongingness_ratio=(0.75, 0.8), passes=[ schemas.AlgorithmPass( blocking_keys=["FIRST_NAME"], evaluators={"ZIP": "func:recordlinker.linking.matchers.feature_match_any"}, - rule="func:recordlinker.linking.matchers.eval_perfect_match", - cluster_ratio=0.8, + rule="func:recordlinker.linking.matchers.eval_perfect_match" ) ], ) @@ -113,6 +114,7 @@ def test_load_algorithm_updated(self, session): assert obj.id == 1 assert obj.label == "dibss-basic" assert obj.description == "Updated description" + assert obj.belongingness_ratio == (0.75, 0.8) assert len(obj.passes) == 1 assert obj.passes[0].algorithm_id == 1 assert obj.passes[0].blocking_keys == ["LAST_NAME"] @@ -120,7 +122,6 @@ def test_load_algorithm_updated(self, session): "ZIP": "func:recordlinker.linking.matchers.feature_match_any" } assert obj.passes[0].rule == "func:recordlinker.linking.matchers.eval_perfect_match" - assert obj.passes[0].cluster_ratio == 0.8 def test_delete_algorithm(session): @@ -132,8 +133,7 @@ def test_delete_algorithm(session): 
algorithm=algo1, blocking_keys=["FIRST_NAME"], evaluators={"ZIP": "func:recordlinker.linking.matchers.feature_match_any"}, - rule="func:recordlinker.linking.matchers.eval_perfect_match", - cluster_ratio=0.8, + rule="func:recordlinker.linking.matchers.eval_perfect_match" ) session.add(pass1) session.commit() @@ -152,8 +152,7 @@ def test_clear_algorithms(session): algorithm=algo1, blocking_keys=["FIRST_NAME"], evaluators={"ZIP": "func:recordlinker.linking.matchers.feature_match_any"}, - rule="func:recordlinker.linking.matchers.eval_perfect_match", - cluster_ratio=0.8, + rule="func:recordlinker.linking.matchers.eval_perfect_match" ) session.add(pass1) session.commit() diff --git a/tests/unit/database/test_mpi_service.py b/tests/unit/database/test_mpi_service.py index 1378c9e6..893e3af7 100644 --- a/tests/unit/database/test_mpi_service.py +++ b/tests/unit/database/test_mpi_service.py @@ -138,7 +138,7 @@ def test_no_person(self, session): } record = schemas.PIIRecord(**data) patient = mpi_service.insert_patient(session, record) - assert patient.person_id is not None + assert patient.person_id is None assert patient.data["birth_date"] == "1980-01-01" assert patient.data["name"] == [ { @@ -151,8 +151,6 @@ def test_no_person(self, session): ] assert patient.external_person_id is None assert patient.external_person_source is None - assert patient.person.reference_id is not None - assert patient.person.id == patient.person_id assert len(patient.blocking_values) == 4 def test_no_person_with_external_id(self, session): @@ -169,7 +167,7 @@ def test_no_person_with_external_id(self, session): } record = schemas.PIIRecord(**data) patient = mpi_service.insert_patient(session, record, external_person_id="123456") - assert patient.person_id is not None + assert patient.person_id is None assert patient.data["birth_date"] == "1980-01-01" assert patient.data["name"] == [ { @@ -181,9 +179,6 @@ def test_no_person_with_external_id(self, session): ] assert patient.external_person_id == 
"123456" assert patient.external_person_source == "IRIS" - assert patient.person.reference_id is not None - assert patient.person.id is not None - assert patient.person.id == patient.person_id assert len(patient.blocking_values) == 3 def test_with_person(self, session): @@ -256,7 +251,7 @@ def test_no_person(self, session): rec = schemas.PIIRecord(**{"name": [{"given": ["Johnathon"], "family": "Smith"}]}) patients = mpi_service.bulk_insert_patients(session, [rec], external_person_id="123456") assert len(patients) == 1 - assert patients[0].person_id is not None + assert patients[0].person_id is None assert json.loads(patients[0].data) == { "name": [{"given": ["Johnathon"], "family": "Smith"}] } @@ -324,106 +319,85 @@ def prime_index(self, session): session.flush() data = [ - ( - { - "name": [ - { - "given": [ - "Johnathon", - "Bill", - ], - "family": "Smith", - } - ], - "birthdate": "01/01/1980", - }, - person_1, - ), - ( - { - "name": [ - { - "given": [ - "George", - ], - "family": "Harrison", - } - ], - "birthdate": "1943-2-25", - }, - None, - ), - ( - { - "name": [ - { - "given": [ - "John", - ], - "family": "Doe", - }, - {"given": ["John"], "family": "Lewis"}, - ], - "birthdate": "1980-01-01", - }, - None, - ), - ( - { - "name": [ - { - "given": [ - "Bill", - ], - "family": "Smith", - } - ], - "birthdate": "1980-01-01", - }, - person_1, - ), - ( - { - "name": [ - { - "given": [ - "John", - ], - "family": "Smith", - } - ], - "birthdate": "1980-01-01", - }, - person_1, - ), - ( - { - "name": [ - { - "given": [ - "John", - ], - "family": "Smith", - } - ], - "birthdate": "1985-11-12", - }, - None, - ), - ( - { - "name": [ - { - "given": [ - "Ferris", - ], - "family": "Bueller", - } - ], - "birthdate": "", - }, - None, - ), + ({ + "name": [ + { + "given": [ + "Johnathon", + "Bill", + ], + "family": "Smith", + } + ], + "birthdate": "01/01/1980", + }, person_1), + ({ + "name": [ + { + "given": [ + "George", + ], + "family": "Harrison", + } + ], + "birthdate": 
"1943-2-25", + }, models.Person()), + ({ + "name": [ + { + "given": [ + "John", + ], + "family": "Doe", + }, + {"given": ["John"], "family": "Lewis"}, + ], + "birthdate": "1980-01-01", + }, models.Person()), + ({ + "name": [ + { + "given": [ + "Bill", + ], + "family": "Smith", + } + ], + "birthdate": "1980-01-01", + }, person_1), + ({ + "name": [ + { + "given": [ + "John", + ], + "family": "Smith", + } + ], + "birthdate": "1980-01-01", + }, person_1), + ({ + "name": [ + { + "given": [ + "John", + ], + "family": "Smith", + } + ], + "birthdate": "1985-11-12", + }, models.Person()), + ({ + "name": [ + { + "given": [ + "Ferris", + ], + "family": "Bueller", + } + ], + "birthdate": "", + }, models.Person()) ] for datum, person in data: mpi_service.insert_patient(session, schemas.PIIRecord(**datum), person=person) @@ -602,7 +576,6 @@ def test_block_on_multiple_names(self, session, prime_index): blocking_keys=["FIRST_NAME", "LAST_NAME"], evaluators={}, rule="", - cluster_ratio=1.0, kwargs={}, ) matches = mpi_service.get_block_data(session, schemas.PIIRecord(**data), algorithm_pass) @@ -650,9 +623,9 @@ def test_block_on_duplicates(self, session): ], "phone": [{"system": "phone", "value": "555-401-5073", "use": "home"}], } - mpi_service.insert_patient(session, schemas.PIIRecord(**data)) - mpi_service.insert_patient(session, schemas.PIIRecord(**data)) - mpi_service.insert_patient(session, schemas.PIIRecord(**data)) + mpi_service.insert_patient(session, schemas.PIIRecord(**data), models.Person()) + mpi_service.insert_patient(session, schemas.PIIRecord(**data), models.Person()) + mpi_service.insert_patient(session, schemas.PIIRecord(**data), models.Person()) algorithm_pass = models.AlgorithmPass( blocking_keys=["FIRST_NAME", "LAST_NAME", "ZIP", "SEX"] ) diff --git a/tests/unit/linking/test_link.py b/tests/unit/linking/test_link.py index 2efe7cd8..1f397ce2 100644 --- a/tests/unit/linking/test_link.py +++ b/tests/unit/linking/test_link.py @@ -54,7 +54,6 @@ def 
test_compare_match(self): "LAST_NAME": "func:recordlinker.linking.matchers.feature_match_fuzzy_string", }, rule="func:recordlinker.linking.matchers.eval_perfect_match", - cluster_ratio=1.0, kwargs={}, ) @@ -94,7 +93,6 @@ def test_compare_no_match(self): "LAST_NAME": "func:recordlinker.linking.matchers.feature_match_exact", }, rule="func:recordlinker.linking.matchers.eval_perfect_match", - cluster_ratio=1.0, kwargs={}, ) @@ -113,14 +111,44 @@ def patients(self): patients.append(fhir.fhir_record_to_pii_record(entry["resource"])) return patients + @pytest.fixture + def possible_match_basic_patients(self): + bundle = load_test_json_asset("possible_match_basic_patient_bundle.json") + patients = [] + patients: list[schemas.PIIRecord] = [] + for entry in bundle["entry"]: + if entry.get("resource", {}).get("resourceType", {}) == "Patient": + patients.append(fhir.fhir_record_to_pii_record(entry["resource"])) + return patients + + @pytest.fixture + def possible_match_enhanced_patients(self): + bundle = load_test_json_asset("possible_match_enhanced_patient_bundle.json") + patients = [] + patients: list[schemas.PIIRecord] = [] + for entry in bundle["entry"]: + if entry.get("resource", {}).get("resourceType", {}) == "Patient": + patients.append(fhir.fhir_record_to_pii_record(entry["resource"])) + return patients + + @pytest.fixture + def multiple_matches_patients(self): + bundle = load_test_json_asset("multiple_matches_patient_bundle.json") + patients = [] + patients: list[schemas.PIIRecord] = [] + for entry in bundle["entry"]: + if entry.get("resource", {}).get("resourceType", {}) == "Patient": + patients.append(fhir.fhir_record_to_pii_record(entry["resource"])) + return patients + def test_basic_match_one(self, session, basic_algorithm, patients): # Test various null data values in incoming record matches: list[bool] = [] mapped_patients: dict[str, int] = collections.defaultdict(int) - for patient in patients[:2]: - matched, pid, _ = link.link_record_against_mpi(patient, 
session, basic_algorithm) - matches.append(matched) - mapped_patients[pid] += 1 + for data in patients[:2]: + (patient, person, results) = link.link_record_against_mpi(data, session, basic_algorithm) + matches.append(bool(person and results)) + mapped_patients[person.reference_id] += 1 # First patient inserted into empty MPI, no match # Second patient blocks with first patient in first pass, then fuzzy matches name @@ -130,10 +158,10 @@ def test_basic_match_one(self, session, basic_algorithm, patients): def test_basic_match_two(self, session, basic_algorithm, patients): matches: list[bool] = [] mapped_patients: dict[str, int] = collections.defaultdict(int) - for patient in patients: - matched, pid, _ = link.link_record_against_mpi(patient, session, basic_algorithm) - matches.append(matched) - mapped_patients[pid] += 1 + for data in patients: + (patient, person, results) = link.link_record_against_mpi(data, session, basic_algorithm) + matches.append(bool(person and results)) + mapped_patients[person.reference_id] += 1 # First patient inserted into empty MPI, no match # Second patient blocks with first patient in first pass, then fuzzy matches name @@ -147,6 +175,32 @@ def test_basic_match_two(self, session, basic_algorithm, patients): assert matches == [False, True, False, True, False, False] assert sorted(list(mapped_patients.values())) == [1, 1, 1, 3] + + def test_basic_possible_match( + self, + session, + basic_algorithm, + possible_match_basic_patients: list[schemas.PIIRecord] + ): + predictions: dict[str, dict] = collections.defaultdict(dict) + # Decrease Belongingness Ratio lower bound to catch Possible Match when Belongingness Ratio = 0.5 + for lower_bound in [0.5, 0.45]: # test >= lower bound + basic_algorithm.belongingness_ratio_lower_bound = lower_bound + for i, data in enumerate(possible_match_basic_patients): + (patient, person, results) = link.link_record_against_mpi(data, session, basic_algorithm) + predictions[i] = { + "patient": patient, + "person": 
person, + "results": results + } + # 1 Possible Match + assert not predictions[2]["person"] + assert len(predictions[2]["results"]) == 1 + assert predictions[2]["results"][0].person == predictions[0]["person"] + assert predictions[2]["results"][0].belongingness_ratio >= basic_algorithm.belongingness_ratio_lower_bound + assert predictions[2]["results"][0].belongingness_ratio < basic_algorithm.belongingness_ratio_upper_bound + + def test_enhanced_match_three(self, session, enhanced_algorithm, patients: list[schemas.PIIRecord]): # add an additional patient that will fuzzy match to patient 0 patient0_copy = copy.deepcopy(patients[0]) @@ -155,10 +209,10 @@ def test_enhanced_match_three(self, session, enhanced_algorithm, patients: list[ patients.append(patient0_copy) matches: list[bool] = [] mapped_patients: dict[str, int] = collections.defaultdict(int) - for patient in patients: - matched, pid, _ = link.link_record_against_mpi(patient, session, enhanced_algorithm) - matches.append(matched) - mapped_patients[pid] += 1 + for data in patients: + (patient, person, results) = link.link_record_against_mpi(data, session, enhanced_algorithm) + matches.append(bool(person and results)) + mapped_patients[person.reference_id] += 1 # First patient inserted into empty MPI, no match # Second patient blocks with first patient in first pass, then fuzzy matches name @@ -171,3 +225,78 @@ def test_enhanced_match_three(self, session, enhanced_algorithm, patients: list[ # finds greatest strength match and correctly assigns to larger cluster assert matches == [False, True, False, True, False, False, True] assert sorted(list(mapped_patients.values())) == [1, 1, 1, 4] + + + def test_enhanced_possible_match( + self, + session, + enhanced_algorithm, + possible_match_enhanced_patients: list[schemas.PIIRecord] + ): + predictions: dict[str, dict] = collections.defaultdict(dict) + # Decrease Belongingness Ratio lower bound to catch Possible Match when Belongingness Ratio = 0.5 + for lower_bound in 
[0.5, 0.45]: # test >= lower bound + enhanced_algorithm.belongingness_ratio_lower_bound = lower_bound + for i, data in enumerate(possible_match_enhanced_patients): + (patient, person, results) = link.link_record_against_mpi(data, session, enhanced_algorithm) + predictions[i] = { + "patient": patient, + "person": person, + "results": results + } + # 1 Possible Match + assert not predictions[2]["person"] + assert len(predictions[2]["results"]) == 1 + assert predictions[2]["results"][0].person == predictions[0]["person"] + assert predictions[2]["results"][0].belongingness_ratio >= enhanced_algorithm.belongingness_ratio_lower_bound + assert predictions[2]["results"][0].belongingness_ratio < enhanced_algorithm.belongingness_ratio_upper_bound + + + def test_include_multiple_matches_true( + self, + session, + basic_algorithm, + multiple_matches_patients: list[schemas.PIIRecord] + ): + predictions: dict[str, dict] = collections.defaultdict(dict) + # Adjust Belongingness Ratio bounds to catch Match when Belongingness Ratio = 0.5 + basic_algorithm.belongingness_ratio_lower_bound = 0.3 + for upper_bound in [0.5, 0.45]: # test >= upper bound + basic_algorithm.belongingness_ratio_upper_bound = upper_bound + for i, data in enumerate(multiple_matches_patients): + (patient, person, results) = link.link_record_against_mpi(data, session, basic_algorithm) + predictions[i] = { + "patient": patient, + "person": person, + "results": results + } + # 2 Matches + assert len(predictions[3]["results"]) == 2 + assert predictions[3]["person"] == predictions[1]["person"] # Assign to Person with highest Belongingness Ratio (1.0) + for match in predictions[3]["results"]: + assert match.belongingness_ratio >= basic_algorithm.belongingness_ratio_upper_bound + + + def test_include_multiple_matches_false( + self, + session, + basic_algorithm, + multiple_matches_patients: list[schemas.PIIRecord] + ): + predictions: dict[str, dict] = collections.defaultdict(dict) + 
basic_algorithm.include_multiple_matches = False + # Adjust Belongingness Ratio bounds to catch Match when Belongingness Ratio = 0.5 + basic_algorithm.belongingness_ratio_lower_bound = 0.3 + for upper_bound in [0.5, 0.45]: # test >= upper bound + basic_algorithm.belongingness_ratio_upper_bound = upper_bound + for i, data in enumerate(multiple_matches_patients): + (patient, person, results) = link.link_record_against_mpi(data, session, basic_algorithm) + predictions[i] = { + "patient": patient, + "person": person, + "results": results + } + # 2 Matches, but only include 1 + assert len(predictions[3]["results"]) == 1 + assert predictions[3]["person"] == predictions[1]["person"] # Assign to Person with highest Belongingness Ratio (1.0) + assert predictions[3]["results"][0].belongingness_ratio >= basic_algorithm.belongingness_ratio_upper_bound diff --git a/tests/unit/models/test_algorithm.py b/tests/unit/models/test_algorithm.py index 22d0d55b..2f873a71 100644 --- a/tests/unit/models/test_algorithm.py +++ b/tests/unit/models/test_algorithm.py @@ -82,6 +82,7 @@ def test_from_dict_with_passes(self): data = { "label": "Algorithm 1", "description": "First algorithm", + "belongingness_ratio": (0.75, 1.0), "passes": [ { "blocking_keys": ["ZIP"], @@ -90,13 +91,13 @@ def test_from_dict_with_passes(self): "LAST_NAME": "func:recordlinker.linking.matchers.feature_match_exact", }, "rule": "func:recordlinker.linking.matchers.eval_perfect_match", - "cluster_ratio": 1.0, } ], } algo = models.Algorithm.from_dict(**data) assert algo.label == "Algorithm 1" assert algo.description == "First algorithm" + assert algo.belongingness_ratio == (0.75, 1.0) assert len(algo.passes) == 1 assert algo.passes[0].blocking_keys == ["ZIP"] assert algo.passes[0].evaluators == { @@ -104,7 +105,6 @@ def test_from_dict_with_passes(self): "LAST_NAME": "func:recordlinker.linking.matchers.feature_match_exact", } assert algo.passes[0].rule == "func:recordlinker.linking.matchers.eval_perfect_match" - assert 
algo.passes[0].cluster_ratio == 1.0 class TestAlgorithmPass: diff --git a/tests/unit/routes/test_algorithm_router.py b/tests/unit/routes/test_algorithm_router.py index b93ceb8c..827280c4 100644 --- a/tests/unit/routes/test_algorithm_router.py +++ b/tests/unit/routes/test_algorithm_router.py @@ -23,12 +23,16 @@ def test_list(self, client): "label": "basic", "is_default": True, "description": "First algorithm", + "include_multiple_matches": True, + "belongingness_ratio": [1.0, 1.0], "pass_count": 0, }, { "label": "enhanced", "is_default": False, "description": "Second algorithm", + "include_multiple_matches": True, + "belongingness_ratio": [1.0, 1.0], "pass_count": 0, }, ] @@ -43,6 +47,7 @@ def test_get(self, client): algo = models.Algorithm( label="basic", description="First algorithm", + belongingness_ratio=(0.25, 0.5), passes=[ models.AlgorithmPass( blocking_keys=[ @@ -52,7 +57,6 @@ def test_get(self, client): "FIRST_NAME": "func:recordlinker.linking.matchers.feature_match_fuzzy_string", }, rule="func:recordlinker.linking.matchers.eval_perfect_match", - cluster_ratio=0.5, ) ], ) @@ -65,6 +69,8 @@ def test_get(self, client): "label": "basic", "is_default": False, "description": "First algorithm", + "include_multiple_matches": True, + "belongingness_ratio": [0.25, 0.5], "passes": [ { "blocking_keys": ["BIRTHDATE"], @@ -72,7 +78,6 @@ def test_get(self, client): "FIRST_NAME": "func:recordlinker.linking.matchers.feature_match_fuzzy_string", }, "rule": "func:recordlinker.linking.matchers.eval_perfect_match", - "cluster_ratio": 0.5, "kwargs": {}, } ], @@ -102,6 +107,7 @@ def test_create(self, client): payload = { "label": "basic", "description": "First algorithm", + "belongingness_ratio": (0.25, 0.5), "passes": [ { "blocking_keys": [ @@ -110,8 +116,7 @@ def test_create(self, client): "evaluators": { "FIRST_NAME": "func:recordlinker.linking.matchers.feature_match_fuzzy_string", }, - "rule": "func:recordlinker.linking.matchers.eval_perfect_match", - "cluster_ratio": 0.5, + 
"rule": "func:recordlinker.linking.matchers.eval_perfect_match" } ], } @@ -124,13 +129,13 @@ def test_create(self, client): assert algo.label == "basic" assert algo.is_default is False assert algo.description == "First algorithm" + assert algo.belongingness_ratio == (0.25, 0.5) assert len(algo.passes) == 1 assert algo.passes[0].blocking_keys == ["BIRTHDATE"] assert algo.passes[0].evaluators == { "FIRST_NAME": "func:recordlinker.linking.matchers.feature_match_fuzzy_string" } assert algo.passes[0].rule == "func:recordlinker.linking.matchers.eval_perfect_match" - assert algo.passes[0].cluster_ratio == 0.5 assert algo.passes[0].kwargs == {} @@ -139,6 +144,7 @@ def test_404(self, client): payload = { "label": "basic", "description": "First algorithm", + "belongingness_ratio": (1.0, 1.0), "passes": [], } response = client.put("/algorithm/unknown", json=payload) @@ -181,6 +187,7 @@ def test_update(self, client): payload = { "label": "basic", "description": "Updated algorithm", + "belongingness_ratio": (0.25, 0.5), "passes": [ { "blocking_keys": [ @@ -190,7 +197,6 @@ def test_update(self, client): "FIRST_NAME": "func:recordlinker.linking.matchers.feature_match_fuzzy_string", }, "rule": "func:recordlinker.linking.matchers.eval_perfect_match", - "cluster_ratio": 0.5, } ], } @@ -203,13 +209,13 @@ def test_update(self, client): assert algo.label == "basic" assert algo.is_default is False assert algo.description == "Updated algorithm" + assert algo.belongingness_ratio == (0.25, 0.5) assert len(algo.passes) == 1 assert algo.passes[0].blocking_keys == ["BIRTHDATE"] assert algo.passes[0].evaluators == { "FIRST_NAME": "func:recordlinker.linking.matchers.feature_match_fuzzy_string" } assert algo.passes[0].rule == "func:recordlinker.linking.matchers.eval_perfect_match" - assert algo.passes[0].cluster_ratio == 0.5 assert algo.passes[0].kwargs == {} diff --git a/tests/unit/routes/test_link_router.py b/tests/unit/routes/test_link_router.py index 6a6d4bcb..b5dbfc19 100644 --- 
a/tests/unit/routes/test_link_router.py +++ b/tests/unit/routes/test_link_router.py @@ -7,6 +7,7 @@ import copy import json +import uuid from unittest import mock import pytest @@ -23,9 +24,7 @@ def test_bundle_with_no_patient(self, patched_subprocess, basic_algorithm, clien patched_subprocess.return_value = basic_algorithm bad_bundle = {"entry": []} expected_response = { - "message": "Supplied bundle contains no Patient resource to link on.", - "found_match": False, - "updated_bundle": bad_bundle, + "detail": "Supplied bundle contains no Patient resource to link on.", } actual_response = client.post( "/link/dibbs", @@ -49,7 +48,10 @@ def test_success(self, patched_subprocess, basic_algorithm, client): for r in new_bundle["entry"] if r.get("resource").get("resourceType") == "Person" ][0] - assert not resp_1.json()["found_match"] + assert resp_1.json()["patient_reference_id"] and uuid.UUID(resp_1.json()["patient_reference_id"]) + assert resp_1.json()["person_reference_id"] == person_1.get("id") + assert resp_1.json()["prediction"] == "no_match" + assert not resp_1.json()["results"] bundle_2 = test_bundle bundle_2["entry"] = [entry_list[1]] @@ -60,13 +62,25 @@ def test_success(self, patched_subprocess, basic_algorithm, client): for r in new_bundle["entry"] if r.get("resource").get("resourceType") == "Person" ][0] - assert resp_2.json()["found_match"] + assert resp_2.json()["patient_reference_id"] and uuid.UUID(resp_2.json()["patient_reference_id"]) + assert resp_2.json()["person_reference_id"] == person_1.get("id") assert person_2.get("id") == person_1.get("id") + assert resp_2.json()["prediction"] == "match" + assert len(resp_2.json()["results"]) == 1 bundle_3 = test_bundle bundle_3["entry"] = [entry_list[2]] resp_3 = client.post("/link/dibbs", json={"bundle": bundle_3}) - assert not resp_3.json()["found_match"] + new_bundle = resp_3.json()["updated_bundle"] + person_3 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") 
== "Person" + ][0] + assert resp_3.json()["patient_reference_id"] and uuid.UUID(resp_3.json()["patient_reference_id"]) + assert resp_3.json()["person_reference_id"] == person_3.get("id") + assert resp_3.json()["prediction"] == "no_match" + assert not resp_3.json()["results"] # Cluster membership success--justified match bundle_4 = test_bundle @@ -78,19 +92,39 @@ def test_success(self, patched_subprocess, basic_algorithm, client): for r in new_bundle["entry"] if r.get("resource").get("resourceType") == "Person" ][0] - assert resp_4.json()["found_match"] + assert resp_4.json()["patient_reference_id"] and uuid.UUID(resp_4.json()["patient_reference_id"]) + assert resp_4.json()["person_reference_id"] == person_4.get("id") assert person_4.get("id") == person_1.get("id") + assert resp_4.json()["prediction"] == "match" + assert len(resp_4.json()["results"]) == 1 bundle_5 = test_bundle bundle_5["entry"] = [entry_list[4]] resp_5 = client.post("/link/dibbs", json={"bundle": bundle_5}) - assert not resp_5.json()["found_match"] + new_bundle = resp_5.json()["updated_bundle"] + person_5 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_5.json()["patient_reference_id"] and uuid.UUID(resp_5.json()["patient_reference_id"]) + assert resp_5.json()["person_reference_id"] == person_5.get("id") + assert resp_5.json()["prediction"] == "no_match" + assert not resp_5.json()["results"] bundle_6 = test_bundle bundle_6["entry"] = [entry_list[5]] resp_6 = client.post("/link/dibbs", json={"bundle": bundle_6}) new_bundle = resp_6.json()["updated_bundle"] - assert not resp_6.json()["found_match"] + person_6 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_6.json()["patient_reference_id"] and uuid.UUID(resp_6.json()["patient_reference_id"]) + assert resp_6.json()["person_reference_id"] == person_6.get("id") + assert 
resp_6.json()["prediction"] == "no_match" + assert not resp_6.json()["results"] @mock.patch("recordlinker.database.algorithm_service.get_algorithm") def test_enhanced_algo(self, patched_subprocess, enhanced_algorithm, client): @@ -107,7 +141,10 @@ def test_enhanced_algo(self, patched_subprocess, enhanced_algorithm, client): for r in new_bundle["entry"] if r.get("resource").get("resourceType") == "Person" ][0] - assert not resp_1.json()["found_match"] + assert resp_1.json()["patient_reference_id"] and uuid.UUID(resp_1.json()["patient_reference_id"]) + assert resp_1.json()["person_reference_id"] == person_1.get("id") + assert resp_1.json()["prediction"] == "no_match" + assert not resp_1.json()["results"] bundle_2 = test_bundle bundle_2["entry"] = [entry_list[1]] @@ -118,13 +155,25 @@ def test_enhanced_algo(self, patched_subprocess, enhanced_algorithm, client): for r in new_bundle["entry"] if r.get("resource").get("resourceType") == "Person" ][0] - assert resp_2.json()["found_match"] + assert resp_2.json()["patient_reference_id"] and uuid.UUID(resp_2.json()["patient_reference_id"]) + assert resp_2.json()["person_reference_id"] == person_2.get("id") assert person_2.get("id") == person_1.get("id") + assert resp_2.json()["prediction"] == "match" + assert len(resp_2.json()["results"]) == 1 bundle_3 = test_bundle bundle_3["entry"] = [entry_list[2]] resp_3 = client.post("/link/dibbs", json={"bundle": bundle_3, "algorithm": "dibbs-enhanced"}) - assert not resp_3.json()["found_match"] + new_bundle = resp_3.json()["updated_bundle"] + person_3 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_3.json()["patient_reference_id"] and uuid.UUID(resp_3.json()["patient_reference_id"]) + assert resp_3.json()["person_reference_id"] == person_3.get("id") + assert resp_3.json()["prediction"] == "no_match" + assert not resp_3.json()["results"] bundle_4 = test_bundle bundle_4["entry"] = [entry_list[3]] @@ 
-135,45 +184,46 @@ def test_enhanced_algo(self, patched_subprocess, enhanced_algorithm, client): for r in new_bundle["entry"] if r.get("resource").get("resourceType") == "Person" ][0] - assert resp_4.json()["found_match"] + assert resp_4.json()["patient_reference_id"] and uuid.UUID(resp_4.json()["patient_reference_id"]) + assert resp_4.json()["person_reference_id"] == person_1.get("id") assert person_4.get("id") == person_1.get("id") + assert resp_4.json()["prediction"] == "match" + assert len(resp_4.json()["results"]) == 1 bundle_5 = test_bundle bundle_5["entry"] = [entry_list[4]] resp_5 = client.post("/link/dibbs", json={"bundle": bundle_5, "algorithm": "dibbs-enhanced"}) - assert not resp_5.json()["found_match"] + new_bundle = resp_5.json()["updated_bundle"] + person_5 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_5.json()["patient_reference_id"] and uuid.UUID(resp_5.json()["patient_reference_id"]) + assert resp_5.json()["person_reference_id"] == person_5.get("id") + assert resp_5.json()["prediction"] == "no_match" + assert not resp_5.json()["results"] bundle_6 = test_bundle bundle_6["entry"] = [entry_list[5]] resp_6 = client.post("/link/dibbs", json={"bundle": bundle_6, "algorithm": "dibbs-enhanced"}) new_bundle = resp_6.json()["updated_bundle"] - assert not resp_6.json()["found_match"] + person_6 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_6.json()["patient_reference_id"] and uuid.UUID(resp_6.json()["patient_reference_id"]) + assert resp_6.json()["person_reference_id"] == person_6.get("id") + assert resp_6.json()["prediction"] == "no_match" + assert not resp_6.json()["results"] @mock.patch("recordlinker.database.algorithm_service.get_algorithm") def test_invalid_algorithm_param(self, patched_subprocess, client): patched_subprocess.return_value = None test_bundle = 
load_test_json_asset("patient_bundle_to_link_with_mpi.json") expected_response = { - "found_match": False, - "updated_bundle": test_bundle, - "message": "Error: No algorithm found", - } - - actual_response = client.post( - "/link/dibbs", json={"bundle": test_bundle, "algorithm": "INVALID"} - ) - - assert actual_response.json() == expected_response - assert actual_response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - - @mock.patch("recordlinker.database.algorithm_service.default_algorithm") - def test_no_default_algorithm(self, patched_subprocess, client): - patched_subprocess.return_value = None - test_bundle = load_test_json_asset("patient_bundle_to_link_with_mpi.json") - expected_response = { - "found_match": False, - "updated_bundle": test_bundle, - "message": "Error: No algorithm found", + "detail": "Error: Invalid algorithm specified", } actual_response = client.post( @@ -201,37 +251,56 @@ def test_link_success(self, patched_subprocess, basic_algorithm, patients, clien "/link", json={"record": json.loads(patients[0].model_dump_json(exclude_none=True))} ) person_1 = response_1.json()["person_reference_id"] - assert not response_1.json()["is_match"] + assert response_1.json()["patient_reference_id"] and uuid.UUID(response_1.json()["patient_reference_id"]) + assert person_1 + assert response_1.json()["prediction"] == "no_match" + assert not response_1.json()["results"] response_2 = client.post( "/link", json={"record": json.loads(patients[1].model_dump_json(exclude_none=True))} ) person_2 = response_2.json()["person_reference_id"] - assert response_2.json()["is_match"] + assert response_2.json()["patient_reference_id"] and uuid.UUID(response_2.json()["patient_reference_id"]) assert person_2 == person_1 + assert response_2.json()["prediction"] == "match" + assert len(response_2.json()["results"]) == 1 response_3 = client.post( "/link", json={"record": json.loads(patients[2].model_dump_json(exclude_none=True))} ) - assert not response_3.json()["is_match"] 
+ person_3 = response_3.json()["person_reference_id"] + assert response_3.json()["patient_reference_id"] and uuid.UUID(response_3.json()["patient_reference_id"]) + assert person_3 + assert response_3.json()["prediction"] == "no_match" + assert not response_3.json()["results"] # Cluster membership success--justified match response_4 = client.post( "/link", json={"record": json.loads(patients[3].model_dump_json(exclude_none=True))} ) person_4 = response_4.json()["person_reference_id"] - assert response_4.json()["is_match"] + assert response_4.json()["patient_reference_id"] and uuid.UUID(response_4.json()["patient_reference_id"]) assert person_4 == person_1 + assert response_4.json()["prediction"] == "match" + assert len(response_4.json()["results"]) == 1 response_5 = client.post( "/link", json={"record": json.loads(patients[4].model_dump_json(exclude_none=True))} ) - assert not response_5.json()["is_match"] + person_5 = response_5.json()["person_reference_id"] + assert response_5.json()["patient_reference_id"] and uuid.UUID(response_5.json()["patient_reference_id"]) + assert person_5 + assert response_5.json()["prediction"] == "no_match" + assert not response_5.json()["results"] response_6 = client.post( "/link", json={"record": json.loads(patients[5].model_dump_json(exclude_none=True))} ) - assert not response_6.json()["is_match"] + person_6 = response_6.json()["person_reference_id"] + assert response_6.json()["patient_reference_id"] and uuid.UUID(response_6.json()["patient_reference_id"]) + assert person_6 + assert response_6.json()["prediction"] == "no_match" + assert not response_6.json()["results"] @mock.patch("recordlinker.database.algorithm_service.get_algorithm") def test_link_enhanced_algorithm( @@ -247,7 +316,10 @@ def test_link_enhanced_algorithm( }, ) person_1 = response_1.json()["person_reference_id"] - assert not response_1.json()["is_match"] + assert response_1.json()["patient_reference_id"] and uuid.UUID(response_1.json()["patient_reference_id"]) + 
assert person_1 + assert response_1.json()["prediction"] == "no_match" + assert not response_1.json()["results"] response_2 = client.post( "/link", @@ -257,8 +329,10 @@ def test_link_enhanced_algorithm( }, ) person_2 = response_2.json()["person_reference_id"] - assert response_2.json()["is_match"] + assert response_2.json()["patient_reference_id"] and uuid.UUID(response_2.json()["patient_reference_id"]) assert person_2 == person_1 + assert response_2.json()["prediction"] == "match" + assert len(response_2.json()["results"]) == 1 response_3 = client.post( "/link", @@ -267,7 +341,11 @@ def test_link_enhanced_algorithm( "algorithm": "dibbs-enhanced", }, ) - assert not response_3.json()["is_match"] + person_3 = response_3.json()["person_reference_id"] + assert response_3.json()["patient_reference_id"] and uuid.UUID(response_3.json()["patient_reference_id"]) + assert person_3 + assert response_3.json()["prediction"] == "no_match" + assert not response_3.json()["results"] # Cluster membership success--justified match response_4 = client.post( @@ -278,8 +356,10 @@ def test_link_enhanced_algorithm( }, ) person_4 = response_4.json()["person_reference_id"] - assert response_4.json()["is_match"] + assert response_4.json()["patient_reference_id"] and uuid.UUID(response_4.json()["patient_reference_id"]) assert person_4 == person_1 + assert response_4.json()["prediction"] == "match" + assert len(response_4.json()["results"]) == 1 response_5 = client.post( "/link", @@ -288,7 +368,11 @@ def test_link_enhanced_algorithm( "algorithm": "dibbs-enhanced", }, ) - assert not response_5.json()["is_match"] + person_5 = response_5.json()["person_reference_id"] + assert response_5.json()["patient_reference_id"] and uuid.UUID(response_5.json()["patient_reference_id"]) + assert person_5 + assert response_5.json()["prediction"] == "no_match" + assert not response_5.json()["results"] response_6 = client.post( "/link", @@ -297,7 +381,11 @@ def test_link_enhanced_algorithm( "algorithm": 
"dibbs-enhanced", }, ) - assert not response_6.json()["is_match"] + person_6 = response_6.json()["person_reference_id"] + assert response_6.json()["patient_reference_id"] and uuid.UUID(response_6.json()["patient_reference_id"]) + assert person_6 + assert response_6.json()["prediction"] == "no_match" + assert not response_6.json()["results"] @mock.patch("recordlinker.database.algorithm_service.get_algorithm") def test_link_invalid_algorithm_param(self, patched_subprocess, patients, client): @@ -350,37 +438,56 @@ def test_link_success(self, patched_subprocess, basic_algorithm, client): bundle_1["entry"] = [entry_list[0]] response_1 = client.post("/link/fhir", json={"bundle": bundle_1}) person_1 = response_1.json()["person_reference_id"] - assert not response_1.json()["is_match"] + assert response_1.json()["patient_reference_id"] and uuid.UUID(response_1.json()["patient_reference_id"]) + assert person_1 + assert response_1.json()["prediction"] == "no_match" + assert not response_1.json()["results"] bundle_2 = test_bundle bundle_2["entry"] = [entry_list[1]] response_2 = client.post("/link/fhir", json={"bundle": bundle_2}) person_2 = response_2.json()["person_reference_id"] - assert response_2.json()["is_match"] + assert response_2.json()["patient_reference_id"] and uuid.UUID(response_2.json()["patient_reference_id"]) assert person_2 == person_1 + assert response_2.json()["prediction"] == "match" + assert len(response_2.json()["results"]) == 1 bundle_3 = test_bundle bundle_3["entry"] = [entry_list[2]] response_3 = client.post("/link/fhir", json={"bundle": bundle_3}) - assert not response_3.json()["is_match"] + person_3 = response_3.json()["person_reference_id"] + assert response_3.json()["patient_reference_id"] and uuid.UUID(response_3.json()["patient_reference_id"]) + assert person_3 + assert response_3.json()["prediction"] == "no_match" + assert not response_3.json()["results"] # Cluster membership success--justified match bundle_4 = test_bundle bundle_4["entry"] = 
[entry_list[3]] response_4 = client.post("/link/fhir", json={"bundle": bundle_4}) person_4 = response_4.json()["person_reference_id"] - assert response_4.json()["is_match"] + assert response_4.json()["patient_reference_id"] and uuid.UUID(response_4.json()["patient_reference_id"]) assert person_4 == person_1 + assert response_4.json()["prediction"] == "match" + assert len(response_4.json()["results"]) == 1 bundle_5 = test_bundle bundle_5["entry"] = [entry_list[4]] response_5 = client.post("/link/fhir", json={"bundle": bundle_5}) - assert not response_5.json()["is_match"] + person_5 = response_5.json()["person_reference_id"] + assert response_5.json()["patient_reference_id"] and uuid.UUID(response_5.json()["patient_reference_id"]) + assert person_5 + assert response_5.json()["prediction"] == "no_match" + assert not response_5.json()["results"] bundle_6 = test_bundle bundle_6["entry"] = [entry_list[5]] response_6 = client.post("/link/fhir", json={"bundle": bundle_6}) - assert not response_6.json()["is_match"] + person_6 = response_6.json()["person_reference_id"] + assert response_6.json()["patient_reference_id"] and uuid.UUID(response_6.json()["patient_reference_id"]) + assert person_6 + assert response_6.json()["prediction"] == "no_match" + assert not response_6.json()["results"] @mock.patch("recordlinker.database.algorithm_service.get_algorithm") def test_link_enhanced_algorithm( @@ -396,7 +503,10 @@ def test_link_enhanced_algorithm( "/link/fhir", json={"bundle": bundle_1, "algorithm": "dibbs-enhanced"} ) person_1 = response_1.json()["person_reference_id"] - assert not response_1.json()["is_match"] + assert response_1.json()["patient_reference_id"] and uuid.UUID(response_1.json()["patient_reference_id"]) + assert person_1 + assert response_1.json()["prediction"] == "no_match" + assert not response_1.json()["results"] bundle_2 = test_bundle bundle_2["entry"] = [entry_list[1]] @@ -404,15 +514,21 @@ def test_link_enhanced_algorithm( "/link/fhir", json={"bundle": 
bundle_2, "algorithm": "dibbs-enhanced"} ) person_2 = response_2.json()["person_reference_id"] - assert response_2.json()["is_match"] + assert response_2.json()["patient_reference_id"] and uuid.UUID(response_2.json()["patient_reference_id"]) assert person_2 == person_1 + assert response_2.json()["prediction"] == "match" + assert len(response_2.json()["results"]) == 1 bundle_3 = test_bundle bundle_3["entry"] = [entry_list[2]] response_3 = client.post( "/link/fhir", json={"bundle": bundle_3, "algorithm": "dibbs-enhanced"} ) - assert not response_3.json()["is_match"] + person_3 = response_3.json()["person_reference_id"] + assert response_3.json()["patient_reference_id"] and uuid.UUID(response_3.json()["patient_reference_id"]) + assert person_3 + assert response_3.json()["prediction"] == "no_match" + assert not response_3.json()["results"] # Cluster membership success--justified match bundle_4 = test_bundle @@ -421,22 +537,32 @@ def test_link_enhanced_algorithm( "/link/fhir", json={"bundle": bundle_4, "algorithm": "dibbs-enhanced"} ) person_4 = response_4.json()["person_reference_id"] - assert response_4.json()["is_match"] + assert response_4.json()["patient_reference_id"] and uuid.UUID(response_4.json()["patient_reference_id"]) assert person_4 == person_1 + assert response_4.json()["prediction"] == "match" + assert len(response_4.json()["results"]) == 1 bundle_5 = test_bundle bundle_5["entry"] = [entry_list[4]] response_5 = client.post( "/link/fhir", json={"bundle": bundle_5, "algorithm": "dibbs-enhanced"} ) - assert not response_5.json()["is_match"] + person_5 = response_5.json()["person_reference_id"] + assert response_5.json()["patient_reference_id"] and uuid.UUID(response_5.json()["patient_reference_id"]) + assert person_5 + assert response_5.json()["prediction"] == "no_match" + assert not response_5.json()["results"] bundle_6 = test_bundle bundle_6["entry"] = [entry_list[5]] response_6 = client.post( "/link/fhir", json={"bundle": bundle_6, "algorithm": 
"dibbs-enhanced"} ) - assert not response_6.json()["is_match"] + person_6 = response_6.json()["person_reference_id"] + assert response_6.json()["patient_reference_id"] and uuid.UUID(response_6.json()["patient_reference_id"]) + assert person_6 + assert response_6.json()["prediction"] == "no_match" + assert not response_6.json()["results"] @mock.patch("recordlinker.database.algorithm_service.get_algorithm") def test_linkrecord_invalid_algorithm_param(self, patched_subprocess, client): diff --git a/tests/unit/schemas/test_algorithm.py b/tests/unit/schemas/test_algorithm.py index 91910568..3922d7f7 100644 --- a/tests/unit/schemas/test_algorithm.py +++ b/tests/unit/schemas/test_algorithm.py @@ -8,6 +8,7 @@ import pydantic import pytest +from recordlinker.schemas.algorithm import Algorithm from recordlinker.schemas.algorithm import AlgorithmPass @@ -19,7 +20,6 @@ def test_validate_blocking_keys(self): blocking_keys=keys, evaluators={}, rule="func:recordlinker.linking.matchers.eval_perfect_match", - cluster_ratio=0.5, ) keys = ["LAST_NAME", "BIRTHDATE", "ZIP"] # write an assertion that no exception is raised @@ -27,7 +27,6 @@ def test_validate_blocking_keys(self): blocking_keys=keys, evaluators={}, rule="func:recordlinker.linking.matchers.eval_perfect_match", - cluster_ratio=0.5, ) def test_validate_evaluators(self): @@ -37,7 +36,6 @@ def test_validate_evaluators(self): blocking_keys=[], evaluators=evaluators, rule="func:recordlinker.linking.matchers.eval_perfect_match", - cluster_ratio=0.5, ) evaluators = {"LAST_NAME": "func:recordlinker.linking.matchers.unknown"} with pytest.raises(pydantic.ValidationError): @@ -45,7 +43,6 @@ def test_validate_evaluators(self): blocking_keys=[], evaluators=evaluators, rule="func:recordlinker.linking.matchers.eval_perfect_match", - cluster_ratio=0.5, ) evaluators = {"LAST_NAME": "func:recordlinker.linking.matchers.eval_perfect_match"} with pytest.raises(pydantic.ValidationError): @@ -53,7 +50,6 @@ def test_validate_evaluators(self): 
blocking_keys=[], evaluators=evaluators, rule="func:recordlinker.linking.matchers.eval_perfect_match", - cluster_ratio=0.5, ) evaluators = {"LAST_NAME": "func:recordlinker.linking.matchers.feature_match_any"} # write an assertion that no exception is raised @@ -61,7 +57,6 @@ def test_validate_evaluators(self): blocking_keys=[], evaluators=evaluators, rule="func:recordlinker.linking.matchers.eval_perfect_match", - cluster_ratio=0.5, ) def test_validate_rule(self): @@ -71,7 +66,6 @@ def test_validate_rule(self): blocking_keys=[], evaluators={}, rule=rule, - cluster_ratio=0.5, ) rule = "func:recordlinker.linking.matchers.feature_match_any" with pytest.raises(pydantic.ValidationError): @@ -79,19 +73,58 @@ def test_validate_rule(self): blocking_keys=[], evaluators={}, rule=rule, - cluster_ratio=0.5, ) rule = "fn:recordlinker.linking.matchers.eval_perfect_match" AlgorithmPass( blocking_keys=[], evaluators={}, rule=rule, - cluster_ratio=0.5, ) rule = "recordlinker.linking.matchers.eval_perfect_match" AlgorithmPass( blocking_keys=[], evaluators={}, rule=rule, - cluster_ratio=0.5, ) + + +class TestAlgorithm: + def test_validate_belongingness_ratio(self): + belongingness_ratio=(0.9, 0.75) + with pytest.raises(pydantic.ValidationError): + Algorithm( + label="label", + belongingness_ratio=belongingness_ratio, + passes=[ + AlgorithmPass( + blocking_keys=[], + evaluators={}, + rule="func:recordlinker.linking.matchers.eval_perfect_match", + ) + ] + ) + belongingness_ratio=(0.75, 0.9) + Algorithm( + label="label", + belongingness_ratio=belongingness_ratio, + passes=[ + AlgorithmPass( + blocking_keys=[], + evaluators={}, + rule="func:recordlinker.linking.matchers.eval_perfect_match", + ) + ] + ) + belongingness_ratio=(0.9, 0.9) + Algorithm( + label="label", + belongingness_ratio=belongingness_ratio, + passes=[ + AlgorithmPass( + blocking_keys=[], + evaluators={}, + rule="func:recordlinker.linking.matchers.eval_perfect_match", + ) + ] + ) +