Skip to content

Commit

Permalink
style(matchers): add prefixes
Browse files Browse the repository at this point in the history
  • Loading branch information
alhayward committed Nov 27, 2024
1 parent 81d735d commit 396dc4f
Show file tree
Hide file tree
Showing 10 changed files with 154 additions and 154 deletions.
12 changes: 6 additions & 6 deletions docs/site/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,12 @@ patient data and used during query retrieval. The following blocking key types a
These are the functions that can be used to evaluate the matching results as a collection, thus
determining whether or not the incoming payload is a match to an existing Patient record.

`func:recordlinker.linking.matchers.match_rule`
`func:recordlinker.linking.matchers.rule_match`

: Determines whether a given set of feature comparisons represents a 'perfect' match
(i.e. all features that were compared match according to whatever criteria were specified).

`func:recordlinker.linking.matchers.probabilistic_match_rule`
`func:recordlinker.linking.matchers.rule_probabilistic_match`

: Determines whether a given set of feature comparisons matches enough to be the
result of a true patient link instead of just random chance. This is represented
Expand All @@ -143,22 +143,22 @@ matching is designed to compare one list of values to another list of values. F
incoming record could have a GIVEN_NAME of ["John", "Dean"] and we could be comparing them to an
existing Patient with the GIVEN_NAME of ["John", "D"].

`func:recordlinker.linking.matchers.exact_match_any`
`func:recordlinker.linking.matchers.compare_match_any`

: Determines if any of the features are a direct match.

`func:recordlinker.linking.matchers.exact_match_all`
`func:recordlinker.linking.matchers.compare_match_all`

: Determines if all of the features are a direct match.

`func:recordlinker.linking.matchers.fuzzy_match`
`func:recordlinker.linking.matchers.compare_fuzzy_match`

: Determines if the features are a fuzzy match based on a string comparison.
JaroWinkler, Levenshtein and Damerau-Levenshtein are supported, with JaroWinkler as the default.
Use the `kwargs` parameter to specify the desired algorithm and thresholds.
Example: `{"kwargs": {"similarity_measure": "levenshtein", "thresholds": {"FIRST_NAME": 0.8}}}`

`func:recordlinker.linking.matchers.probabilistic_fuzzy_match`
`func:recordlinker.linking.matchers.compare_probabilistic_fuzzy_match`

: Similar to the above function, but uses a log-odds ratio to determine if the features are a match
probabilistically. This is useful when wanting to more robustly compare features by incorporating
Expand Down
24 changes: 12 additions & 12 deletions src/recordlinker/assets/initial_algorithms.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@
"evaluators": [
{
"feature": "FIRST_NAME",
"func": "func:recordlinker.linking.matchers.fuzzy_match"
"func": "func:recordlinker.linking.matchers.compare_fuzzy_match"
},
{
"feature": "LAST_NAME",
"func": "func:recordlinker.linking.matchers.exact_match_all"
"func": "func:recordlinker.linking.matchers.compare_match_all"
}
],
"rule": "func:recordlinker.linking.matchers.match_rule",
"rule": "func:recordlinker.linking.matchers.rule_match",
"kwargs": {
"thresholds": {
"FIRST_NAME": 0.9,
Expand All @@ -44,14 +44,14 @@
"evaluators": [
{
"feature": "ADDRESS",
"func": "func:recordlinker.linking.matchers.fuzzy_match"
"func": "func:recordlinker.linking.matchers.compare_fuzzy_match"
},
{
"feature": "BIRTHDATE",
"func": "func:recordlinker.linking.matchers.exact_match_all"
"func": "func:recordlinker.linking.matchers.compare_match_all"
}
],
"rule": "func:recordlinker.linking.matchers.match_rule",
"rule": "func:recordlinker.linking.matchers.rule_match",
"kwargs": {
"thresholds": {
"FIRST_NAME": 0.9,
Expand Down Expand Up @@ -81,14 +81,14 @@
"evaluators": [
{
"feature": "FIRST_NAME",
"func": "func:recordlinker.linking.matchers.probabilistic_fuzzy_match"
"func": "func:recordlinker.linking.matchers.compare_probabilistic_fuzzy_match"
},
{
"feature": "LAST_NAME",
"func": "func:recordlinker.linking.matchers.probabilistic_fuzzy_match"
"func": "func:recordlinker.linking.matchers.compare_probabilistic_fuzzy_match"
}
],
"rule": "func:recordlinker.linking.matchers.probabilistic_match_rule",
"rule": "func:recordlinker.linking.matchers.rule_probabilistic_match",
"kwargs": {
"similarity_measure": "JaroWinkler",
"thresholds": {
Expand Down Expand Up @@ -123,14 +123,14 @@
"evaluators": [
{
"feature": "ADDRESS",
"func": "func:recordlinker.linking.matchers.probabilistic_fuzzy_match"
"func": "func:recordlinker.linking.matchers.compare_probabilistic_fuzzy_match"
},
{
"feature": "BIRTHDATE",
"func": "func:recordlinker.linking.matchers.probabilistic_fuzzy_match"
"func": "func:recordlinker.linking.matchers.compare_probabilistic_fuzzy_match"
}
],
"rule": "func:recordlinker.linking.matchers.probabilistic_match_rule",
"rule": "func:recordlinker.linking.matchers.rule_probabilistic_match",
"kwargs": {
"similarity_measure": "JaroWinkler",
"thresholds": {
Expand Down
26 changes: 13 additions & 13 deletions src/recordlinker/linking/matchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ class RuleFunc(enum.Enum):
the algorithm.
"""

MATCH_RULE = "func:recordlinker.linking.matchers.match_rule"
PROBABILISTIC_MATCH_RULE = "func:recordlinker.linking.matchers.probabilistic_match_rule"
RULE_MATCH = "func:recordlinker.linking.matchers.rule_match"
RULE_PROBABILISTIC_MATCH = "func:recordlinker.linking.matchers.rule_probabilistic_match"


class FeatureFunc(enum.Enum):
Expand All @@ -44,11 +44,11 @@ class FeatureFunc(enum.Enum):
matching, based on the configuration of the algorithm.
"""

EXACT_MATCH_ANY = "func:recordlinker.linking.matchers.exact_match_any"
EXACT_MATCH_ALL = "func:recordlinker.linking.matchers.exact_match_all"
FUZZY_MATCH = "func:recordlinker.linking.matchers.fuzzy_match"
PROBABILISTIC_FUZZY_MATCH = (
"func:recordlinker.linking.matchers.probabilistic_fuzzy_match"
COMPARE_MATCH_ANY = "func:recordlinker.linking.matchers.compare_match_any"
COMPARE_MATCH_ALL = "func:recordlinker.linking.matchers.compare_match_all"
COMPARE_FUZZY_MATCH = "func:recordlinker.linking.matchers.compare_fuzzy_match"
COMPARE_PROBABILISTIC_FUZZY_MATCH = (
"func:recordlinker.linking.matchers.compare_probabilistic_fuzzy_match"
)


Expand Down Expand Up @@ -99,7 +99,7 @@ def _get_fuzzy_params(col: str, **kwargs) -> tuple[SIMILARITY_MEASURES, float]:
return (similarity_measure, threshold)


def match_rule(feature_comparisons: list[float], **kwargs: typing.Any) -> bool:
def rule_match(feature_comparisons: list[float], **kwargs: typing.Any) -> bool:
"""
Determines whether a given set of feature comparisons represent a
'perfect' match (i.e. whether all features that were compared match
Expand All @@ -112,7 +112,7 @@ def match_rule(feature_comparisons: list[float], **kwargs: typing.Any) -> bool:
return sum(feature_comparisons) == len(feature_comparisons)


def probabilistic_match_rule(feature_comparisons: list[float], **kwargs: typing.Any) -> bool:
def rule_probabilistic_match(feature_comparisons: list[float], **kwargs: typing.Any) -> bool:
"""
Determines whether a given set of feature comparisons matches enough
to be the result of a true patient link instead of just random chance.
Expand All @@ -129,7 +129,7 @@ def probabilistic_match_rule(feature_comparisons: list[float], **kwargs: typing.
return sum(feature_comparisons) >= float(threshold)


def exact_match_any(
def compare_match_any(
record: PIIRecord, patient: Patient, key: Feature, **kwargs: typing.Any
) -> float:
"""
Expand All @@ -147,7 +147,7 @@ def exact_match_any(
return float(bool(rec_values & pat_values))


def exact_match_all(
def compare_match_all(
record: PIIRecord, patient: Patient, key: Feature, **kwargs: typing.Any
) -> float:
"""
Expand All @@ -165,7 +165,7 @@ def exact_match_all(
return float(rec_values == pat_values)


def fuzzy_match(
def compare_fuzzy_match(
record: PIIRecord, patient: Patient, key: Feature, **kwargs: typing.Any
) -> float:
"""
Expand All @@ -189,7 +189,7 @@ def fuzzy_match(
return 0


def probabilistic_fuzzy_match(
def compare_probabilistic_fuzzy_match(
record: PIIRecord, patient: Patient, key: Feature, **kwargs: typing.Any
) -> float:
"""
Expand Down
24 changes: 12 additions & 12 deletions tests/unit/database/test_algorithm_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,10 @@ def test_load_algorithm_created(self, session):
evaluators=[
{
"feature": "ZIP",
"func": "func:recordlinker.linking.matchers.exact_match_any",
"func": "func:recordlinker.linking.matchers.compare_match_any",
}
],
rule="func:recordlinker.linking.matchers.match_rule",
rule="func:recordlinker.linking.matchers.rule_match",
)
],
)
Expand All @@ -91,9 +91,9 @@ def test_load_algorithm_created(self, session):
assert obj.passes[0].algorithm_id == 1
assert obj.passes[0].blocking_keys == ["FIRST_NAME"]
assert obj.passes[0].evaluators == [
{"feature": "ZIP", "func": "func:recordlinker.linking.matchers.exact_match_any"}
{"feature": "ZIP", "func": "func:recordlinker.linking.matchers.compare_match_any"}
]
assert obj.passes[0].rule == "func:recordlinker.linking.matchers.match_rule"
assert obj.passes[0].rule == "func:recordlinker.linking.matchers.rule_match"

def test_load_algorithm_updated(self, session):
data = schemas.Algorithm(
Expand All @@ -106,10 +106,10 @@ def test_load_algorithm_updated(self, session):
evaluators=[
{
"feature": "ZIP",
"func": "func:recordlinker.linking.matchers.exact_match_any",
"func": "func:recordlinker.linking.matchers.compare_match_any",
}
],
rule="func:recordlinker.linking.matchers.match_rule",
rule="func:recordlinker.linking.matchers.rule_match",
)
],
)
Expand All @@ -128,9 +128,9 @@ def test_load_algorithm_updated(self, session):
assert obj.passes[0].algorithm_id == 1
assert obj.passes[0].blocking_keys == ["LAST_NAME"]
assert obj.passes[0].evaluators == [
{"feature": "ZIP", "func": "func:recordlinker.linking.matchers.exact_match_any"}
{"feature": "ZIP", "func": "func:recordlinker.linking.matchers.compare_match_any"}
]
assert obj.passes[0].rule == "func:recordlinker.linking.matchers.match_rule"
assert obj.passes[0].rule == "func:recordlinker.linking.matchers.rule_match"


def test_delete_algorithm(session):
Expand All @@ -142,9 +142,9 @@ def test_delete_algorithm(session):
algorithm=algo1,
blocking_keys=["FIRST_NAME"],
evaluators=[
{"feature": "ZIP", "func": "func:recordlinker.linking.matchers.exact_match_any"}
{"feature": "ZIP", "func": "func:recordlinker.linking.matchers.compare_match_any"}
],
rule="func:recordlinker.linking.matchers.match_rule",
rule="func:recordlinker.linking.matchers.rule_match",
)
session.add(pass1)
session.commit()
Expand All @@ -163,9 +163,9 @@ def test_clear_algorithms(session):
algorithm=algo1,
blocking_keys=["FIRST_NAME"],
evaluators=[
{"feature": "ZIP", "func": "func:recordlinker.linking.matchers.exact_match_any"}
{"feature": "ZIP", "func": "func:recordlinker.linking.matchers.compare_match_any"}
],
rule="func:recordlinker.linking.matchers.match_rule",
rule="func:recordlinker.linking.matchers.rule_match",
)
session.add(pass1)
session.commit()
Expand Down
12 changes: 6 additions & 6 deletions tests/unit/linking/test_link.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ def test_compare_match(self):
algorithm_id=1,
blocking_keys=[1],
evaluators=[
{"feature": "FIRST_NAME", "func": "func:recordlinker.linking.matchers.exact_match_all"},
{"feature": "LAST_NAME", "func": "func:recordlinker.linking.matchers.fuzzy_match"},
{"feature": "FIRST_NAME", "func": "func:recordlinker.linking.matchers.compare_match_all"},
{"feature": "LAST_NAME", "func": "func:recordlinker.linking.matchers.compare_fuzzy_match"},
],
rule="func:recordlinker.linking.matchers.match_rule",
rule="func:recordlinker.linking.matchers.rule_match",
kwargs={},
)

Expand Down Expand Up @@ -89,10 +89,10 @@ def test_compare_no_match(self):
algorithm_id=1,
blocking_keys=[1],
evaluators=[
{"feature": "FIRST_NAME", "func": "func:recordlinker.linking.matchers.exact_match_all"},
{"feature": "LAST_NAME", "func": "func:recordlinker.linking.matchers.exact_match_all"},
{"feature": "FIRST_NAME", "func": "func:recordlinker.linking.matchers.compare_match_all"},
{"feature": "LAST_NAME", "func": "func:recordlinker.linking.matchers.compare_match_all"},
],
rule="func:recordlinker.linking.matchers.match_rule",
rule="func:recordlinker.linking.matchers.rule_match",
kwargs={},
)

Expand Down
Loading

0 comments on commit 396dc4f

Please sign in to comment.