Skip to content

Commit

Permalink
Merge branch 'splink4_dev' into validate_distance_thresholds
Browse files Browse the repository at this point in the history
ThomasHepworth authored Nov 10, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
2 parents 9a9d0af + 73d3e9e commit 73f4ff7
Showing 2 changed files with 5 additions and 2 deletions.
5 changes: 3 additions & 2 deletions splink/comparison_level_library.py
Original file line number Diff line number Diff line change
@@ -92,14 +92,15 @@ class JaroWinklerLevel(ComparisonLevelCreator):
def __init__(self, col_name: str, distance_threshold: Union[int, float]):
"""A comparison level using a Jaro-Winkler distance function
e.g. `jaro_winkler(val_l, val_r) <= distance_threshold`
e.g. `jaro_winkler(val_l, val_r) >= distance_threshold`
Args:
col_name (str): Input column name
distance_threshold (Union[int, float]): The threshold to use to assess
similarity
"""
self.col_name = col_name

super().__init__(col_name)
self.distance_threshold = validate_distance_threshold(
lower_bound=0,
upper_bound=1,
2 changes: 2 additions & 0 deletions splink/dialects.py
Original file line number Diff line number Diff line change
@@ -60,6 +60,8 @@ class SqliteDialect(SplinkDialect):
def name(self):
return "sqlite"

# SQLite does not natively support string distance functions.
# However, SQLite UDFs are registered automatically by Splink.
@property
def levenshtein_function_name(self):
return "levenshtein"

0 comments on commit 73f4ff7

Please sign in to comment.