From 9744913b5a2fedd2f61125316226413330aea133 Mon Sep 17 00:00:00 2001
From: ADBond <48208438+ADBond@users.noreply.github.com>
Date: Thu, 18 Jan 2024 12:27:38 +0000
Subject: [PATCH 1/4] adjust tests for DateComparison

---
 tests/test_comparison_template_lib.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/tests/test_comparison_template_lib.py b/tests/test_comparison_template_lib.py
index ff8e849fea..7d97bdd13c 100644
--- a/tests/test_comparison_template_lib.py
+++ b/tests/test_comparison_template_lib.py
@@ -2,6 +2,7 @@
 import pytest
 
 import splink.comparison_template_library as ctl
+from splink.column_expression import ColumnExpression
 
 from .decorator import mark_with_dialects_excluding
 
@@ -18,7 +19,7 @@ def test_date_comparison_run(dialect):
 @mark_with_dialects_excluding("postgres", "sqlite")
 def test_date_comparison_dl_run(dialect):
     ctl.DateComparison(
-        "date", levenshtein_thresholds=[1], damerau_levenshtein_thresholds=[]
+        "date", fuzzy_thresholds=[1], fuzzy_metric="levenshtein"
     ).get_comparison(dialect)
 
 
@@ -80,17 +81,26 @@ def test_datediff_levels(dialect, test_helpers, test_gamma_assert):
     # Generate our various settings objs
     settings = {
         "link_type": "dedupe_only",
-        "comparisons": [ctl.DateComparison("dob", cast_strings_to_date=True)],
+        "comparisons": [
+            ctl.DateComparison(
+                # TODO: revert to default damerau_levenshtein metric
+                ColumnExpression("dob").try_parse_date(), fuzzy_metric="levenshtein", fuzzy_thresholds=[2]
+            )
+        ],
     }
 
     # We need to put our column in datetime format for this to work
 
     df = helper.convert_frame(df)
     linker = helper.Linker(df, settings, **helper.extra_linker_args())
+    linker.debug_mode = True
     linker_output = linker.predict().as_pandas_dataframe()
 
     # # Dict key: {gamma_level value: size}
-    size_gamma_lookup = {0: 8, 1: 15, 2: 5, 3: 5, 4: 1, 5: 2}
+    # 0 - else, 1 - 10 years, 2 - 1 year, 3 - 1 month, 4 - fuzzy, 5 - date match
+    size_gamma_lookup = {0: 8, 1: 15, 2: 5, 3: 3, 4: 3, 5: 2}
+    # Damerau-Levenshtein version - differs at the 1-month and fuzzy levels
+    # size_gamma_lookup = {0: 8, 1: 15, 2: 6, 3: 5, 4: 1, 5: 2}
 
     # Check gamma sizes are as expected
     for gamma, expected_size in size_gamma_lookup.items():
@@ -457,7 +467,7 @@ def test_email_comparison_levels(dialect, test_helpers, test_gamma_assert):
             ctl.EmailComparison(
                 col_name=col_name,
                 invalid_emails_as_null=True,
-                thresholds=[2],
+                fuzzy_thresholds=[2],
                 fuzzy_metric="damerau_levenshtein",
                 include_domain_match_level=True,
             )

From b44da759d61181007bd4f8965b72069ef882b3e0 Mon Sep 17 00:00:00 2001
From: ADBond <48208438+ADBond@users.noreply.github.com>
Date: Thu, 18 Jan 2024 13:00:19 +0000
Subject: [PATCH 2/4] adjust test for NameComparison (with new defaults)

---
 tests/test_comparison_template_lib.py | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/tests/test_comparison_template_lib.py b/tests/test_comparison_template_lib.py
index 7d97bdd13c..30cffb73f0 100644
--- a/tests/test_comparison_template_lib.py
+++ b/tests/test_comparison_template_lib.py
@@ -84,7 +84,9 @@ def test_datediff_levels(dialect, test_helpers, test_gamma_assert):
         "comparisons": [
             ctl.DateComparison(
                 # TODO: revert to default damerau_levenshtein metric
-                ColumnExpression("dob").try_parse_date(), fuzzy_metric="levenshtein", fuzzy_thresholds=[2]
+                ColumnExpression("dob").try_parse_date(),
+                fuzzy_metric="levenshtein",
+                fuzzy_thresholds=[2],
             )
         ],
     }
@@ -216,29 +218,24 @@ def test_name_comparison_levels(dialect, test_helpers):
     linker_output = linker.predict().as_pandas_dataframe()
 
     # # Dict key: {gamma_level value: size}
-    size_gamma_lookup = {0: 6, 1: 4, 2: 0, 3: 2, 4: 2, 5: 1}
-    # 5: exact_match
-    # 4: dmetaphone exact match
-    # 3: damerau_levenshtein <= 1
+    size_gamma_lookup = {0: 6, 1: 6, 2: 0, 3: 2, 4: 1}
+    # 4: exact_match
+    # 3: dmetaphone exact match
     # 2: jaro_winkler > 0.9
     # 1: jaro_winkler > 0.8
     # 0: else
 
     # Check gamma sizes are as expected
     for gamma, expected_size in size_gamma_lookup.items():
-        assert (
-            sum(linker_output["gamma_custom_first_name_first_name_metaphone"] == gamma)
-            == expected_size
-        )
+        assert sum(linker_output["gamma_first_name"] == gamma) == expected_size
 
     # Check individual IDs are assigned to the correct gamma values
     # Dict key: {gamma_value: tuple of ID pairs}
     size_gamma_lookup = {
-        5: [[1, 6]],
-        4: [(2, 3), (4, 5)],
-        3: [(4, 6)],
+        4: [[1, 6]],
+        3: [(2, 3), (4, 5)],
         2: [],
-        1: [(1, 2), (2, 6)],
+        1: [(1, 2), (2, 6), (4, 6)],
         0: [(2, 4), (5, 6)],
     }
 
@@ -248,7 +245,7 @@ def test_name_comparison_levels(dialect, test_helpers):
                 linker_output.loc[
                     (linker_output.unique_id_l == left)
                     & (linker_output.unique_id_r == right)
-                ]["gamma_custom_first_name_first_name_metaphone"].values[0]
+                ]["gamma_first_name"].values[0]
                 == gamma
             )
 

From 1eda05360dcc1bf0827087615f64e648a384ae87 Mon Sep 17 00:00:00 2001
From: ADBond <48208438+ADBond@users.noreply.github.com>
Date: Thu, 18 Jan 2024 13:37:32 +0000
Subject: [PATCH 3/4] postcode comparison test update name

---
 tests/test_comparison_template_lib.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_comparison_template_lib.py b/tests/test_comparison_template_lib.py
index 30cffb73f0..c320ee5183 100644
--- a/tests/test_comparison_template_lib.py
+++ b/tests/test_comparison_template_lib.py
@@ -350,7 +350,7 @@ def test_forename_surname_comparison_levels(dialect, test_helpers):
             )
 
 
-# postcode_comparison
+# PostcodeComparison
 
 
 @mark_with_dialects_excluding("postgres", "sqlite")
@@ -409,7 +409,7 @@ def test_postcode_comparison_levels(dialect, test_helpers, test_gamma_assert):
     settings = {
         "link_type": "dedupe_only",
         "comparisons": [
-            ctl.postcode_comparison(
+            ctl.PostcodeComparison(
                 col_name=col_name,
                 lat_col="lat",
                 long_col="long",

From aa5c5516f5a510a15b84e3c8d317e0583f290c75 Mon Sep 17 00:00:00 2001
From: ADBond <48208438+ADBond@users.noreply.github.com>
Date: Thu, 18 Jan 2024 16:26:17 +0000
Subject: [PATCH 4/4] forename_surname test: adjust gamma name

---
 tests/test_comparison_template_lib.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_comparison_template_lib.py b/tests/test_comparison_template_lib.py
index c320ee5183..6e6495616e 100644
--- a/tests/test_comparison_template_lib.py
+++ b/tests/test_comparison_template_lib.py
@@ -322,7 +322,7 @@ def test_forename_surname_comparison_levels(dialect, test_helpers):
 
     # Check gamma sizes are as expected
     for gamma, expected_size in size_gamma_lookup.items():
-        gamma_matches = linker_output.filter(like="gamma_custom") == gamma
+        gamma_matches = linker_output.filter(like="gamma_forename_surname") == gamma
         gamma_matches_size = gamma_matches.sum().values[0]
         assert gamma_matches_size == expected_size
 
@@ -344,7 +344,7 @@ def test_forename_surname_comparison_levels(dialect, test_helpers):
                     (linker_output.unique_id_l == left)
                     & (linker_output.unique_id_r == right)
                 ]
-                .filter(like="gamma_custom")
+                .filter(like="gamma_forename_surname")
                 .values[0][0]
                 == gamma
             )