moj-analytical-services · RobinL · Sep 12, 2024 · Sep 12, 2024 · Sep 12, 2024 · Sep 12, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 
 - Match weight and m and u probabilities charts now have improved tooltips ([#2392](https://github.com/moj-analytical-services/splink/pull/2392))
+- Added new `AbsoluteDifferenceLevel` comparison level for numerical columns ([#2398](https://github.com/moj-analytical-services/splink/pull/2398))
 
 ### Fixed
 

diff --git a/splink/comparison_level_library.py b/splink/comparison_level_library.py
@@ -1,5 +1,6 @@
 from splink.internals.comparison_level_library import (
     AbsoluteDateDifferenceLevel,
+    AbsoluteDifferenceLevel,
     AbsoluteTimeDifferenceLevel,
     And,
     ArrayIntersectLevel,
@@ -39,6 +40,7 @@
     "DistanceInKMLevel",
     "ArrayIntersectLevel",
     "PercentageDifferenceLevel",
+    "AbsoluteDifferenceLevel",
     "And",
     "Not",
     "Or",

diff --git a/splink/internals/comparison_level_library.py b/splink/internals/comparison_level_library.py
@@ -843,3 +843,37 @@ def create_label_for_charts(self) -> str:
             f"Percentage difference of '{col.label}' "
             f"within {self.percentage_threshold:,.2%}"
         )
+
+
+class AbsoluteDifferenceLevel(ComparisonLevelCreator):
+    def __init__(
+        self,
+        col_name: Union[str, ColumnExpression],
+        difference_threshold: Union[int, float],
+    ):
+        """
+        A comparison level that is satisfied when two numerical values lie no
+        further apart than `difference_threshold`, i.e. when
+        `ABS(left - right) <= difference_threshold`.
+
+        Args:
+            col_name (str | ColumnExpression): Name of the column to compare, or
+                a ColumnExpression built on it.
+            difference_threshold (int | float): Largest absolute gap between the
+                two values that still satisfies this level (the comparison is
+                inclusive). Checked against the bounds 0 and infinity.
+        """
+        self.col_expression = ColumnExpression.instantiate_if_str(col_name)
+        # The shared validator receives the allowed range together with the
+        # names it needs to describe this level and parameter.
+        self.difference_threshold = validate_numeric_parameter(
+            parameter_value=difference_threshold,
+            parameter_name="difference_threshold",
+            level_name=type(self).__name__,
+            lower_bound=0,
+            upper_bound=float("inf"),
+        )
+
+    def create_sql(self, sql_dialect: SplinkDialect) -> str:
+        """Build the SQL predicate for this level using `sql_dialect`."""
+        col = self.col_expression
+        # The dialect is assigned before the left/right column names are read.
+        col.sql_dialect = sql_dialect
+        return f"ABS({col.name_l} - {col.name_r}) <= {self.difference_threshold}"
+
+    def create_label_for_charts(self) -> str:
+        """Return the text label describing this level and its threshold."""
+        col = self.col_expression
+        return f"Absolute difference of '{col.label}' <= {self.difference_threshold}"
diff --git a/tests/test_comparison_level_lib.py b/tests/test_comparison_level_lib.py
@@ -292,3 +292,74 @@ def test_damerau_levenshtein_level(test_helpers, dialect):
     ]
 
     run_comparison_vector_value_tests(test_cases, db_api)
+
+
+@mark_with_dialects_excluding()
+def test_absolute_difference(test_helpers, dialect):
+    """Check the level and label chosen by stacked AbsoluteDifferenceLevels.
+
+    Thresholds 0, 5, 10, 20 and 50 are layered between a null level and an
+    else level. Besides one representative pair per level, the inputs cover:
+
+    * a pair whose left value is the larger one, so the test fails if the
+      difference is not taken as an absolute value;
+    * pairs sitting exactly on a threshold, so the test fails if the
+      comparison is strict (`<`) rather than inclusive (`<=`);
+    * a pair one unit past the largest threshold, which must fall through
+      to the else level.
+    """
+    helper = test_helpers[dialect]
+    db_api = helper.extra_linker_args()["db_api"]
+
+    # Trailing comments give the comparison vector value expected per level.
+    abs_comparison = cl.CustomComparison(
+        comparison_description="amount",
+        comparison_levels=[
+            cll.NullLevel("amount"),
+            cll.AbsoluteDifferenceLevel("amount", 0),  # 5
+            cll.AbsoluteDifferenceLevel("amount", 5),  # 4
+            cll.AbsoluteDifferenceLevel("amount", 10),  # 3
+            cll.AbsoluteDifferenceLevel("amount", 20),  # 2
+            cll.AbsoluteDifferenceLevel("amount", 50),  # 1
+            cll.ElseLevel(),
+        ],
+    )
+
+    test_cases = [
+        {
+            "comparison": abs_comparison,
+            "inputs": [
+                {
+                    "amount_l": 100,
+                    "amount_r": 100,
+                    "expected_value": 5,
+                    "expected_label": "Absolute difference of 'amount' <= 0",
+                },
+                {
+                    "amount_l": 100,
+                    "amount_r": 103,
+                    "expected_value": 4,
+                    "expected_label": "Absolute difference of 'amount' <= 5",
+                },
+                {
+                    # Left value larger than right: only passes with ABS()
+                    "amount_l": 103,
+                    "amount_r": 100,
+                    "expected_value": 4,
+                    "expected_label": "Absolute difference of 'amount' <= 5",
+                },
+                {
+                    # Difference exactly on the threshold: inclusive match
+                    "amount_l": 100,
+                    "amount_r": 105,
+                    "expected_value": 4,
+                    "expected_label": "Absolute difference of 'amount' <= 5",
+                },
+                {
+                    "amount_l": 100,
+                    "amount_r": 108,
+                    "expected_value": 3,
+                    "expected_label": "Absolute difference of 'amount' <= 10",
+                },
+                {
+                    "amount_l": 100,
+                    "amount_r": 115,
+                    "expected_value": 2,
+                    "expected_label": "Absolute difference of 'amount' <= 20",
+                },
+                {
+                    "amount_l": 100,
+                    "amount_r": 140,
+                    "expected_value": 1,
+                    "expected_label": "Absolute difference of 'amount' <= 50",
+                },
+                {
+                    # Largest threshold, reached exactly and from the left
+                    "amount_l": 150,
+                    "amount_r": 100,
+                    "expected_value": 1,
+                    "expected_label": "Absolute difference of 'amount' <= 50",
+                },
+                {
+                    # One past the largest threshold falls to the else level
+                    "amount_l": 100,
+                    "amount_r": 151,
+                    "expected_value": 0,
+                    "expected_label": "All other comparisons",
+                },
+                {
+                    "amount_l": 100,
+                    "amount_r": 200,
+                    "expected_value": 0,
+                    "expected_label": "All other comparisons",
+                },
+                {
+                    "amount_l": None,
+                    "amount_r": 100,
+                    "expected_value": -1,
+                    "expected_label": "amount is NULL",
+                },
+            ],
+        },
+    ]
+
+    run_comparison_vector_value_tests(test_cases, db_api)