add precision option to equality test

dbt-labs · Feb 6, 2023 · 90327e1 · 90327e1
1 parent 52d9bb5
commit 90327e1
Show file tree

Hide file tree

Showing 7 changed files with 82 additions and 18 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,11 @@
 ## Contributors:
 --->
 
+# Unreleased
+## New features
+- The `equality` test now accepts an additional argument, `precision`, to aid in comparing floating-point numbers ([#757](https://github.com/dbt-labs/dbt-utils/issues/757), [#765](https://github.com/dbt-labs/dbt-utils/pull/765))
+## Contributors: @rlh1994
+
 # dbt utils v1.0
 
 ## Migration Guide 

diff --git a/README.md b/README.md
@@ -113,7 +113,7 @@ This test supports the `group_by_columns` parameter; see [Grouping in tests](#gr
 
 #### equality ([source](macros/generic_tests/equality.sql))
 
-Asserts the equality of two relations. Optionally specify a subset of columns to compare.
+Asserts the equality of two relations. Optionally specify a subset of columns to compare, and a precision (number of decimal places) that numeric columns are rounded to before comparison.
 
 **Usage:**
 
@@ -128,6 +128,7 @@ models:
           compare_columns:
             - first_column
             - second_column
+          precision: 4
 ```
 
 #### expression_is_true ([source](macros/generic_tests/expression_is_true.sql))

diff --git a/integration_tests/data/etc/data_test_equality_floats.csv b/integration_tests/data/etc/data_test_equality_floats.csv
@@ -0,0 +1,11 @@
+id,float_number
+05ac09c4-f947-45a8-8c14-88f430f8b294,62.3888186
+cfae9054-940b-42a1-84d4-052daae6194f,81.2511656
+6029501d-c274-49f2-a69d-4c75a3d9931d,23.3959675
+c653e520-df81-4a5f-b44b-bb1b4c1b7846,72.2100841
+59caed0d-53d6-473c-a88c-3726c7693f05,68.6029434
+b441f6a0-ce7f-4ad9-b96b-b41d73a94ae7,72.7861425
+26491840-bfd4-4496-9ca9-ad9220a2de47,35.3662223
+b4f233ce-a494-4bb6-9cf2-73bb6854e58a,89.1524680
+11c979b7-2661-4375-8143-7c9b54b90627,19.5755431
+a8057f73-312e-48e6-b344-f4a510a2c4a8,22.9237047
diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
@@ -50,7 +50,7 @@ seeds:
     sql:
       data_events_20180103:
         +schema: events
-      
+
       data_get_column_values_dropped:
         # this.incorporate() to hardcode the node's type as otherwise dbt doesn't know it yet
         +post-hook: "{% do adapter.drop_relation(this.incorporate(type='table')) %}"
@@ -71,3 +71,8 @@ seeds:
       data_test_sequential_timestamps:
         +column_types:
           my_timestamp: timestamp
+
+    etc:
+      data_test_equality_floats:
+        +column_types:
+          float_number: float
diff --git a/integration_tests/models/generic_tests/schema.yml b/integration_tests/models/generic_tests/schema.yml
@@ -89,14 +89,14 @@ seeds:
           upper_bound_column: valid_to
           partition_by: subscription_id
           zero_length_range_allowed: true
- 
+
   - name: data_unique_combination_of_columns
     tests:
       - dbt_utils.unique_combination_of_columns:
           combination_of_columns:
             - month
             - product
-  
+
   - name: data_cardinality_equality_a
     columns:
       - name: same_name
@@ -191,7 +191,18 @@ models:
             - first_name
             - last_name
             - email
-
+
+  - name: test_equality_floats
+    tests:
+      - dbt_utils.equality:
+          compare_model: ref('data_test_equality_floats')
+          precision: 4
+      - dbt_utils.equality:
+          compare_model: ref('data_test_equality_floats')
+          precision: 8
+          error_if: "<1" #sneaky way to ensure that the test is returning failing rows
+          warn_if: "<0"
+
   - name: test_fewer_rows_than
     tests:
       - dbt_utils.fewer_rows_than:

diff --git a/integration_tests/models/generic_tests/test_equality_floats.sql b/integration_tests/models/generic_tests/test_equality_floats.sql
@@ -0,0 +1,9 @@
+-- Nudge each seed float by 1e-7: still equal to the seed at precision 4, different at precision 8.
+with seed_floats as (
+    select * from {{ ref('data_test_equality_floats') }}
+)
+
+select
+    id,
+    float_number + 0.0000001 as float_number
+from seed_floats
diff --git a/macros/generic_tests/equality.sql b/macros/generic_tests/equality.sql
@@ -1,8 +1,8 @@
-{% test equality(model, compare_model, compare_columns=None) %}
-  {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns)) }}
+{% test equality(model, compare_model, compare_columns=None, precision = None) %}
+  {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns, precision)) }}
 {% endtest %}
 
-{% macro default__test_equality(model, compare_model, compare_columns=None) %}
+{% macro default__test_equality(model, compare_model, compare_columns=None, precision = None) %}
 
 {% set set_diff %}
     count(*) + coalesce(abs(
@@ -22,17 +22,39 @@
 -- setup
 {%- do dbt_utils._is_relation(model, 'test_equality') -%}
 
-{#-
-If the compare_cols arg is provided, we can run this test without querying the
-information schema — this allows the model to be an ephemeral model
--#}
-
-{%- if not compare_columns -%}
+{%- if not precision -%}
+    {#-
+        If the compare_columns arg is provided, we can run this test without querying the
+        information schema — this allows the model to be an ephemeral model
+    -#}
+    {%- if not compare_columns -%}
+        {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
+        {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%}
+    {%- endif -%}
+
+    {% set compare_cols_csv = compare_columns | join(', ') %}
+{% else %}
+    {#-
+        If rounding is required, we need to get the types, so it can't be ephemeral
+    -#}
     {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
-    {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%}
-{%- endif -%}
-
-{% set compare_cols_csv = compare_columns | join(', ') %}
+    {%- set columns = adapter.get_columns_in_relation(model) -%}
+
+    {% set columns_list = [] %}
+    {%- for col in columns -%}
+        {%- if (compare_columns and col.name|lower in compare_columns|map('lower')) or not compare_columns -%}
+            {# Databricks double type is not picked up by any number type checks in dbt #}
+            {%- if col.is_float() or col.is_numeric() or col.data_type == 'double' -%}
+                {# Cast is required due to postgres not having round for a double precision number #}
+                {%- do columns_list.append('round(cast(' ~ col.name ~ ' as ' ~ dbt.type_numeric() ~ '),' ~ precision ~ ') as ' ~ col.name) -%}
+            {%- else -%}
+                {%- do columns_list.append(col.name) -%}
+            {%- endif -%}
+        {% endif %}
+    {%- endfor -%}
+
+    {% set compare_cols_csv = columns_list | join(', ') %}
+{% endif %}
 
 with a as (