Skip to content

Commit

Permalink
Fix the histogram diff for athena
Browse files Browse the repository at this point in the history
Signed-off-by: popcorny <[email protected]>
  • Loading branch information
popcornylu committed Aug 5, 2024
1 parent a636f0b commit d568da9
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 2 deletions.
4 changes: 2 additions & 2 deletions recce/tasks/histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def generate_histogram_sql_integer(node, column, min_value, max_value, num_bins=
WITH value_ranges AS (
SELECT
{min_value} as min_value,
{max_value} as max_value,
{max_value} as max_value
),
bin_parameters AS (
SELECT
Expand Down Expand Up @@ -85,7 +85,7 @@ def generate_histogram_sql_numeric(node, column, min_value, max_value, num_bins=
WITH value_ranges AS (
SELECT
{min_value} as min_value,
{max_value} as max_value,
{max_value} as max_value
),
bin_parameters AS (
SELECT
Expand Down
37 changes: 37 additions & 0 deletions tests/tasks/test_histogram.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from recce.tasks.histogram import HistogramDiffTask


def test_histogram(dbt_test_helper):
    """Run a histogram diff on the ``age`` column of a small ``customers`` model.

    The same CSV text is handed to ``create_model`` twice (presumably as the
    base and the current data -- confirm against the ``dbt_test_helper``
    fixture). The test then checks the first/last bin counts, the total, the
    min/max, and the outer bin edges of the result.
    """
    customers_csv = """
customer_id,name,age
1,Alice,30
2,Bob,25
3,Charlie,35
4,Dolly,50
"""

    dbt_test_helper.create_model("customers", customers_csv, customers_csv)

    result = HistogramDiffTask(
        {
            "model": "customers",
            "column_name": "age",
            "column_type": "int",
        }
    ).execute()

    # Expected result for this fixture, kept for reference:
    # {
    #   'base': {'counts': [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], 'total': 4},
    #   'current': {'counts': [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], 'total': 4},
    #   'min': 25, 'max': 50,
    #   'bin_edges': [25, 26, ..., 51],
    #   'labels': ['25-26', ..., '51-52']
    # }

    # Only the 'current' side is asserted; the youngest (25) and oldest (50)
    # ages land in the first and last bins respectively.
    current_counts = result['current']['counts']
    assert current_counts[0] == 1
    assert current_counts[-1] == 1
    assert result['current']['total'] == 4

    # Value range reported for the column.
    assert result['min'] == 25
    assert result['max'] == 50

    # Outer bin edges: the last edge is one past the maximum value.
    edges = result['bin_edges']
    assert edges[0] == 25
    assert edges[-1] == 51

0 comments on commit d568da9

Please sign in to comment.