Skip to content

Commit

Permalink
Fix fanout in customers model (#20)
Browse files Browse the repository at this point in the history
Closes #1 

This turned into pulling a sweater thread that unravelled some issues in
the source data, so I had to dig into quite a bit!

This fixes some issues in the source data seed files in addition to fixing the model logic.
  • Loading branch information
gwenwindflower authored Mar 29, 2024
1 parent 558fe27 commit 2447e1b
Show file tree
Hide file tree
Showing 19 changed files with 5,658 additions and 5,621 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ env
target/
dbt_packages/
logs/
profiles.yml

.DS_Store

.user.yml
*.hurl

.ruff_cache
__pycache__
14 changes: 4 additions & 10 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,17 @@ repos:
- id: trailing-whitespace
- id: requirements-txt-fixer
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.3.3
rev: v0.3.4
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
- id: ruff-format
- repo: https://github.com/sqlfluff/sqlfluff
rev: "3.0.1"
rev: "3.0.3"
hooks:
- id: sqlfluff-lint
additional_dependencies:
[
"dbt-metricflow[duckdb,snowflake,postgres]~=0.6.0",
"sqlfluff-templater-dbt~=3.0.1",
]
- id: sqlfluff-fix
additional_dependencies:
[
"dbt-metricflow[duckdb,snowflake,postgres]~=0.6.0",
"sqlfluff-templater-dbt~=3.0.1",
"dbt-metricflow[snowflake,bigquery,postgres]~=0.6.0",
"sqlfluff-templater-dbt~=3.0.3",
]
8 changes: 4 additions & 4 deletions Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ tasks:

build:
cmds:
- source .venv/bin/activate && dbt deps
- source .venv/bin/activate && dbt seed
- dbt deps
- dbt seed
- rm -rf jaffle-data
- source .venv/bin/activate && dbt run
- source .venv/bin/activate && dbt test
- dbt run
- dbt test

setup:
cmds:
Expand Down
6 changes: 5 additions & 1 deletion dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@ config-version: 2

name: "jaffle_shop"
version: "3.0.0"
require-dbt-version: ">=1.7.1"
require-dbt-version: ">=1.5.0"

dbt-cloud:
project-id: 283328 # Put your project id here

# If you want to run SQLFluff pre-commit hooks you'll need
# to set up a working profile it can use and list it below
profile: default

model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["data-tests"]
Expand Down
11,124 changes: 5,562 additions & 5,562 deletions jaffle-data/raw_orders.csv

Large diffs are not rendered by default.

12 changes: 10 additions & 2 deletions macros/cents_to_dollars.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
{# A basic example for a project-wide macro to cast a column uniformly #}

{% macro cents_to_dollars(column_name, precision=2) -%}
({{ column_name }} / 100)::numeric(16, {{ precision }})
{% macro cents_to_dollars(column_name) -%}
{{ return(adapter.dispatch('cents_to_dollars')(column_name)) }}
{%- endmacro %}

{% macro default__cents_to_dollars(column_name) -%}
({{ column_name }} / 100)::numeric(16, 2)
{%- endmacro %}

{% macro bigquery__cents_to_dollars(column_name) %}
round(cast(({{ column_name }} / 100) as numeric), 2)
{% endmacro %}
36 changes: 14 additions & 22 deletions models/marts/customers.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,13 @@ customers as (

),

orders_table as (
orders as (

select * from {{ ref('orders') }}

),

order_items_table as (

select * from {{ ref('order_items') }}

),

order_summary as (
customer_orders_summary as (

select
orders.customer_id,
Expand All @@ -27,14 +21,11 @@ order_summary as (
count(distinct orders.order_id) > 1 as is_repeat_buyer,
min(orders.ordered_at) as first_ordered_at,
max(orders.ordered_at) as last_ordered_at,
sum(order_items.product_price) as lifetime_spend_pretax,
sum(orders.subtotal) as lifetime_spend_pretax,
sum(orders.tax_paid) as lifetime_tax_paid,
sum(orders.order_total) as lifetime_spend

from orders_table as orders

left join
order_items_table as order_items
on orders.order_id = order_items.order_id
from orders

group by 1

Expand All @@ -44,21 +35,22 @@ joined as (

select
customers.*,
order_summary.count_lifetime_orders,
order_summary.first_ordered_at,
order_summary.last_ordered_at,
order_summary.lifetime_spend_pretax,
order_summary.lifetime_spend,
customer_orders_summary.count_lifetime_orders,
customer_orders_summary.first_ordered_at,
customer_orders_summary.last_ordered_at,
customer_orders_summary.lifetime_spend_pretax,
customer_orders_summary.lifetime_tax_paid,
customer_orders_summary.lifetime_spend,

case
when order_summary.is_repeat_buyer then 'returning'
when customer_orders_summary.is_repeat_buyer then 'returning'
else 'new'
end as customer_type

from customers

left join order_summary
on customers.customer_id = order_summary.customer_id
left join customer_orders_summary
on customers.customer_id = customer_orders_summary.customer_id

)

Expand Down
5 changes: 5 additions & 0 deletions models/marts/customers.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
models:
- name: customers
description: Customer overview data mart, offering key details for each unique customer. One row per customer.
tests:
- dbt_utils.expression_is_true:
expression: "lifetime_spend_pretax + lifetime_tax_paid = lifetime_spend"
columns:
- name: customer_id
description: The unique key of the customers mart.
Expand All @@ -17,6 +20,8 @@ models:
description: The timestamp of a customer's most recent order.
- name: lifetime_spend_pretax
description: The sum of all the pre-tax subtotals of every order a customer has placed.
- name: lifetime_tax_paid
description: The sum of the tax portion of every order a customer has placed.
- name: lifetime_spend
description: The sum of all the order totals (including tax) that a customer has ever placed.
- name: customer_type
Expand Down
1 change: 1 addition & 0 deletions models/marts/order_items.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ order_supplies_summary as (

select
product_id,

sum(supply_cost) as supply_cost

from supplies
Expand Down
5 changes: 5 additions & 0 deletions models/marts/order_items.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ models:
tests:
- not_null
- unique
- name: order_id
tests:
- relationships:
to: ref('orders')
field: order_id

unit_tests:
- name: test_supply_costs_sum_correctly
Expand Down
16 changes: 9 additions & 7 deletions models/marts/orders.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ orders as (

),

order_items_table as (
order_items as (

select * from {{ ref('order_items') }}

Expand All @@ -16,14 +16,15 @@ order_items_summary as (

select
order_id,

sum(supply_cost) as order_cost,
sum(product_price) as order_items_subtotal,
count(order_item_id) as count_order_items,
sum(case when is_food_item then 1 else 0 end)
as count_food_items,
sum(case when is_drink_item then 1 else 0 end)
as count_drink_items,

sum(supply_cost) as order_cost

from order_items_table
as count_drink_items
from order_items

group by 1

Expand All @@ -33,11 +34,12 @@ order_items_summary as (
compute_booleans as (

select

orders.*,
order_items_summary.order_cost,
order_items_summary.order_items_subtotal,
order_items_summary.count_food_items,
order_items_summary.count_drink_items,
order_items_summary.count_order_items,
order_items_summary.count_food_items > 0 as is_food_order,
order_items_summary.count_drink_items > 0 as is_drink_order

Expand Down
5 changes: 5 additions & 0 deletions models/marts/orders.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
models:
- name: orders
description: Order overview data mart, offering key details for each order including if it's a customer's first order and a food vs. drink item breakdown. One row per order.
tests:
- dbt_utils.expression_is_true:
expression: "order_items_subtotal = subtotal"
- dbt_utils.expression_is_true:
expression: "order_total = subtotal + tax_paid"
columns:
- name: order_id
description: The unique key of the orders mart.
Expand Down
7 changes: 7 additions & 0 deletions models/staging/stg_order_items.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,10 @@ models:
tests:
- not_null
- unique
- name: order_id
description: The corresponding order each order item belongs to
tests:
- not_null
- relationships:
to: ref('stg_orders')
field: order_id
8 changes: 6 additions & 2 deletions models/staging/stg_orders.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@ renamed as (
customer as customer_id,

---------- numerics
(order_total / 100.0) as order_total,
(tax_paid / 100.0) as tax_paid,
subtotal as subtotal_cents,
tax_paid as tax_paid_cents,
order_total as order_total_cents,
{{ cents_to_dollars('subtotal') }} as subtotal,
{{ cents_to_dollars('tax_paid') }} as tax_paid,
{{ cents_to_dollars('order_total') }} as order_total,

---------- timestamps
{{ dbt.date_trunc('day','ordered_at') }} as ordered_at
Expand Down
3 changes: 3 additions & 0 deletions models/staging/stg_orders.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
models:
- name: stg_orders
description: Order data with basic cleaning and transformation applied, one row per order.
tests:
- dbt_utils.expression_is_true:
expression: "order_total - tax_paid = subtotal"
columns:
- name: order_id
description: The unique key for each order.
Expand Down
2 changes: 1 addition & 1 deletion models/staging/stg_products.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ renamed as (


---------- numerics
(price / 100.0) as product_price,
{{ cents_to_dollars('price') }} as product_price,

---------- booleans
coalesce(type = 'jaffle', false) as is_food_item,
Expand Down
2 changes: 1 addition & 1 deletion models/staging/stg_supplies.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ renamed as (
name as supply_name,

---------- numerics
(cost / 100.0) as supply_cost,
{{ cents_to_dollars('cost') }} as supply_cost,

---------- booleans
perishable as is_perishable_supply
Expand Down
16 changes: 9 additions & 7 deletions package-lock.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
packages:
- package: dbt-labs/dbt_utils
version: 1.1.1
- package: calogica/dbt_date
version: 0.10.0
- package: dbt-labs/audit_helper
version: 0.10.0
sha1_hash: ef1c92462efce413655c7e7cbfc1b093c132ef83
- package: dbt-labs/dbt_utils
version: 1.1.1
- package: calogica/dbt_date
version: 0.10.0
- package: dbt-labs/audit_helper
version: 0.11.0
- package: calogica/dbt_expectations
version: 0.10.3
sha1_hash: 04cccdda410558cb2751bfe1ee6c1f6f61b60776
4 changes: 3 additions & 1 deletion packages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ packages:
- package: calogica/dbt_date
version: 0.10.0
- package: dbt-labs/audit_helper
version: 0.10.0
version: 0.11.0
- package: calogica/dbt_expectations
version: 0.10.3

0 comments on commit 2447e1b

Please sign in to comment.