From b3d2e4af76c3998769fc6ac420218d1fa8bafcbc Mon Sep 17 00:00:00 2001 From: Christian Minich Date: Thu, 28 Mar 2024 16:22:41 -0400 Subject: [PATCH 01/12] add assets for change graph --- dbt_project/models/ANALYTICS/location_stats.sql | 7 +++++++ dbt_project/models/CLEANED/orders_cleaned.sql | 1 + hooli_data_eng/resources/api.py | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 dbt_project/models/ANALYTICS/location_stats.sql diff --git a/dbt_project/models/ANALYTICS/location_stats.sql b/dbt_project/models/ANALYTICS/location_stats.sql new file mode 100644 index 00000000..a546a9bf --- /dev/null +++ b/dbt_project/models/ANALYTICS/location_stats.sql @@ -0,0 +1,7 @@ +select + order_date, + state, + count(*) as n_orders, + sum(order_total) as total_revenue +from {{ ref("orders_augmented") }} +group by 1, 2 diff --git a/dbt_project/models/CLEANED/orders_cleaned.sql b/dbt_project/models/CLEANED/orders_cleaned.sql index 12128c57..370b9dd9 100644 --- a/dbt_project/models/CLEANED/orders_cleaned.sql +++ b/dbt_project/models/CLEANED/orders_cleaned.sql @@ -5,6 +5,7 @@ select sku, dt, cast(dt as datetime) as order_date, + date_trunc('month', order_date) as order_month quantity * purchase_price as order_total from {{ source("RAW_DATA", "orders") }} {% if is_incremental() %} diff --git a/hooli_data_eng/resources/api.py b/hooli_data_eng/resources/api.py index 881383c8..807260a8 100644 --- a/hooli_data_eng/resources/api.py +++ b/hooli_data_eng/resources/api.py @@ -33,7 +33,7 @@ def get_orders(self, datetime_to_process): # random order data returned, see utils.py json = random_data( - extra_columns={"order_id": str, "quantity": int, "purchase_price": float, "sku": str}, + extra_columns={"order_id": str, "order_desc": str, "quantity": int, "purchase_price": float, "sku": str}, n = 10, filter_date=datetime_to_process ).to_json() From 58c9dd99883f7925284150946abc5c1ca35ce70c Mon Sep 17 00:00:00 2001 From: Christian Minich Date: Thu, 28 Mar 2024 16:43:52 -0400 Subject: [PATCH 02/12] add location_stats --- hooli_data_eng/assets/dbt_assets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hooli_data_eng/assets/dbt_assets.py b/hooli_data_eng/assets/dbt_assets.py index a86a25d2..c213a5d6 100644 --- a/hooli_data_eng/assets/dbt_assets.py +++ b/hooli_data_eng/assets/dbt_assets.py @@ -147,7 +147,7 @@ def _process_partitioned_dbt_assets(context: OpExecutionContext, dbt: DbtCliReso @dbt_assets( manifest=DBT_MANIFEST, - select="orders_cleaned users_cleaned orders_augmented", + select="orders_cleaned users_cleaned orders_augmented location_stats", partitions_def=daily_partitions, dagster_dbt_translator=CustomDagsterDbtTranslator(settings=DagsterDbtTranslatorSettings(enable_asset_checks=True)), backfill_policy=BackfillPolicy.single_run(), From d965a18b84b3d7418696bfb7c2b3c971664298a2 Mon Sep 17 00:00:00 2001 From: Christian Minich Date: Thu, 28 Mar 2024 16:44:15 -0400 Subject: [PATCH 03/12] speed up dockerfiles with uv --- hooli-demo-assets/Dockerfile | 3 ++- hooli_basics/Dockerfile | 3 ++- hooli_batch_enrichment/Dockerfile | 3 ++- hooli_snowflake_insights/Dockerfile | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/hooli-demo-assets/Dockerfile b/hooli-demo-assets/Dockerfile index 1c263839..574975c1 100644 --- a/hooli-demo-assets/Dockerfile +++ b/hooli-demo-assets/Dockerfile @@ -4,4 +4,5 @@ WORKDIR /opt/dagster/app ADD . . -RUN pip install -e . \ No newline at end of file +RUN python -m pip install -U uv +RUN uv pip install --system -e . diff --git a/hooli_basics/Dockerfile b/hooli_basics/Dockerfile index 33751b97..d00dd7d6 100644 --- a/hooli_basics/Dockerfile +++ b/hooli_basics/Dockerfile @@ -4,4 +4,5 @@ WORKDIR /opt/dagster/app ADD . . -RUN pip install -r requirements.txt +RUN python -m pip install -U uv +RUN uv pip install --system -r requirements.txt diff --git a/hooli_batch_enrichment/Dockerfile b/hooli_batch_enrichment/Dockerfile index a401d314..aee36038 100644 --- a/hooli_batch_enrichment/Dockerfile +++ b/hooli_batch_enrichment/Dockerfile @@ -4,4 +4,5 @@ WORKDIR /opt/dagster/app ADD . . -RUN pip install -e . +RUN python -m pip install -U uv +RUN uv pip install --system -e . \ No newline at end of file diff --git a/hooli_snowflake_insights/Dockerfile b/hooli_snowflake_insights/Dockerfile index 4ac0381f..5a5e5abe 100644 --- a/hooli_snowflake_insights/Dockerfile +++ b/hooli_snowflake_insights/Dockerfile @@ -9,4 +9,5 @@ RUN python -m pip install git+https://github.com/wbond/oscrypto.git@d5f3437ed242 ADD . . -RUN pip install -r requirements.txt +RUN python -m pip install -U uv +RUN uv pip install --system -r requirements.txt From 899d12f2083cee0e3d36414714c5071aa3f85d22 Mon Sep 17 00:00:00 2001 From: Christian Minich Date: Fri, 29 Mar 2024 09:42:10 -0400 Subject: [PATCH 04/12] add code version --- hooli_data_eng/assets/raw_data/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hooli_data_eng/assets/raw_data/__init__.py b/hooli_data_eng/assets/raw_data/__init__.py index aa44d810..7a849913 100644 --- a/hooli_data_eng/assets/raw_data/__init__.py +++ b/hooli_data_eng/assets/raw_data/__init__.py @@ -82,7 +82,8 @@ def check_users(context, users: pd.DataFrame): backoff=Backoff.LINEAR, jitter=Jitter.FULL ), - backfill_policy=BackfillPolicy.single_run() + backfill_policy=BackfillPolicy.single_run(), + code_version="1" ) def orders(context, api: RawDataAPI) -> pd.DataFrame: """A table containing all orders that have been placed""" From eb5f947dccdb3135b5643cbeebeff1f10129969d Mon Sep 17 00:00:00 2001 From: Christian Minich Date: Fri, 29 Mar 2024 09:55:31 -0400 Subject: [PATCH 05/12] write sql good --- dbt_project/models/CLEANED/orders_cleaned.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt_project/models/CLEANED/orders_cleaned.sql b/dbt_project/models/CLEANED/orders_cleaned.sql index 370b9dd9..e02e13b4 100644 --- a/dbt_project/models/CLEANED/orders_cleaned.sql +++ b/dbt_project/models/CLEANED/orders_cleaned.sql @@ -5,7 +5,7 @@ select sku, dt, cast(dt as datetime) as order_date, - date_trunc('month', order_date) as order_month + date_trunc('month', order_date) as order_month, quantity * purchase_price as order_total from {{ source("RAW_DATA", "orders") }} {% if is_incremental() %} From cff880c742c45893db151c560a0a86273fde628d Mon Sep 17 00:00:00 2001 From: Christian Minich Date: Wed, 3 Jul 2024 15:20:18 -0400 Subject: [PATCH 06/12] fix comma --- hooli_data_eng/assets/raw_data/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hooli_data_eng/assets/raw_data/__init__.py b/hooli_data_eng/assets/raw_data/__init__.py index 7753e571..b3dde002 100644 --- a/hooli_data_eng/assets/raw_data/__init__.py +++ b/hooli_data_eng/assets/raw_data/__init__.py @@ -92,7 +92,7 @@ def check_users(context, users: pd.DataFrame): jitter=Jitter.FULL ), backfill_policy=BackfillPolicy.single_run(), - code_version="1" + code_version="1", tags={**StorageKindTagSet(storage_kind=storage_kind)}, ) def orders(context, api: RawDataAPI) -> pd.DataFrame: From b3ee75f25593d6dfd427002ea17755b0399b1b9c Mon Sep 17 00:00:00 2001 From: Christian Minich Date: Thu, 28 Mar 2024 16:22:41 -0400 Subject: [PATCH 07/12] add assets for change graph --- dbt_project/models/ANALYTICS/location_stats.sql | 7 +++++++ dbt_project/models/CLEANED/orders_cleaned.sql | 1 + hooli_data_eng/resources/api.py | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 dbt_project/models/ANALYTICS/location_stats.sql diff --git a/dbt_project/models/ANALYTICS/location_stats.sql b/dbt_project/models/ANALYTICS/location_stats.sql new file mode 100644 index 00000000..a546a9bf --- /dev/null +++ b/dbt_project/models/ANALYTICS/location_stats.sql @@ -0,0 +1,7 @@ +select + order_date, + state, + count(*) as n_orders, + sum(order_total) as total_revenue +from {{ ref("orders_augmented") }} +group by 1, 2 diff --git a/dbt_project/models/CLEANED/orders_cleaned.sql b/dbt_project/models/CLEANED/orders_cleaned.sql index c200e626..6fa04715 100644 --- a/dbt_project/models/CLEANED/orders_cleaned.sql +++ b/dbt_project/models/CLEANED/orders_cleaned.sql @@ -6,6 +6,7 @@ select sku, dt, cast(dt as datetime) as order_date, + date_trunc('month', order_date) as order_month quantity * purchase_price as order_total from {{ source("raw_data", "orders") }} {% if is_incremental() %} diff --git a/hooli_data_eng/resources/api.py b/hooli_data_eng/resources/api.py index 881383c8..807260a8 100644 --- a/hooli_data_eng/resources/api.py +++ b/hooli_data_eng/resources/api.py @@ -33,7 +33,7 @@ def get_orders(self, datetime_to_process): # random order data returned, see utils.py json = random_data( - extra_columns={"order_id": str, "quantity": int, "purchase_price": float, "sku": str}, + extra_columns={"order_id": str, "order_desc": str, "quantity": int, "purchase_price": float, "sku": str}, n = 10, filter_date=datetime_to_process ).to_json() From 2e87083b9947462c73e1ca67c6406fe89d060002 Mon Sep 17 00:00:00 2001 From: Christian Minich Date: Thu, 28 Mar 2024 16:43:52 -0400 Subject: [PATCH 08/12] add location_stats --- hooli_data_eng/assets/dbt_assets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hooli_data_eng/assets/dbt_assets.py b/hooli_data_eng/assets/dbt_assets.py index e0143c13..55421aff 100644 --- a/hooli_data_eng/assets/dbt_assets.py +++ b/hooli_data_eng/assets/dbt_assets.py @@ -134,7 +134,7 @@ def _process_partitioned_dbt_assets(context: OpExecutionContext, dbt: DbtCliReso @dbt_assets( manifest=DBT_MANIFEST, project=dbt_project, - select="orders_cleaned users_cleaned orders_augmented", + select="orders_cleaned users_cleaned orders_augmented location_stats", partitions_def=daily_partitions, dagster_dbt_translator=CustomDagsterDbtTranslator( settings=DagsterDbtTranslatorSettings(enable_asset_checks=True, From 790ca224557cb24caa02576f5edce8717e6e210f Mon Sep 17 00:00:00 2001 From: Christian Minich Date: Thu, 28 Mar 2024 16:44:15 -0400 Subject: [PATCH 09/12] speed up dockerfiles with uv --- hooli-demo-assets/Dockerfile | 3 ++- hooli_basics/Dockerfile | 3 ++- hooli_batch_enrichment/Dockerfile | 3 ++- hooli_snowflake_insights/Dockerfile | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/hooli-demo-assets/Dockerfile b/hooli-demo-assets/Dockerfile index efad8df4..f92f2a9c 100644 --- a/hooli-demo-assets/Dockerfile +++ b/hooli-demo-assets/Dockerfile @@ -4,4 +4,5 @@ WORKDIR /opt/dagster/app ADD . . -RUN pip install -e . \ No newline at end of file +RUN python -m pip install -U uv +RUN uv pip install --system -e . diff --git a/hooli_basics/Dockerfile b/hooli_basics/Dockerfile index 3887ec8e..ef531b07 100644 --- a/hooli_basics/Dockerfile +++ b/hooli_basics/Dockerfile @@ -4,4 +4,5 @@ WORKDIR /opt/dagster/app ADD . . -RUN pip install -r requirements.txt +RUN python -m pip install -U uv +RUN uv pip install --system -r requirements.txt diff --git a/hooli_batch_enrichment/Dockerfile b/hooli_batch_enrichment/Dockerfile index 3af84fc5..4f50126f 100644 --- a/hooli_batch_enrichment/Dockerfile +++ b/hooli_batch_enrichment/Dockerfile @@ -4,4 +4,5 @@ WORKDIR /opt/dagster/app ADD . . -RUN pip install -e . +RUN python -m pip install -U uv +RUN uv pip install --system -e . \ No newline at end of file diff --git a/hooli_snowflake_insights/Dockerfile b/hooli_snowflake_insights/Dockerfile index 1c0b3d70..d671b3b0 100644 --- a/hooli_snowflake_insights/Dockerfile +++ b/hooli_snowflake_insights/Dockerfile @@ -9,4 +9,5 @@ RUN python -m pip install git+https://github.com/wbond/oscrypto.git@d5f3437ed242 ADD . . -RUN pip install -r requirements.txt +RUN python -m pip install -U uv +RUN uv pip install --system -r requirements.txt From 551f1689cd3fa894450ae9ce7301ad617fbed512 Mon Sep 17 00:00:00 2001 From: Christian Minich Date: Fri, 29 Mar 2024 09:42:10 -0400 Subject: [PATCH 10/12] add code version --- hooli_data_eng/assets/raw_data/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hooli_data_eng/assets/raw_data/__init__.py b/hooli_data_eng/assets/raw_data/__init__.py index 9c4262a2..5a668151 100644 --- a/hooli_data_eng/assets/raw_data/__init__.py +++ b/hooli_data_eng/assets/raw_data/__init__.py @@ -93,6 +93,7 @@ def check_users(context, users: pd.DataFrame): ), backfill_policy=BackfillPolicy.single_run(), tags={**StorageKindTagSet(storage_kind=storage_kind)}, + code_version="1" ) def orders(context, api: RawDataAPI) -> pd.DataFrame: """A table containing all orders that have been placed""" From aafaa1ddc302313ad77511fd6f823ab88798b69d Mon Sep 17 00:00:00 2001 From: Christian Minich Date: Fri, 29 Mar 2024 09:55:31 -0400 Subject: [PATCH 11/12] write sql good --- dbt_project/models/CLEANED/orders_cleaned.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt_project/models/CLEANED/orders_cleaned.sql b/dbt_project/models/CLEANED/orders_cleaned.sql index 6fa04715..39803556 100644 --- a/dbt_project/models/CLEANED/orders_cleaned.sql +++ b/dbt_project/models/CLEANED/orders_cleaned.sql @@ -6,7 +6,7 @@ select sku, dt, cast(dt as datetime) as order_date, - date_trunc('month', order_date) as order_month + date_trunc('month', order_date) as order_month, quantity * purchase_price as order_total from {{ source("raw_data", "orders") }} {% if is_incremental() %} From c6db99e818dc1f1355f3ae604c496cb46df88e89 Mon Sep 17 00:00:00 2001 From: Christian Minich Date: Wed, 3 Jul 2024 15:20:18 -0400 Subject: [PATCH 12/12] fix comma --- hooli_data_eng/assets/raw_data/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hooli_data_eng/assets/raw_data/__init__.py b/hooli_data_eng/assets/raw_data/__init__.py index 5a668151..c6b33691 100644 --- a/hooli_data_eng/assets/raw_data/__init__.py +++ b/hooli_data_eng/assets/raw_data/__init__.py @@ -92,8 +92,8 @@ def check_users(context, users: pd.DataFrame): jitter=Jitter.FULL ), backfill_policy=BackfillPolicy.single_run(), + code_version="1", tags={**StorageKindTagSet(storage_kind=storage_kind)}, - code_version="1" ) def orders(context, api: RawDataAPI) -> pd.DataFrame: """A table containing all orders that have been placed"""