From eff7f3a73d8f3108de9051bb7baffba64cd82d03 Mon Sep 17 00:00:00 2001 From: Fernando Brito Date: Mon, 6 May 2024 12:15:50 +0200 Subject: [PATCH 1/3] Fix typo on GitHub Workflow --- .github/workflows/main_test_package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main_test_package.yml b/.github/workflows/main_test_package.yml index 8fbe69a..567eda7 100644 --- a/.github/workflows/main_test_package.yml +++ b/.github/workflows/main_test_package.yml @@ -33,7 +33,7 @@ jobs: run: tox -e integration_snowflake - name: Run Snowflake Tests - run: tox -e integration_snowflake + run: tox -e snowflake sqlfluff-lint-models: name: Lint dbt models using SQLFluff From 9a74d5af1b3cdaaa760a93f9ae587e0016043b13 Mon Sep 17 00:00:00 2001 From: Fernando Brito Date: Mon, 6 May 2024 12:23:40 +0200 Subject: [PATCH 2/3] Improve service spend coverage on the hourly_spend model --- models/hourly_spend.sql | 175 ++++++++++++++++++ .../stg_database_storage_usage_history.sql | 3 +- .../stg_database_storage_usage_history.yml | 2 + 3 files changed, 179 insertions(+), 1 deletion(-) diff --git a/models/hourly_spend.sql b/models/hourly_spend.sql index 222edec..27f490c 100644 --- a/models/hourly_spend.sql +++ b/models/hourly_spend.sql @@ -48,6 +48,14 @@ storage_terabytes_daily as ( from {{ ref('stg_database_storage_usage_history') }} group by 1, 2, 3 union all + select + date, + 'Hybrid Tables' as storage_type, + database_name, + sum(average_hybrid_table_storage_bytes) / power(1024, 4) as storage_terabytes + from {{ ref('stg_database_storage_usage_history') }} + group by 1, 2, 3 + union all select date, 'Stage' as storage_type, @@ -106,6 +114,73 @@ data_transfer_spend_hourly as ( and hours.hour::date = stg_usage_in_currency_daily.usage_date ), +ai_services_spend_hourly as ( + -- Snowflake's documentation states that AI Services costs should be in the METERING_HISTORY view, + -- https://docs.snowflake.com/en/sql-reference/account-usage/metering_history + -- but 
it doesn't appear to be the case yet. + -- So for now we just use the daily reported usage and evenly distribute it across the day + select + hours.hour, + 'AI Services' as service, + null as storage_type, + null as warehouse_name, + null as database_name, + coalesce(stg_usage_in_currency_daily.usage_in_currency / hours.hours_thus_far, 0) as spend, + spend as spend_net_cloud_services, + stg_usage_in_currency_daily.currency as currency + from hours + left join {{ ref('stg_usage_in_currency_daily') }} as stg_usage_in_currency_daily on + stg_usage_in_currency_daily.account_locator = {{ account_locator() }} + and stg_usage_in_currency_daily.usage_type = 'ai services' + and hours.hour::date = stg_usage_in_currency_daily.usage_date +), + +logging_spend_hourly as ( + -- More granular cost information is available in the EVENT_USAGE_HISTORY view. + -- https://docs.snowflake.com/en/developer-guide/logging-tracing/logging-tracing-billing + -- For now we just use the daily reported usage and evenly distribute it across the day + select + hours.hour, + 'Logging' as service, + null as storage_type, + null as warehouse_name, + null as database_name, + coalesce(stg_usage_in_currency_daily.usage_in_currency / hours.hours_thus_far, 0) as spend, + spend as spend_net_cloud_services, + stg_usage_in_currency_daily.currency as currency + from hours + left join {{ ref('stg_usage_in_currency_daily') }} as stg_usage_in_currency_daily on + stg_usage_in_currency_daily.account_locator = {{ account_locator() }} + and stg_usage_in_currency_daily.usage_type = 'logging' + and hours.hour::date = stg_usage_in_currency_daily.usage_date +), + +-- For now we just use the daily reported usage and evenly distribute it across the day +-- More detailed information can be found on READER_ACCOUNT_USAGE.* +{% set reader_usage_types = [ + 'reader compute', 'reader storage', 'reader cloud services', + 'reader data transfer', 'reader adj for incl cloud services' +] %} + +{%- for reader_usage_type in 
reader_usage_types %} +"{{ reader_usage_type }}_spend_hourly" as ( + select + hours.hour, + INITCAP('{{ reader_usage_type }}') as service, + null as storage_type, + null as warehouse_name, + null as database_name, + coalesce(stg_usage_in_currency_daily.usage_in_currency / hours.hours_thus_far, 0) as spend, + spend as spend_net_cloud_services, + stg_usage_in_currency_daily.currency as currency + from hours + left join {{ ref('stg_usage_in_currency_daily') }} as stg_usage_in_currency_daily on + stg_usage_in_currency_daily.account_locator = {{ account_locator() }} + and stg_usage_in_currency_daily.usage_type = '{{ reader_usage_type }}' + and hours.hour::date = stg_usage_in_currency_daily.usage_date +), +{% endfor %} + compute_spend_hourly as ( select hours.hour, @@ -444,11 +519,105 @@ search_optimization_spend_hourly as ( group by 1, 2, 3, 4 ), +snowpark_container_services_spend_hourly as ( + select + hours.hour, + 'Snowpark Container Services' as service, + null as storage_type, + null as warehouse_name, + null as database_name, + coalesce( + sum( + stg_metering_history.credits_used * daily_rates.effective_rate + ), + 0 + ) as spend, + spend as spend_net_cloud_services, + any_value(daily_rates.currency) as currency + from hours + left join {{ ref('stg_metering_history') }} as stg_metering_history on + hours.hour = convert_timezone( + 'UTC', stg_metering_history.start_time + ) + and stg_metering_history.service_type = 'SNOWPARK_CONTAINER_SERVICES' + left join {{ ref('daily_rates') }} as daily_rates + on hours.hour::date = daily_rates.date + and daily_rates.service_type = 'COMPUTE' + and daily_rates.usage_type = 'snowpark container services' + group by 1, 2, 3, 4 +), + +hybrid_table_requests_spend_hourly as ( + select + hours.hour, + 'Hybrid Table Requests' as service, + null as storage_type, + null as warehouse_name, + null as database_name, + coalesce( + sum( + stg_metering_history.credits_used * daily_rates.effective_rate + ), + 0 + ) as spend, + spend as 
spend_net_cloud_services, + any_value(daily_rates.currency) as currency + from hours + left join {{ ref('stg_metering_history') }} as stg_metering_history on + hours.hour = convert_timezone( + 'UTC', stg_metering_history.start_time + ) + and stg_metering_history.service_type = 'HYBRID_TABLE_REQUESTS' + left join {{ ref('daily_rates') }} as daily_rates + on hours.hour::date = daily_rates.date + and daily_rates.service_type = 'COMPUTE' + and daily_rates.usage_type = 'hybrid table requests' + group by 1, 2, 3, 4 +), + +copy_files_spend_hourly as ( + select + hours.hour, + 'Copy Files' as service, + null as storage_type, + null as warehouse_name, + null as database_name, + coalesce( + sum( + stg_metering_history.credits_used * daily_rates.effective_rate + ), + 0 + ) as spend, + spend as spend_net_cloud_services, + any_value(daily_rates.currency) as currency + from hours + left join {{ ref('stg_metering_history') }} as stg_metering_history on + hours.hour = convert_timezone( + 'UTC', stg_metering_history.start_time + ) + and stg_metering_history.service_type = 'COPY_FILES' + left join {{ ref('daily_rates') }} as daily_rates + on hours.hour::date = daily_rates.date + and daily_rates.service_type = 'COMPUTE' + and daily_rates.usage_type = 'copy files' + group by 1, 2, 3, 4 +), + + + unioned as ( select * from storage_spend_hourly union all select * from data_transfer_spend_hourly union all + select * from ai_services_spend_hourly + union all + select * from logging_spend_hourly + union all + {%- for reader_usage_type in reader_usage_types %} + select * from "{{ reader_usage_type }}_spend_hourly" + union all + {%- endfor %} select * from compute_spend_hourly union all select * from adj_for_incl_cloud_services_hourly @@ -470,6 +639,12 @@ unioned as ( select * from search_optimization_spend_hourly union all select * from serverless_task_spend_hourly + union all + select * from snowpark_container_services_spend_hourly + union all + select * from 
hybrid_table_requests_spend_hourly + union all + select * from copy_files_spend_hourly ) select diff --git a/models/staging/stg_database_storage_usage_history.sql b/models/staging/stg_database_storage_usage_history.sql index 48c3738..9e9359a 100644 --- a/models/staging/stg_database_storage_usage_history.sql +++ b/models/staging/stg_database_storage_usage_history.sql @@ -4,5 +4,6 @@ select usage_date as date, database_name, average_database_bytes, - average_failsafe_bytes + average_failsafe_bytes, + average_hybrid_table_storage_bytes from {{ source('snowflake_account_usage', 'database_storage_usage_history') }} diff --git a/models/staging/stg_database_storage_usage_history.yml b/models/staging/stg_database_storage_usage_history.yml index 68fce03..0b27eeb 100644 --- a/models/staging/stg_database_storage_usage_history.yml +++ b/models/staging/stg_database_storage_usage_history.yml @@ -12,3 +12,5 @@ models: description: Number of bytes of database storage used, including data in Time Travel. - name: average_failsafe_bytes description: Number of bytes of Fail-safe storage used. + - name: average_hybrid_table_storage_bytes + description: Number of bytes of hybrid table storage used. 
From 20cad4e14b3f88d15c274dd66896617be4de9ab3 Mon Sep 17 00:00:00 2001 From: Fernando Brito Date: Mon, 6 May 2024 14:22:37 +0200 Subject: [PATCH 3/3] Make Hybrid Table Storage its own service type --- models/hourly_spend.sql | 109 +++++++++++++++--------- models/staging/stg_rate_sheet_daily.sql | 2 +- 2 files changed, 72 insertions(+), 39 deletions(-) diff --git a/models/hourly_spend.sql b/models/hourly_spend.sql index 27f490c..76e0ce8 100644 --- a/models/hourly_spend.sql +++ b/models/hourly_spend.sql @@ -48,14 +48,6 @@ storage_terabytes_daily as ( from {{ ref('stg_database_storage_usage_history') }} group by 1, 2, 3 union all - select - date, - 'Hybrid Tables' as storage_type, - database_name, - sum(average_hybrid_table_storage_bytes) / power(1024, 4) as storage_terabytes - from {{ ref('stg_database_storage_usage_history') }} - group by 1, 2, 3 - union all select date, 'Stage' as storage_type, @@ -92,6 +84,73 @@ storage_spend_hourly as ( group by 1, 2, 3, 4, 5 ), +-- Hybrid Table Storage has its own service type in `usage_in_currency_daily`, +-- so we also handle it separately, and not with "Storage". 
+_hybrid_table_terabytes_daily as ( + select + date, + null as storage_type, + database_name, + sum(average_hybrid_table_storage_bytes) / power(1024, 4) as storage_terabytes + from {{ ref('stg_database_storage_usage_history') }} + group by 1, 2, 3 +), + +hybrid_table_storage_spend_hourly as ( + select + hours.hour, + 'Hybrid Table Storage' as service, + null as storage_type, + null as warehouse_name, + _hybrid_table_terabytes_daily.database_name, + coalesce( + sum( + div0( + _hybrid_table_terabytes_daily.storage_terabytes, + hours.days_in_month * 24 + ) * daily_rates.effective_rate + ), + 0 + ) as spend, + spend as spend_net_cloud_services, + any_value(daily_rates.currency) as currency + from hours + left join _hybrid_table_terabytes_daily on hours.date = convert_timezone('UTC', _hybrid_table_terabytes_daily.date) + left join {{ ref('daily_rates') }} as daily_rates + on _hybrid_table_terabytes_daily.date = daily_rates.date + and daily_rates.service_type = 'STORAGE' + and daily_rates.usage_type = 'hybrid table storage' + group by 1, 2, 3, 4, 5 +), + +hybrid_table_requests_spend_hourly as ( + select + hours.hour, + 'Hybrid Table Requests' as service, + null as storage_type, + null as warehouse_name, + null as database_name, + coalesce( + sum( + stg_metering_history.credits_used * daily_rates.effective_rate + ), + 0 + ) as spend, + spend as spend_net_cloud_services, + any_value(daily_rates.currency) as currency + from hours + left join {{ ref('stg_metering_history') }} as stg_metering_history on + hours.hour = convert_timezone( + 'UTC', stg_metering_history.start_time + ) + and stg_metering_history.service_type = 'HYBRID_TABLE_REQUESTS' + left join {{ ref('daily_rates') }} as daily_rates + on hours.hour::date = daily_rates.date + and daily_rates.service_type = 'COMPUTE' + and daily_rates.usage_type = 'hybrid table requests' + group by 1, 2, 3, 4 +), + data_transfer_spend_hourly as ( -- Right now we don't have a way of getting this at an hourly grain -- We can get 
source cloud + region, target cloud + region, and bytes transferred at an hourly grain from DATA_TRANSFER_HISTORY @@ -547,34 +606,6 @@ snowpark_container_services_spend_hourly as ( group by 1, 2, 3, 4 ), -hybrid_table_requests_spend_hourly as ( - select - hours.hour, - 'Hybrid Table Requests' as service, - null as storage_type, - null as warehouse_name, - null as database_name, - coalesce( - sum( - stg_metering_history.credits_used * daily_rates.effective_rate - ), - 0 - ) as spend, - spend as spend_net_cloud_services, - any_value(daily_rates.currency) as currency - from hours - left join {{ ref('stg_metering_history') }} as stg_metering_history on - hours.hour = convert_timezone( - 'UTC', stg_metering_history.start_time - ) - and stg_metering_history.service_type = 'HYBRID_TABLE_REQUESTS' - left join {{ ref('daily_rates') }} as daily_rates - on hours.hour::date = daily_rates.date - and daily_rates.service_type = 'COMPUTE' - and daily_rates.usage_type = 'hybrid table requests' - group by 1, 2, 3, 4 -), - copy_files_spend_hourly as ( select hours.hour, @@ -608,6 +639,10 @@ copy_files_spend_hourly as ( unioned as ( select * from storage_spend_hourly union all + select * from hybrid_table_storage_spend_hourly + union all + select * from hybrid_table_requests_spend_hourly + union all select * from data_transfer_spend_hourly union all select * from ai_services_spend_hourly @@ -642,8 +677,6 @@ unioned as ( union all select * from snowpark_container_services_spend_hourly union all - select * from hybrid_table_requests_spend_hourly - union all select * from copy_files_spend_hourly ) diff --git a/models/staging/stg_rate_sheet_daily.sql b/models/staging/stg_rate_sheet_daily.sql index 460732d..44b9454 100644 --- a/models/staging/stg_rate_sheet_daily.sql +++ b/models/staging/stg_rate_sheet_daily.sql @@ -17,7 +17,7 @@ select -- Have recently seen new values introduced for one account: WAREHOUSE_METERING and CLOUD_SERVICES -- For now, we'll force these to either be COMPUTE or 
STORAGE since that's what the downstream models expect -- May adjust this in the future if Snowflake is permanently changing these fields for all accounts and starts offering different credit rates per usage_type - when service_type = 'STORAGE' then 'STORAGE' + when service_type in ('STORAGE', 'HYBRID_TABLE_STORAGE') then 'STORAGE' else 'COMPUTE' end as service_type from {{ source('snowflake_organization_usage', 'rate_sheet_daily') }}