Skip to content

Commit

Permalink
Merge pull request #152 from get-select/add-new-services
Browse files Browse the repository at this point in the history
Improve service spend coverage by adding support for new Snowflake services
  • Loading branch information
NiallRees authored May 6, 2024
2 parents 8fef83a + 20cad4e commit 65e2a71
Show file tree
Hide file tree
Showing 5 changed files with 214 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main_test_package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
run: tox -e integration_snowflake

- name: Run Snowflake Tests
run: tox -e integration_snowflake
run: tox -e snowflake

sqlfluff-lint-models:
name: Lint dbt models using SQLFluff
Expand Down
208 changes: 208 additions & 0 deletions models/hourly_spend.sql
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,73 @@ storage_spend_hourly as (
group by 1, 2, 3, 4, 5
),

-- Hybrid Table Storage has its own service type in `usage_in_currency_daily`,
-- so we also handle it separately, and not with "Storage".
-- Daily hybrid table storage per database, converted from bytes to terabytes
-- (power(1024, 4) bytes per TB) so it can be priced against the per-TB rate
-- in `daily_rates` downstream.
_hybrid_table_terabytes_daily as (
select
date,
-- no storage sub-type applies to hybrid tables; kept for schema alignment
null as storage_type,
database_name,
sum(average_hybrid_table_storage_bytes) / power(1024, 4) as storage_terabytes
from {{ ref('stg_database_storage_usage_history') }}
group by 1, 2, 3
),

-- Spreads each day's hybrid table storage cost evenly across the day's hours
-- (terabytes / (days_in_month * 24)) and prices it with that day's effective
-- 'hybrid table storage' rate.
hybrid_table_storage_spend_hourly as (
select
hours.hour,
'Hybrid Table Storage' as service,
null as storage_type,
null as warehouse_name,
_hybrid_table_terabytes_daily.database_name,
coalesce(
sum(
-- div0 yields 0 rather than erroring if the divisor is ever 0
div0(
_hybrid_table_terabytes_daily.storage_terabytes,
hours.days_in_month * 24
) * daily_rates.effective_rate
),
0
) as spend,
-- no cloud-services adjustment applies here, so net spend equals gross spend
spend as spend_net_cloud_services,
any_value(daily_rates.currency) as currency
from hours
-- NOTE(review): convert_timezone on what appears to be a DATE column looks
-- like a no-op -- confirm whether `date` is actually a timestamp upstream
left join _hybrid_table_terabytes_daily on hours.date = convert_timezone('UTC', _hybrid_table_terabytes_daily.date)
left join {{ ref('daily_rates') }} as daily_rates
on _hybrid_table_terabytes_daily.date = daily_rates.date
and daily_rates.service_type = 'STORAGE'
and daily_rates.usage_type = 'hybrid table storage'
group by 1, 2, 3, 4, 5
),

-- Hourly hybrid table request spend: credits metered per hour in
-- METERING_HISTORY multiplied by that day's effective compute rate.
hybrid_table_requests_spend_hourly as (
select
hours.hour,
'Hybrid Table Requests' as service,
null as storage_type,
null as warehouse_name,
null as database_name,
coalesce(
sum(
stg_metering_history.credits_used * daily_rates.effective_rate
),
0
) as spend,
-- no cloud-services adjustment applies here, so net spend equals gross spend
spend as spend_net_cloud_services,
any_value(daily_rates.currency) as currency
from hours
-- left join keeps every hour in the spine, producing 0 spend for hours
-- with no hybrid table request usage
left join {{ ref('stg_metering_history') }} as stg_metering_history on
hours.hour = convert_timezone(
'UTC', stg_metering_history.start_time
)
and stg_metering_history.service_type = 'HYBRID_TABLE_REQUESTS'
left join {{ ref('daily_rates') }} as daily_rates
on hours.hour::date = daily_rates.date
and daily_rates.service_type = 'COMPUTE'
and daily_rates.usage_type = 'hybrid table requests'
group by 1, 2, 3, 4
),

data_transfer_spend_hourly as (
-- Right now we don't have a way of getting this at an hourly grain
-- We can get source cloud + region, target cloud + region, and bytes transferred at an hourly grain from DATA_TRANSFER_HISTORY
Expand All @@ -106,6 +173,73 @@ data_transfer_spend_hourly as (
and hours.hour::date = stg_usage_in_currency_daily.usage_date
),

ai_services_spend_hourly as (
    -- Snowflake's documentation states that AI Services costs should appear in
    -- the METERING_HISTORY view
    -- (https://docs.snowflake.com/en/sql-reference/account-usage/metering_history),
    -- but that doesn't appear to be the case yet. Until it is, take the daily
    -- reported usage and distribute it evenly over the hours elapsed so far.
    select
        hours.hour,
        'AI Services' as service,
        null as storage_type,
        null as warehouse_name,
        null as database_name,
        coalesce(
            usage_daily.usage_in_currency / hours.hours_thus_far,
            0
        ) as spend,
        -- no cloud-services adjustment applies, so net spend equals gross spend
        spend as spend_net_cloud_services,
        usage_daily.currency as currency
    from hours
    left join {{ ref('stg_usage_in_currency_daily') }} as usage_daily
        on hours.hour::date = usage_daily.usage_date
        and usage_daily.usage_type = 'ai services'
        and usage_daily.account_locator = {{ account_locator() }}
),

logging_spend_hourly as (
    -- More granular cost information is available in the EVENT_USAGE_HISTORY
    -- view
    -- (https://docs.snowflake.com/en/developer-guide/logging-tracing/logging-tracing-billing).
    -- For now, take the daily reported usage and distribute it evenly over the
    -- hours elapsed so far.
    select
        hours.hour,
        'Logging' as service,
        null as storage_type,
        null as warehouse_name,
        null as database_name,
        coalesce(
            usage_daily.usage_in_currency / hours.hours_thus_far,
            0
        ) as spend,
        -- no cloud-services adjustment applies, so net spend equals gross spend
        spend as spend_net_cloud_services,
        usage_daily.currency as currency
    from hours
    left join {{ ref('stg_usage_in_currency_daily') }} as usage_daily
        on hours.hour::date = usage_daily.usage_date
        and usage_daily.usage_type = 'logging'
        and usage_daily.account_locator = {{ account_locator() }}
),

-- For now we just use the daily reported usage and evenly distribute it across the day
-- More detailed information can be found on READER_ACCOUNT_USAGE.*
-- One CTE is generated per reader usage type below. The generated CTE names
-- contain spaces (e.g. "reader compute_spend_hourly"), so they are wrapped in
-- double quotes; the union that consumes them must use identical quoting.
{% set reader_usage_types = [
'reader compute', 'reader storage', 'reader cloud services',
'reader data transfer', 'reader adj for incl cloud services'
] %}

{%- for reader_usage_type in reader_usage_types %}
"{{ reader_usage_type }}_spend_hourly" as (
select
hours.hour,
-- INITCAP turns the usage type into a display name, e.g. 'Reader Compute'
INITCAP('{{ reader_usage_type }}') as service,
null as storage_type,
null as warehouse_name,
null as database_name,
coalesce(stg_usage_in_currency_daily.usage_in_currency / hours.hours_thus_far, 0) as spend,
-- no cloud-services adjustment applies, so net spend equals gross spend
spend as spend_net_cloud_services,
stg_usage_in_currency_daily.currency as currency
from hours
left join {{ ref('stg_usage_in_currency_daily') }} as stg_usage_in_currency_daily on
stg_usage_in_currency_daily.account_locator = {{ account_locator() }}
and stg_usage_in_currency_daily.usage_type = '{{ reader_usage_type }}'
and hours.hour::date = stg_usage_in_currency_daily.usage_date
),
{% endfor %}

compute_spend_hourly as (
select
hours.hour,
Expand Down Expand Up @@ -444,11 +578,81 @@ search_optimization_spend_hourly as (
group by 1, 2, 3, 4
),

-- Hourly Snowpark Container Services spend: credits metered per hour in
-- METERING_HISTORY multiplied by that day's effective compute rate.
snowpark_container_services_spend_hourly as (
select
hours.hour,
'Snowpark Container Services' as service,
null as storage_type,
null as warehouse_name,
null as database_name,
coalesce(
sum(
stg_metering_history.credits_used * daily_rates.effective_rate
),
0
) as spend,
-- no cloud-services adjustment applies here, so net spend equals gross spend
spend as spend_net_cloud_services,
any_value(daily_rates.currency) as currency
from hours
-- left join keeps every hour in the spine, producing 0 spend for hours
-- with no container services usage
left join {{ ref('stg_metering_history') }} as stg_metering_history on
hours.hour = convert_timezone(
'UTC', stg_metering_history.start_time
)
and stg_metering_history.service_type = 'SNOWPARK_CONTAINER_SERVICES'
left join {{ ref('daily_rates') }} as daily_rates
on hours.hour::date = daily_rates.date
and daily_rates.service_type = 'COMPUTE'
and daily_rates.usage_type = 'snowpark container services'
group by 1, 2, 3, 4
),

-- Hourly COPY FILES spend: credits metered per hour in METERING_HISTORY
-- multiplied by that day's effective compute rate.
copy_files_spend_hourly as (
select
hours.hour,
'Copy Files' as service,
null as storage_type,
null as warehouse_name,
null as database_name,
coalesce(
sum(
stg_metering_history.credits_used * daily_rates.effective_rate
),
0
) as spend,
-- no cloud-services adjustment applies here, so net spend equals gross spend
spend as spend_net_cloud_services,
any_value(daily_rates.currency) as currency
from hours
-- left join keeps every hour in the spine, producing 0 spend for hours
-- with no COPY FILES usage
left join {{ ref('stg_metering_history') }} as stg_metering_history on
hours.hour = convert_timezone(
'UTC', stg_metering_history.start_time
)
and stg_metering_history.service_type = 'COPY_FILES'
left join {{ ref('daily_rates') }} as daily_rates
on hours.hour::date = daily_rates.date
and daily_rates.service_type = 'COMPUTE'
and daily_rates.usage_type = 'copy files'
group by 1, 2, 3, 4
),



unioned as (
select * from storage_spend_hourly
union all
select * from hybrid_table_storage_spend_hourly
union all
select * from hybrid_table_requests_spend_hourly
union all
select * from data_transfer_spend_hourly
union all
select * from ai_services_spend_hourly
union all
select * from logging_spend_hourly
union all
{%- for reader_usage_type in reader_usage_types %}
select * from "{{ reader_usage_type }}_spend_hourly"
union all
{%- endfor %}
select * from compute_spend_hourly
union all
select * from adj_for_incl_cloud_services_hourly
Expand All @@ -470,6 +674,10 @@ unioned as (
select * from search_optimization_spend_hourly
union all
select * from serverless_task_spend_hourly
union all
select * from snowpark_container_services_spend_hourly
union all
select * from copy_files_spend_hourly
)

select
Expand Down
3 changes: 2 additions & 1 deletion models/staging/stg_database_storage_usage_history.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ select
usage_date as date,
database_name,
average_database_bytes,
average_failsafe_bytes
average_failsafe_bytes,
average_hybrid_table_storage_bytes
from {{ source('snowflake_account_usage', 'database_storage_usage_history') }}
2 changes: 2 additions & 0 deletions models/staging/stg_database_storage_usage_history.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ models:
description: Number of bytes of database storage used, including data in Time Travel.
- name: average_failsafe_bytes
description: Number of bytes of Fail-safe storage used.
- name: average_hybrid_table_storage_bytes
description: Number of bytes of hybrid table storage used.
2 changes: 1 addition & 1 deletion models/staging/stg_rate_sheet_daily.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ select
-- Have recently seen new values introduced for one account: WAREHOUSE_METERING and CLOUD_SERVICES
-- For now, we'll force these to either be COMPUTE or STORAGE since that's what the downstream models expect
-- May adjust this in the future if Snowflake is permanently changing these fields for all accounts and starts offering different credit rates per usage_type
when service_type = 'STORAGE' then 'STORAGE'
when service_type in ('STORAGE', 'HYBRID_TABLE_STORAGE') then 'STORAGE'
else 'COMPUTE'
end as service_type
from {{ source('snowflake_organization_usage', 'rate_sheet_daily') }}
Expand Down

0 comments on commit 65e2a71

Please sign in to comment.