-
Notifications
You must be signed in to change notification settings - Fork 281
/
storage_billing_model_savings_ddl.sql
140 lines (131 loc) · 6.48 KB
/
storage_billing_model_savings_ddl.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
-- WARNING #########################################################################################################################################
-- WARNING If you change the storage billing model of a dataset then you have to wait 14 days before changing it again.
-- WARNING See https://cloud.google.com/bigquery/docs/datasets-intro#dataset_limitations
-- WARNING #########################################################################################################################################
-- Instructions
-- Search for marker 'REMEMBER' to tune the queries at your will
-- Run the query to obtain the results including ALTER SCHEMA DDL
-- Permissions needed
-- * To run this query https://cloud.google.com/bigquery/docs/information-schema-table-storage-by-organization#required_permissions
-- * To run the ALTER SCHEMA DDL https://cloud.google.com/bigquery/docs/updating-datasets#required_permissions
-- REMEMBER: Put here the prices of the region of interest, current values are for the US
-- See https://cloud.google.com/bigquery/pricing#storage
DECLARE active_logical_gib_price FLOAT64 DEFAULT 0.02;
DECLARE long_term_logical_gib_price FLOAT64 DEFAULT 0.01;
DECLARE active_physical_gib_price FLOAT64 DEFAULT 0.04;
DECLARE long_term_physical_gib_price FLOAT64 DEFAULT 0.02;
-- REMEMBER: (optional) Change this if you plan to change the time travel window (e.g. if you currently use the default 7 days and want to reduce it to 2 days put 2.0/7.0)
-- See https://cloud.google.com/bigquery/docs/time-travel#configuring_the_time_travel_window
DECLARE time_travel_rescale FLOAT64 DEFAULT 1.0;
-- REMEMBER: (optional) Change this if you want that the generated DDL statements include time travel window settings (e.g. 2.0*24.0)
DECLARE time_travel_hours FLOAT64 DEFAULT NULL;
-- REMEMBER: (optional) Change this to filter based on savings absolute value
DECLARE min_monthly_saving FLOAT64 DEFAULT 0.0;
-- REMEMBER: (optional) Change this to filter based on savings % value (e.g. 1% is 0.01)
DECLARE min_monthly_saving_pct FLOAT64 DEFAULT 0.00;
CREATE SCHEMA IF NOT EXISTS optimization_workshop;
CREATE OR REPLACE TABLE optimization_workshop.storage_billing_model_savings_ddl AS
WITH
storage_sizes AS (
SELECT
project_id AS project_name,
table_schema AS dataset_name,
-- Logical
SUM(active_logical_bytes) / power(1024, 3) AS active_logical_gib,
SUM(long_term_logical_bytes) / power(1024, 3) AS long_term_logical_gib,
-- Physical
SUM(active_physical_bytes - time_travel_physical_bytes) / power(1024, 3) AS active_no_tt_no_fs_physical_gib,
SUM(time_travel_physical_bytes) / power(1024, 3) * time_travel_rescale AS time_travel_physical_gib,
SUM(fail_safe_physical_bytes) / power(1024, 3) AS fail_safe_physical_gib,
SUM(long_term_physical_bytes) / power(1024, 3) AS long_term_physical_gib,
FROM
-- REMEMBER: Change here to the region of interest
`region-us`.INFORMATION_SCHEMA.TABLE_STORAGE_BY_ORGANIZATION
-- See https://cloud.google.com/bigquery/docs/information-schema-table-storage-by-organization#schema
WHERE TRUE
AND total_physical_bytes > 0
-- REMEMBER: (optional) You may want to enable this filter to exclude ML models, materialized views, etc...
-- AND table_type = 'BASE TABLE'
GROUP BY 1,2
),
storage_prices AS (
SELECT
project_name,
dataset_name,
-- Logical
active_logical_gib AS active_logical_gib,
long_term_logical_gib AS long_term_logical_gib,
-- Physical
active_no_tt_no_fs_physical_gib AS active_no_tt_no_fs_physical_gib,
time_travel_physical_gib AS time_travel_physical_gib,
fail_safe_physical_gib AS fail_safe_physical_gib,
long_term_physical_gib AS long_term_physical_gib,
-- Compression ratio
SAFE_DIVIDE(active_logical_gib, active_no_tt_no_fs_physical_gib) AS active_compression_ratio,
SAFE_DIVIDE(long_term_logical_gib, long_term_physical_gib) AS long_term_compression_ratio,
-- Forecast costs logical
active_logical_gib * active_logical_gib_price AS forecast_active_logical_cost,
long_term_logical_gib * long_term_logical_gib_price AS forecast_long_term_logical_cost,
-- Forecast costs physical
active_no_tt_no_fs_physical_gib * active_physical_gib_price AS forecast_active_no_tt_no_fs_physical_cost,
time_travel_physical_gib * active_physical_gib_price AS forecast_travel_physical_cost,
fail_safe_physical_gib * active_physical_gib_price AS forecast_failsafe_physical_cost,
long_term_physical_gib * long_term_physical_gib_price AS forecast_long_term_physical_cost,
FROM
storage_sizes
),
storage_prices_total AS (
SELECT
project_name,
dataset_name,
(forecast_active_logical_cost+forecast_long_term_logical_cost) AS forecast_logical,
(forecast_active_no_tt_no_fs_physical_cost+forecast_travel_physical_cost+forecast_failsafe_physical_cost+forecast_long_term_physical_cost) AS forecast_physical,
FROM storage_prices
),
current_model AS (
SELECT
catalog_name AS project_name,
option_name AS dataset_name,
LOWER(option_value) AS current_model
FROM
-- REMEMBER: Change here to the region of interest
`region-us`.INFORMATION_SCHEMA.SCHEMATA_OPTIONS
WHERE option_name = "storage_billing_model"
),
storage_prices_compare AS (
SELECT
SPT.project_name,
SPT.dataset_name,
SPT.forecast_logical,
SPT.forecast_physical,
(SPT.forecast_logical - SPT.forecast_physical) AS forecast_compare,
IF(SPT.forecast_logical > SPT.forecast_physical,"physical","logical") AS better_on,
IFNULL(CM.current_model, "logical") AS currently_on
FROM
storage_prices_total AS SPT
LEFT JOIN current_model AS CM
USING(project_name, dataset_name)
),
storage_prices_compare2 AS (
SELECT
*,
IF(currently_on = "logical", forecast_logical, forecast_physical) AS monthly_spending,
ABS(forecast_compare) AS monthly_savings,
ABS(forecast_compare)/IF(currently_on = "logical", forecast_logical, forecast_physical) AS monthly_savings_pct,
FROM storage_prices_compare
),
storage_ddl AS (
SELECT
*,
IF(time_travel_hours IS NULL,
CONCAT("ALTER SCHEMA `", project_name, ".", dataset_name, "` SET OPTIONS(storage_billing_model='", better_on, "' );" ),
CONCAT("ALTER SCHEMA `", project_name, ".", dataset_name, "` SET OPTIONS(storage_billing_model='", better_on, "', max_time_travel_hours=", time_travel_hours, ");" )
) AS ddl,
FROM storage_prices_compare2
WHERE better_on != currently_on
AND monthly_savings > min_monthly_saving
AND monthly_savings_pct > min_monthly_saving_pct
ORDER BY monthly_savings DESC
)
-- REMEMBER: (optional) Change here the name of the pseudo-table (CTE) you want to check
SELECT * FROM storage_ddl;