From cae3d7bea25c8b8841a73093eb13b767d346551d Mon Sep 17 00:00:00 2001 From: Carlos Timoteo Date: Wed, 11 Oct 2023 21:55:53 +0000 Subject: [PATCH] optimizing scalability of training and inference preparation sp --- ...ce_segmentation_inference_preparation.sqlx | 155 ++++++++++++------ ...nce_segmentation_training_preparation.sqlx | 125 ++++++++++++-- ..._lifetime_value_inference_preparation.sqlx | 12 +- ...r_lifetime_value_training_preparation.sqlx | 2 +- ...hase_propensity_inference_preparation.sqlx | 7 +- ...chase_propensity_training_preparation.sqlx | 3 +- 6 files changed, 226 insertions(+), 78 deletions(-) diff --git a/sql/procedure/audience_segmentation_inference_preparation.sqlx b/sql/procedure/audience_segmentation_inference_preparation.sqlx index ed74268e..8b5ae474 100644 --- a/sql/procedure/audience_segmentation_inference_preparation.sqlx +++ b/sql/procedure/audience_segmentation_inference_preparation.sqlx @@ -17,12 +17,13 @@ DECLARE lastest_processed_time_ud TIMESTAMP; DECLARE lastest_processed_time_uwm TIMESTAMP; DECLARE lastest_processed_time_um TIMESTAMP; +-- Setting procedure to lookback from the day before `inference_date` +SET inference_date = DATE_SUB(inference_date, INTERVAL 1 DAY); + SET lastest_processed_time_ud = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_segmentation_dimensions` WHERE feature_date = inference_date LIMIT 1); SET lastest_processed_time_uwm = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_lookback_metrics` WHERE feature_date = inference_date LIMIT 1); SET lastest_processed_time_um = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_segmentation_metrics` WHERE feature_date = inference_date LIMIT 1); -SET inference_date = DATE_SUB(inference_date, INTERVAL 1 DAY); - CREATE OR REPLACE TEMP TABLE inference_preparation_ud as ( SELECT DISTINCT UD.user_pseudo_id, @@ -62,42 +63,10 @@ CREATE OR REPLACE TEMP TABLE inference_preparation_ud as ( user_segmentation_dimensions_window AS (PARTITION BY UD.user_pseudo_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ); - - - - - - - -CREATE TEMP TABLE inference_preparation AS ( +CREATE TEMP TABLE inference_preparation_uwm AS ( SELECT DISTINCT - UD.user_pseudo_id, - MAX(UD.user_id) OVER(user_segmentation_dimensions_window) AS user_id, - UD.feature_date, - MAX(UD.month_of_the_year) OVER(user_segmentation_dimensions_window) AS month_of_the_year, - MAX(UD.week_of_the_year) OVER(user_segmentation_dimensions_window) AS week_of_the_year, - MAX(UD.day_of_the_month) OVER(user_segmentation_dimensions_window) AS day_of_the_month, - MAX(UD.day_of_week) OVER(user_segmentation_dimensions_window) AS day_of_week, - MAX(UD.device_category) OVER(user_segmentation_dimensions_window) AS device_category, - MAX(UD.device_mobile_brand_name) OVER(user_segmentation_dimensions_window) AS device_mobile_brand_name, - MAX(UD.device_mobile_model_name) OVER(user_segmentation_dimensions_window) AS device_mobile_model_name, - MAX(UD.device_os) OVER(user_segmentation_dimensions_window) AS device_os, - MAX(UD.device_os_version) OVER(user_segmentation_dimensions_window) AS device_os_version, - MAX(UD.device_language) OVER(user_segmentation_dimensions_window) AS device_language, - MAX(UD.device_web_browser) OVER(user_segmentation_dimensions_window) AS device_web_browser, - MAX(UD.device_web_browser_version) OVER(user_segmentation_dimensions_window) AS device_web_browser_version, - MAX(UD.geo_sub_continent) OVER(user_segmentation_dimensions_window) AS geo_sub_continent, - MAX(UD.geo_country) OVER(user_segmentation_dimensions_window) AS geo_country, - MAX(UD.geo_region) OVER(user_segmentation_dimensions_window) AS geo_region, - MAX(UD.geo_city) OVER(user_segmentation_dimensions_window) AS geo_city, - MAX(UD.geo_metro) OVER(user_segmentation_dimensions_window) AS geo_metro, - MAX(UD.last_traffic_source_medium) OVER(user_segmentation_dimensions_window) AS last_traffic_source_medium, - MAX(UD.last_traffic_source_name) OVER(user_segmentation_dimensions_window) AS last_traffic_source_name, - MAX(UD.last_traffic_source_source) OVER(user_segmentation_dimensions_window) AS last_traffic_source_source, - MAX(UD.first_traffic_source_medium) OVER(user_segmentation_dimensions_window) AS first_traffic_source_medium, - MAX(UD.first_traffic_source_name) OVER(user_segmentation_dimensions_window) AS first_traffic_source_name, - MAX(UD.first_traffic_source_source) OVER(user_segmentation_dimensions_window) AS first_traffic_source_source, - MAX(UD.has_signed_in_with_user_id) OVER(user_segmentation_dimensions_window) AS has_signed_in_with_user_id, + UWM.user_pseudo_id, + UWM.feature_date, MAX(UWM.active_users_past_1_7_day) OVER(user_lookback_metrics_window) AS active_users_past_1_7_day, MAX(UWM.active_users_past_8_14_day) OVER(user_lookback_metrics_window) AS active_users_past_8_14_day, MAX(UWM.purchases_past_1_7_day) OVER(user_lookback_metrics_window) AS purchases_past_1_7_day, @@ -111,7 +80,22 @@ CREATE TEMP TABLE inference_preparation AS ( MAX(UWM.checkouts_past_1_7_day) OVER(user_lookback_metrics_window) AS checkouts_past_1_7_day, MAX(UWM.checkouts_past_8_14_day) OVER(user_lookback_metrics_window) AS checkouts_past_8_14_day, MAX(UWM.ltv_revenue_past_1_7_day) OVER(user_lookback_metrics_window) AS ltv_revenue_past_1_7_day, - MAX(UWM.ltv_revenue_past_7_15_day) OVER(user_lookback_metrics_window) AS ltv_revenue_past_7_15_day, + MAX(UWM.ltv_revenue_past_7_15_day) OVER(user_lookback_metrics_window) AS ltv_revenue_past_7_15_day + FROM + `{{feature_store_project_id}}.{{feature_store_dataset}}.user_lookback_metrics` UWM + WHERE + -- Define the training+validation subset interval + UWM.feature_date = inference_date + AND UWM.processed_timestamp = lastest_processed_time_uwm + WINDOW + user_lookback_metrics_window AS (PARTITION BY UWM.user_pseudo_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +); + + + +CREATE TEMP TABLE inference_preparation_um AS ( + SELECT DISTINCT + UM.feature_date, MAX(UM.purchasers_users) OVER(user_scoped_segmentation_metrics_window) AS purchasers_users, MAX(UM.average_daily_purchasers) OVER(user_scoped_segmentation_metrics_window) AS average_daily_purchasers, MAX(UM.active_users) OVER(user_scoped_segmentation_metrics_window) AS active_users, @@ -138,26 +122,95 @@ CREATE TEMP TABLE inference_preparation AS ( MAX(UM.avg_user_conversion_rate) OVER(user_scoped_segmentation_metrics_window) AS avg_user_conversion_rate, MAX(UM.avg_session_conversion_rate) OVER(user_scoped_segmentation_metrics_window) AS avg_session_conversion_rate FROM - `{{feature_store_project_id}}.{{feature_store_dataset}}.user_segmentation_dimensions` UD + `{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_segmentation_metrics` UM + WHERE + -- Define the training+validation subset interval + UM.feature_date = inference_date + AND UM.processed_timestamp = lastest_processed_time_um + WINDOW + user_scoped_segmentation_metrics_window AS (PARTITION BY UM.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +); + + +CREATE TEMP TABLE inference_preparation AS ( + SELECT DISTINCT + UD.user_pseudo_id, + UD.user_id, + UD.feature_date, + UD.month_of_the_year, + UD.week_of_the_year, + UD.day_of_the_month, + UD.day_of_week, + UD.device_category, + UD.device_mobile_brand_name, + UD.device_mobile_model_name, + UD.device_os, + UD.device_os_version, + UD.device_language, + UD.device_web_browser, + UD.device_web_browser_version, + UD.geo_sub_continent, + UD.geo_country, + UD.geo_region, + UD.geo_city, + UD.geo_metro, + UD.last_traffic_source_medium, + UD.last_traffic_source_name, + UD.last_traffic_source_source, + UD.first_traffic_source_medium, + UD.first_traffic_source_name, + UD.first_traffic_source_source, + UD.has_signed_in_with_user_id, + UWM.active_users_past_1_7_day, + UWM.active_users_past_8_14_day, + UWM.purchases_past_1_7_day, + UWM.purchases_past_8_14_day, + UWM.visits_past_1_7_day, + UWM.visits_past_8_14_day, + UWM.view_items_past_1_7_day, + UWM.view_items_past_8_14_day, + UWM.add_to_carts_past_1_7_day, + UWM.add_to_carts_past_8_14_day, + UWM.checkouts_past_1_7_day, + UWM.checkouts_past_8_14_day, + UWM.ltv_revenue_past_1_7_day, + UWM.ltv_revenue_past_7_15_day, + UM.purchasers_users, + UM.average_daily_purchasers, + UM.active_users, + UM.DAU, + UM.MAU, + UM.WAU, + UM.dau_per_mau, + UM.dau_per_wau, + UM.wau_per_mau, + UM.users_engagement_duration_seconds, + UM.average_engagement_time, + UM.average_engagement_time_per_session, + UM.average_sessions_per_user, + UM.ARPPU, + UM.ARPU, + UM.average_daily_revenue, + UM.max_daily_revenue, + UM.min_daily_revenue, + UM.new_users, + UM.returning_users, + UM.first_time_purchasers, + UM.first_time_purchaser_conversion, + UM.first_time_purchasers_per_new_user, + UM.avg_user_conversion_rate, + UM.avg_session_conversion_rate + FROM + inference_preparation_ud UD INNER JOIN - `{{feature_store_project_id}}.{{feature_store_dataset}}.user_lookback_metrics` UWM + inference_preparation_uwm UWM ON UWM.user_pseudo_id = UD.user_pseudo_id AND UWM.feature_date = UD.feature_date INNER JOIN - `{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_segmentation_metrics` UM + inference_preparation_um UM ON UM.feature_date = UD.feature_date - WHERE - -- Define the training+validation subset interval - UD.feature_date = inference_date - AND UD.processed_timestamp = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_segmentation_dimensions` WHERE feature_date = inference_date LIMIT 1) - AND UWM.processed_timestamp = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_lookback_metrics` WHERE feature_date = inference_date LIMIT 1) - AND UM.processed_timestamp = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_segmentation_metrics` WHERE feature_date = inference_date LIMIT 1) - WINDOW - user_segmentation_dimensions_window AS (PARTITION BY UD.user_pseudo_id, UD.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), - user_lookback_metrics_window AS (PARTITION BY UWM.user_pseudo_id, UWM.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), - user_scoped_segmentation_metrics_window AS (PARTITION BY UM.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ); DELETE FROM `{{project_id}}.{{dataset}}.{{insert_table}}` WHERE TRUE; diff --git a/sql/procedure/audience_segmentation_training_preparation.sqlx b/sql/procedure/audience_segmentation_training_preparation.sqlx index 2fdba9c4..5e745095 100644 --- a/sql/procedure/audience_segmentation_training_preparation.sqlx +++ b/sql/procedure/audience_segmentation_training_preparation.sqlx @@ -17,9 +17,9 @@ DECLARE min_date DATE; SET max_date = (SELECT DATE_SUB(MAX(event_date), INTERVAL 1 DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); SET min_date = (SELECT DATE_ADD(MIN(event_date), INTERVAL 15 DAY) FROM `{{mds_project_id}}.{{mds_dataset}}.event`); -CREATE TEMP TABLE training_preparation as ( +CREATE TEMP TABLE training_preparation_ud as ( SELECT DISTINCT - UD.user_pseudo_id, + UD.user_pseudo_id, MAX(UD.user_id) OVER(user_segmentation_dimensions_window) AS user_id, UD.feature_date, MAX(UD.month_of_the_year) OVER(user_segmentation_dimensions_window) AS month_of_the_year, @@ -45,7 +45,21 @@ CREATE TEMP TABLE training_preparation as ( MAX(UD.first_traffic_source_medium) OVER(user_segmentation_dimensions_window) AS first_traffic_source_medium, MAX(UD.first_traffic_source_name) OVER(user_segmentation_dimensions_window) AS first_traffic_source_name, MAX(UD.first_traffic_source_source) OVER(user_segmentation_dimensions_window) AS first_traffic_source_source, - MAX(UD.has_signed_in_with_user_id) OVER(user_segmentation_dimensions_window) AS has_signed_in_with_user_id, + MAX(UD.has_signed_in_with_user_id) OVER(user_segmentation_dimensions_window) AS has_signed_in_with_user_id +FROM + `{{feature_store_project_id}}.{{feature_store_dataset}}.user_segmentation_dimensions` UD +WHERE + -- Define the training+validation subset interval + UD.feature_date BETWEEN GREATEST(start_date, min_date) AND LEAST(end_date, max_date) +WINDOW + user_segmentation_dimensions_window AS (PARTITION BY UD.user_pseudo_id, UD.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +); + + +CREATE TEMP TABLE training_preparation_uwm as ( + SELECT DISTINCT + UWM.user_pseudo_id, + UWM.feature_date, MAX(UWM.active_users_past_1_7_day) OVER(user_lookback_metrics_window) AS active_users_past_1_7_day, MAX(UWM.active_users_past_8_14_day) OVER(user_lookback_metrics_window) AS active_users_past_8_14_day, MAX(UWM.purchases_past_1_7_day) OVER(user_lookback_metrics_window) AS purchases_past_1_7_day, @@ -59,7 +73,19 @@ CREATE TEMP TABLE training_preparation as ( MAX(UWM.checkouts_past_1_7_day) OVER(user_lookback_metrics_window) AS checkouts_past_1_7_day, MAX(UWM.checkouts_past_8_14_day) OVER(user_lookback_metrics_window) AS checkouts_past_8_14_day, MAX(UWM.ltv_revenue_past_1_7_day) OVER(user_lookback_metrics_window) AS ltv_revenue_past_1_7_day, - MAX(UWM.ltv_revenue_past_7_15_day) OVER(user_lookback_metrics_window) AS ltv_revenue_past_7_15_day, + MAX(UWM.ltv_revenue_past_7_15_day) OVER(user_lookback_metrics_window) AS ltv_revenue_past_7_15_day +FROM + `{{feature_store_project_id}}.{{feature_store_dataset}}.user_lookback_metrics` UWM +WHERE + -- Define the training+validation subset interval + UWM.feature_date BETWEEN GREATEST(start_date, min_date) AND LEAST(end_date, max_date) +WINDOW + user_lookback_metrics_window AS (PARTITION BY UWM.user_pseudo_id, UWM.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +); + +CREATE TEMP TABLE training_preparation_um as ( + SELECT DISTINCT + UM.feature_date, MAX(UM.purchasers_users) OVER(user_scoped_segmentation_metrics_window) AS purchasers_users, MAX(UM.average_daily_purchasers) OVER(user_scoped_segmentation_metrics_window) AS average_daily_purchasers, MAX(UM.active_users) OVER(user_scoped_segmentation_metrics_window) AS active_users, @@ -86,23 +112,94 @@ CREATE TEMP TABLE training_preparation as ( MAX(UM.avg_user_conversion_rate) OVER(user_scoped_segmentation_metrics_window) AS avg_user_conversion_rate, MAX(UM.avg_session_conversion_rate) OVER(user_scoped_segmentation_metrics_window) AS avg_session_conversion_rate FROM - `{{feature_store_project_id}}.{{feature_store_dataset}}.user_segmentation_dimensions` UD + `{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_segmentation_metrics` UM +WHERE + -- Define the training+validation subset interval + UM.feature_date BETWEEN GREATEST(start_date, min_date) AND LEAST(end_date, max_date) +WINDOW + user_scoped_segmentation_metrics_window AS (PARTITION BY UM.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +); + + +CREATE TEMP TABLE training_preparation as ( + SELECT DISTINCT + UD.user_pseudo_id, + UD.user_id, + UD.feature_date, + UD.month_of_the_year, + UD.week_of_the_year, + UD.day_of_the_month, + UD.day_of_week, + UD.device_category, + UD.device_mobile_brand_name, + UD.device_mobile_model_name, + UD.device_os, + UD.device_os_version, + UD.device_language, + UD.device_web_browser, + UD.device_web_browser_version, + UD.geo_sub_continent, + UD.geo_country, + UD.geo_region, + UD.geo_city, + UD.geo_metro, + UD.last_traffic_source_medium, + UD.last_traffic_source_name, + UD.last_traffic_source_source, + UD.first_traffic_source_medium, + UD.first_traffic_source_name, + UD.first_traffic_source_source, + UD.has_signed_in_with_user_id, + UWM.active_users_past_1_7_day, + UWM.active_users_past_8_14_day, + UWM.purchases_past_1_7_day, + UWM.purchases_past_8_14_day, + UWM.visits_past_1_7_day, + UWM.visits_past_8_14_day, + UWM.view_items_past_1_7_day, + UWM.view_items_past_8_14_day, + UWM.add_to_carts_past_1_7_day, + UWM.add_to_carts_past_8_14_day, + UWM.checkouts_past_1_7_day, + UWM.checkouts_past_8_14_day, + UWM.ltv_revenue_past_1_7_day, + UWM.ltv_revenue_past_7_15_day, + UM.purchasers_users, + UM.average_daily_purchasers, + UM.active_users, + UM.DAU, + UM.MAU, + UM.WAU, + UM.dau_per_mau, + UM.dau_per_wau, + UM.wau_per_mau, + UM.users_engagement_duration_seconds, + UM.average_engagement_time, + UM.average_engagement_time_per_session, + UM.average_sessions_per_user, + UM.ARPPU, + UM.ARPU, + UM.average_daily_revenue, + UM.max_daily_revenue, + UM.min_daily_revenue, + UM.new_users, + UM.returning_users, + UM.first_time_purchasers, + UM.first_time_purchaser_conversion, + UM.first_time_purchasers_per_new_user, + UM.avg_user_conversion_rate, + UM.avg_session_conversion_rate +FROM + training_preparation_ud UD INNER JOIN - `{{feature_store_project_id}}.{{feature_store_dataset}}.user_lookback_metrics` UWM + training_preparation_uwm UWM ON UWM.user_pseudo_id = UD.user_pseudo_id AND UWM.feature_date = UD.feature_date INNER JOIN - `{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_segmentation_metrics` UM + training_preparation_um UM ON UM.feature_date = UD.feature_date -WHERE - -- Define the training+validation subset interval - UD.feature_date BETWEEN GREATEST(start_date, min_date) AND LEAST(end_date, max_date) -WINDOW - user_segmentation_dimensions_window AS (PARTITION BY UD.user_pseudo_id, UD.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), - user_lookback_metrics_window AS (PARTITION BY UWM.user_pseudo_id, UWM.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), - user_scoped_segmentation_metrics_window AS (PARTITION BY UM.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ); diff --git a/sql/procedure/customer_lifetime_value_inference_preparation.sqlx b/sql/procedure/customer_lifetime_value_inference_preparation.sqlx index 3f516d67..4aef8a71 100644 --- a/sql/procedure/customer_lifetime_value_inference_preparation.sqlx +++ b/sql/procedure/customer_lifetime_value_inference_preparation.sqlx @@ -16,13 +16,13 @@ DECLARE lastest_processed_time_ud TIMESTAMP; DECLARE lastest_processed_time_uwm TIMESTAMP; DECLARE lastest_processed_time_um TIMESTAMP; +-- Setting procedure to lookback from the day before `inference_date` +SET inference_date = DATE_SUB(inference_date, INTERVAL 1 DAY); + SET lastest_processed_time_ud = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_lifetime_dimensions` WHERE feature_date = inference_date LIMIT 1); SET lastest_processed_time_uwm = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_rolling_window_lifetime_metrics` WHERE feature_date = inference_date LIMIT 1); SET lastest_processed_time_um = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_lifetime_metrics` WHERE feature_date = inference_date LIMIT 1); --- Setting procedure to lookback from the day before `inference_date` -SET inference_date = DATE_SUB(inference_date, INTERVAL 1 DAY); - CREATE OR REPLACE TEMP TABLE inference_preparation_ud as ( SELECT DISTINCT UD.user_pseudo_id, @@ -248,14 +248,14 @@ CREATE OR REPLACE TEMP TABLE inference_preparation as ( UM.lifetime_avg_user_conversion_rate, UM.lifetime_avg_session_conversion_rate FROM - `{{feature_store_project_id}}.{{feature_store_dataset}}.user_lifetime_dimensions` UD + inference_preparation_ud UD INNER JOIN - `{{feature_store_project_id}}.{{feature_store_dataset}}.user_rolling_window_lifetime_metrics` UWM + inference_preparation_uwm UWM ON UWM.user_pseudo_id = UD.user_pseudo_id AND UWM.feature_date = UD.feature_date INNER JOIN - `{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_lifetime_metrics` UM + inference_preparation_um UM ON UM.feature_date = UD.feature_date ); diff --git a/sql/procedure/customer_lifetime_value_training_preparation.sqlx b/sql/procedure/customer_lifetime_value_training_preparation.sqlx index 5debe187..b196a0fe 100644 --- a/sql/procedure/customer_lifetime_value_training_preparation.sqlx +++ b/sql/procedure/customer_lifetime_value_training_preparation.sqlx @@ -157,7 +157,7 @@ FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.customer_lifetime_value_label` LABEL WHERE -- Define the training subset interval - UD.feature_date BETWEEN GREATEST(start_date, min_date) AND LEAST(end_date, max_date) + LABEL.feature_date BETWEEN GREATEST(start_date, min_date) AND LEAST(end_date, max_date) WINDOW customer_lifetime_value_window AS (PARTITION BY LABEL.user_pseudo_id, LABEL.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ); diff --git a/sql/procedure/purchase_propensity_inference_preparation.sqlx b/sql/procedure/purchase_propensity_inference_preparation.sqlx index 446546f8..c4f306ca 100644 --- a/sql/procedure/purchase_propensity_inference_preparation.sqlx +++ b/sql/procedure/purchase_propensity_inference_preparation.sqlx @@ -17,14 +17,14 @@ DECLARE lastest_processed_time_useam TIMESTAMP; DECLARE lastest_processed_time_uwm TIMESTAMP; DECLARE lastest_processed_time_um TIMESTAMP; +-- Setting procedure to lookback from the day before `inference_date` +SET inference_date = DATE_SUB(inference_date, INTERVAL 1 DAY); + SET lastest_processed_time_ud = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_dimensions` WHERE feature_date = inference_date LIMIT 1); SET lastest_processed_time_useam = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_session_event_aggregated_metrics` WHERE feature_date = inference_date LIMIT 1); SET lastest_processed_time_uwm = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_rolling_window_metrics` WHERE feature_date = inference_date LIMIT 1); SET lastest_processed_time_um = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_metrics` WHERE feature_date = inference_date LIMIT 1); --- Setting procedure to lookback from the day before `inference_date` -SET inference_date = DATE_SUB(inference_date, INTERVAL 1 DAY); - CREATE OR REPLACE TEMP TABLE inference_preparation_ud as ( SELECT DISTINCT UD.user_pseudo_id, @@ -378,7 +378,6 @@ ON UM.feature_date = UD.feature_date ); - DELETE FROM `{{project_id}}.{{dataset}}.{{insert_table}}` WHERE TRUE; INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` diff --git a/sql/procedure/purchase_propensity_training_preparation.sqlx b/sql/procedure/purchase_propensity_training_preparation.sqlx index a033f66f..d5d062c6 100644 --- a/sql/procedure/purchase_propensity_training_preparation.sqlx +++ b/sql/procedure/purchase_propensity_training_preparation.sqlx @@ -227,7 +227,7 @@ FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.purchase_propensity_label` LABEL WHERE -- Define the training subset interval - UD.feature_date BETWEEN GREATEST(start_date, min_date) AND LEAST(end_date, max_date) + LABEL.feature_date BETWEEN GREATEST(start_date, min_date) AND LEAST(end_date, max_date) WINDOW purchase_propensity_label_window AS (PARTITION BY LABEL.user_pseudo_id, LABEL.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ); @@ -409,7 +409,6 @@ ON AND LABEL.feature_date = UD.feature_date ); - CREATE OR REPLACE TEMP TABLE DataForTargetTable AS( SELECT DISTINCT CASE