Skip to content

Commit

Permalink
tabular workflow dataset preparations
Browse files Browse the repository at this point in the history
  • Loading branch information
Carlos Timoteo committed Sep 27, 2023
1 parent 2456db1 commit 18ec970
Show file tree
Hide file tree
Showing 4 changed files with 506 additions and 492 deletions.
219 changes: 109 additions & 110 deletions sql/procedure/customer_lifetime_value_inference_preparation.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -17,116 +17,115 @@ SET inference_date = DATE_SUB(inference_date, INTERVAL 1 DAY);

-- Builds the temp table `inference_preparation`: the feature rows used for
-- customer-lifetime-value inference on `inference_date`, combining
--   UD  = user_lifetime_dimensions              (per user, per feature_date)
--   UWM = user_rolling_window_lifetime_metrics  (per user, per feature_date)
--   UM  = user_scoped_lifetime_metrics          (joined by feature_date only)
--
-- NOTE(review): this text is a rendered commit diff without +/- markers. The
-- first select list (plain UD./UWM./UM. column references, through the first
-- WHERE) appears to be the removed version, and the LAST_VALUE(...) select list
-- that follows it appears to be the added version. As written the two are fused
-- into one statement and will not parse -- confirm against the repository
-- before executing.
CREATE TEMP TABLE inference_preparation as (
SELECT DISTINCT
UD.user_pseudo_id,
UD.user_id,
UD.feature_date,
UD.month_of_the_year,
UD.week_of_the_year,
UD.day_of_the_month,
UD.day_of_week,
-- The five columns below are commented out and therefore not selected.
--UD.hour_of_day,
--UD.nth_day,
--UD.nth_hour,
--UD.nth_week,
--UD.nth_month,
UD.device_category,
UD.device_mobile_brand_name,
UD.device_mobile_model_name,
UD.device_os,
UD.device_os_version,
UD.device_language,
UD.device_web_browser,
UD.device_web_browser_version,
UD.geo_sub_continent,
UD.geo_country,
UD.geo_region,
UD.geo_city,
UD.geo_metro,
UD.last_traffic_source_medium,
UD.last_traffic_source_name,
UD.last_traffic_source_source,
UD.first_traffic_source_medium,
UD.first_traffic_source_name,
UD.first_traffic_source_source,
UD.has_signed_in_with_user_id,
UWM.active_users_past_1_30_day,
UWM.active_users_past_30_60_day,
UWM.active_users_past_60_90_day,
UWM.active_users_past_90_120_day,
UWM.active_users_past_120_150_day,
UWM.active_users_past_150_180_day,
UWM.purchases_past_1_30_day,
UWM.purchases_past_30_60_day,
UWM.purchases_past_60_90_day,
UWM.purchases_past_90_120_day,
UWM.purchases_past_120_150_day,
UWM.purchases_past_150_180_day,
UWM.visits_past_1_30_day,
UWM.visits_past_30_60_day,
UWM.visits_past_60_90_day,
UWM.visits_past_90_120_day,
UWM.visits_past_120_150_day,
UWM.visits_past_150_180_day,
UWM.view_items_past_1_30_day,
UWM.view_items_past_30_60_day,
UWM.view_items_past_60_90_day,
UWM.view_items_past_90_120_day,
UWM.view_items_past_120_150_day,
UWM.view_items_past_150_180_day,
UWM.add_to_carts_past_1_30_day,
UWM.add_to_carts_past_30_60_day,
UWM.add_to_carts_past_60_90_day,
UWM.add_to_carts_past_90_120_day,
UWM.add_to_carts_past_120_150_day,
UWM.add_to_carts_past_150_180_day,
UWM.checkouts_past_1_30_day,
UWM.checkouts_past_30_60_day,
UWM.checkouts_past_60_90_day,
UWM.checkouts_past_90_120_day,
UWM.checkouts_past_120_150_day,
UWM.checkouts_past_150_180_day,
UWM.ltv_revenue_past_1_30_day,
UWM.ltv_revenue_past_30_90_day,
UWM.ltv_revenue_past_90_180_day,
UM.lifetime_purchasers_users,
UM.lifetime_average_daily_purchasers,
UM.lifetime_active_users,
UM.lifetime_DAU,
UM.lifetime_MAU,
UM.lifetime_WAU,
UM.lifetime_dau_per_mau,
UM.lifetime_dau_per_wau,
UM.lifetime_wau_per_mau,
UM.lifetime_users_engagement_duration_seconds,
UM.lifetime_average_engagement_time,
UM.lifetime_average_engagement_time_per_session,
UM.lifetime_average_sessions_per_user,
UM.lifetime_ARPPU,
UM.lifetime_ARPU,
UM.lifetime_average_daily_revenue,
UM.lifetime_max_daily_revenue,
UM.lifetime_min_daily_revenue,
UM.lifetime_new_users,
UM.lifetime_returning_users,
UM.lifetime_first_time_purchasers,
UM.lifetime_first_time_purchaser_conversion,
UM.lifetime_first_time_purchasers_per_new_user,
UM.lifetime_avg_user_conversion_rate,
UM.lifetime_avg_session_conversion_rate
FROM
`{{feature_store_project_id}}.{{feature_store_dataset}}.user_lifetime_dimensions` UD
INNER JOIN
`{{feature_store_project_id}}.{{feature_store_dataset}}.user_rolling_window_lifetime_metrics` UWM
ON
UWM.user_pseudo_id = UD.user_pseudo_id
AND UWM.feature_date = UD.feature_date
INNER JOIN
`{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_lifetime_metrics` UM
ON
UM.feature_date = UD.feature_date
WHERE
-- Keep only rows for the single inference date (an equality on feature_date,
-- not a training/validation interval).
UD.feature_date = inference_date
-- NOTE(review): from here the select list repeats in LAST_VALUE(...) form;
-- presumably this is the added side of the diff (see note at the top).
-- user_pseudo_id and feature_date are selected as-is; every other column takes
-- the value from the most recently processed row of its source table (see the
-- WINDOW clause at the end of the statement).
UD.user_pseudo_id,
LAST_VALUE(UD.user_id) OVER(user_lifetime_dimensions_window) AS user_id,
UD.feature_date,
LAST_VALUE(UD.month_of_the_year) OVER(user_lifetime_dimensions_window) AS month_of_the_year,
LAST_VALUE(UD.week_of_the_year) OVER(user_lifetime_dimensions_window) AS week_of_the_year,
LAST_VALUE(UD.day_of_the_month) OVER(user_lifetime_dimensions_window) AS day_of_the_month,
LAST_VALUE(UD.day_of_week) OVER(user_lifetime_dimensions_window) AS day_of_week,
LAST_VALUE(UD.device_category) OVER(user_lifetime_dimensions_window) AS device_category,
LAST_VALUE(UD.device_mobile_brand_name) OVER(user_lifetime_dimensions_window) AS device_mobile_brand_name,
LAST_VALUE(UD.device_mobile_model_name) OVER(user_lifetime_dimensions_window) AS device_mobile_model_name,
LAST_VALUE(UD.device_os) OVER(user_lifetime_dimensions_window) AS device_os,
LAST_VALUE(UD.device_os_version) OVER(user_lifetime_dimensions_window) AS device_os_version,
LAST_VALUE(UD.device_language) OVER(user_lifetime_dimensions_window) AS device_language,
LAST_VALUE(UD.device_web_browser) OVER(user_lifetime_dimensions_window) AS device_web_browser,
LAST_VALUE(UD.device_web_browser_version) OVER(user_lifetime_dimensions_window) AS device_web_browser_version,
LAST_VALUE(UD.geo_sub_continent) OVER(user_lifetime_dimensions_window) AS geo_sub_continent,
LAST_VALUE(UD.geo_country) OVER(user_lifetime_dimensions_window) AS geo_country,
LAST_VALUE(UD.geo_region) OVER(user_lifetime_dimensions_window) AS geo_region,
LAST_VALUE(UD.geo_city) OVER(user_lifetime_dimensions_window) AS geo_city,
LAST_VALUE(UD.geo_metro) OVER(user_lifetime_dimensions_window) AS geo_metro,
LAST_VALUE(UD.last_traffic_source_medium) OVER(user_lifetime_dimensions_window) AS last_traffic_source_medium,
LAST_VALUE(UD.last_traffic_source_name) OVER(user_lifetime_dimensions_window) AS last_traffic_source_name,
LAST_VALUE(UD.last_traffic_source_source) OVER(user_lifetime_dimensions_window) AS last_traffic_source_source,
LAST_VALUE(UD.first_traffic_source_medium) OVER(user_lifetime_dimensions_window) AS first_traffic_source_medium,
LAST_VALUE(UD.first_traffic_source_name) OVER(user_lifetime_dimensions_window) AS first_traffic_source_name,
LAST_VALUE(UD.first_traffic_source_source) OVER(user_lifetime_dimensions_window) AS first_traffic_source_source,
LAST_VALUE(UD.has_signed_in_with_user_id) OVER(user_lifetime_dimensions_window) AS has_signed_in_with_user_id,
LAST_VALUE(UWM.active_users_past_1_30_day) OVER(user_lifetime_rolling_window) AS active_users_past_1_30_day,
LAST_VALUE(UWM.active_users_past_30_60_day) OVER(user_lifetime_rolling_window) AS active_users_past_30_60_day,
LAST_VALUE(UWM.active_users_past_60_90_day) OVER(user_lifetime_rolling_window) AS active_users_past_60_90_day,
LAST_VALUE(UWM.active_users_past_90_120_day) OVER(user_lifetime_rolling_window) AS active_users_past_90_120_day,
LAST_VALUE(UWM.active_users_past_120_150_day) OVER(user_lifetime_rolling_window) AS active_users_past_120_150_day,
LAST_VALUE(UWM.active_users_past_150_180_day) OVER(user_lifetime_rolling_window) AS active_users_past_150_180_day,
LAST_VALUE(UWM.purchases_past_1_30_day) OVER(user_lifetime_rolling_window) AS purchases_past_1_30_day,
LAST_VALUE(UWM.purchases_past_30_60_day) OVER(user_lifetime_rolling_window) AS purchases_past_30_60_day,
LAST_VALUE(UWM.purchases_past_60_90_day) OVER(user_lifetime_rolling_window) AS purchases_past_60_90_day,
LAST_VALUE(UWM.purchases_past_90_120_day) OVER(user_lifetime_rolling_window) AS purchases_past_90_120_day,
LAST_VALUE(UWM.purchases_past_120_150_day) OVER(user_lifetime_rolling_window) AS purchases_past_120_150_day,
LAST_VALUE(UWM.purchases_past_150_180_day) OVER(user_lifetime_rolling_window) AS purchases_past_150_180_day,
LAST_VALUE(UWM.visits_past_1_30_day) OVER(user_lifetime_rolling_window) AS visits_past_1_30_day,
LAST_VALUE(UWM.visits_past_30_60_day) OVER(user_lifetime_rolling_window) AS visits_past_30_60_day,
LAST_VALUE(UWM.visits_past_60_90_day) OVER(user_lifetime_rolling_window) AS visits_past_60_90_day,
LAST_VALUE(UWM.visits_past_90_120_day) OVER(user_lifetime_rolling_window) AS visits_past_90_120_day,
LAST_VALUE(UWM.visits_past_120_150_day) OVER(user_lifetime_rolling_window) AS visits_past_120_150_day,
LAST_VALUE(UWM.visits_past_150_180_day) OVER(user_lifetime_rolling_window) AS visits_past_150_180_day,
LAST_VALUE(UWM.view_items_past_1_30_day) OVER(user_lifetime_rolling_window) AS view_items_past_1_30_day,
LAST_VALUE(UWM.view_items_past_30_60_day) OVER(user_lifetime_rolling_window) AS view_items_past_30_60_day,
LAST_VALUE(UWM.view_items_past_60_90_day) OVER(user_lifetime_rolling_window) AS view_items_past_60_90_day,
LAST_VALUE(UWM.view_items_past_90_120_day) OVER(user_lifetime_rolling_window) AS view_items_past_90_120_day,
LAST_VALUE(UWM.view_items_past_120_150_day) OVER(user_lifetime_rolling_window) AS view_items_past_120_150_day,
LAST_VALUE(UWM.view_items_past_150_180_day) OVER(user_lifetime_rolling_window) AS view_items_past_150_180_day,
LAST_VALUE(UWM.add_to_carts_past_1_30_day) OVER(user_lifetime_rolling_window) AS add_to_carts_past_1_30_day,
LAST_VALUE(UWM.add_to_carts_past_30_60_day) OVER(user_lifetime_rolling_window) AS add_to_carts_past_30_60_day,
LAST_VALUE(UWM.add_to_carts_past_60_90_day) OVER(user_lifetime_rolling_window) AS add_to_carts_past_60_90_day,
LAST_VALUE(UWM.add_to_carts_past_90_120_day) OVER(user_lifetime_rolling_window) AS add_to_carts_past_90_120_day,
LAST_VALUE(UWM.add_to_carts_past_120_150_day) OVER(user_lifetime_rolling_window) AS add_to_carts_past_120_150_day,
LAST_VALUE(UWM.add_to_carts_past_150_180_day) OVER(user_lifetime_rolling_window) AS add_to_carts_past_150_180_day,
LAST_VALUE(UWM.checkouts_past_1_30_day) OVER(user_lifetime_rolling_window) AS checkouts_past_1_30_day,
LAST_VALUE(UWM.checkouts_past_30_60_day) OVER(user_lifetime_rolling_window) AS checkouts_past_30_60_day,
LAST_VALUE(UWM.checkouts_past_60_90_day) OVER(user_lifetime_rolling_window) AS checkouts_past_60_90_day,
LAST_VALUE(UWM.checkouts_past_90_120_day) OVER(user_lifetime_rolling_window) AS checkouts_past_90_120_day,
LAST_VALUE(UWM.checkouts_past_120_150_day) OVER(user_lifetime_rolling_window) AS checkouts_past_120_150_day,
LAST_VALUE(UWM.checkouts_past_150_180_day) OVER(user_lifetime_rolling_window) AS checkouts_past_150_180_day,
LAST_VALUE(UWM.ltv_revenue_past_1_30_day) OVER(user_lifetime_rolling_window) AS ltv_revenue_past_1_30_day,
LAST_VALUE(UWM.ltv_revenue_past_30_90_day) OVER(user_lifetime_rolling_window) AS ltv_revenue_past_30_90_day,
LAST_VALUE(UWM.ltv_revenue_past_90_180_day) OVER(user_lifetime_rolling_window) AS ltv_revenue_past_90_180_day,
LAST_VALUE(UM.lifetime_purchasers_users) OVER(user_scoped_lifetime_metrics_window) AS lifetime_purchasers_users,
LAST_VALUE(UM.lifetime_average_daily_purchasers) OVER(user_scoped_lifetime_metrics_window) AS lifetime_average_daily_purchasers,
LAST_VALUE(UM.lifetime_active_users) OVER(user_scoped_lifetime_metrics_window) AS lifetime_active_users,
LAST_VALUE(UM.lifetime_DAU) OVER(user_scoped_lifetime_metrics_window) AS lifetime_DAU,
LAST_VALUE(UM.lifetime_MAU) OVER(user_scoped_lifetime_metrics_window) AS lifetime_MAU,
LAST_VALUE(UM.lifetime_WAU) OVER(user_scoped_lifetime_metrics_window) AS lifetime_WAU,
LAST_VALUE(UM.lifetime_dau_per_mau) OVER(user_scoped_lifetime_metrics_window) AS lifetime_dau_per_mau,
LAST_VALUE(UM.lifetime_dau_per_wau) OVER(user_scoped_lifetime_metrics_window) AS lifetime_dau_per_wau,
LAST_VALUE(UM.lifetime_wau_per_mau) OVER(user_scoped_lifetime_metrics_window) AS lifetime_wau_per_mau,
LAST_VALUE(UM.lifetime_users_engagement_duration_seconds) OVER(user_scoped_lifetime_metrics_window) AS lifetime_users_engagement_duration_seconds,
LAST_VALUE(UM.lifetime_average_engagement_time) OVER(user_scoped_lifetime_metrics_window) AS lifetime_average_engagement_time,
LAST_VALUE(UM.lifetime_average_engagement_time_per_session) OVER(user_scoped_lifetime_metrics_window) AS lifetime_average_engagement_time_per_session,
LAST_VALUE(UM.lifetime_average_sessions_per_user) OVER(user_scoped_lifetime_metrics_window) AS lifetime_average_sessions_per_user,
LAST_VALUE(UM.lifetime_ARPPU) OVER(user_scoped_lifetime_metrics_window) AS lifetime_ARPPU,
LAST_VALUE(UM.lifetime_ARPU) OVER(user_scoped_lifetime_metrics_window) AS lifetime_ARPU,
LAST_VALUE(UM.lifetime_average_daily_revenue) OVER(user_scoped_lifetime_metrics_window) AS lifetime_average_daily_revenue,
LAST_VALUE(UM.lifetime_max_daily_revenue) OVER(user_scoped_lifetime_metrics_window) AS lifetime_max_daily_revenue,
LAST_VALUE(UM.lifetime_min_daily_revenue) OVER(user_scoped_lifetime_metrics_window) AS lifetime_min_daily_revenue,
LAST_VALUE(UM.lifetime_new_users) OVER(user_scoped_lifetime_metrics_window) AS lifetime_new_users,
LAST_VALUE(UM.lifetime_returning_users) OVER(user_scoped_lifetime_metrics_window) AS lifetime_returning_users,
LAST_VALUE(UM.lifetime_first_time_purchasers) OVER(user_scoped_lifetime_metrics_window) AS lifetime_first_time_purchasers,
LAST_VALUE(UM.lifetime_first_time_purchaser_conversion) OVER(user_scoped_lifetime_metrics_window) AS lifetime_first_time_purchaser_conversion,
LAST_VALUE(UM.lifetime_first_time_purchasers_per_new_user) OVER(user_scoped_lifetime_metrics_window) AS lifetime_first_time_purchasers_per_new_user,
LAST_VALUE(UM.lifetime_avg_user_conversion_rate) OVER(user_scoped_lifetime_metrics_window) AS lifetime_avg_user_conversion_rate,
LAST_VALUE(UM.lifetime_avg_session_conversion_rate) OVER(user_scoped_lifetime_metrics_window) AS lifetime_avg_session_conversion_rate
FROM
`{{feature_store_project_id}}.{{feature_store_dataset}}.user_lifetime_dimensions` UD
-- Rolling-window metrics are matched to the dimensions row by user and date.
INNER JOIN
`{{feature_store_project_id}}.{{feature_store_dataset}}.user_rolling_window_lifetime_metrics` UWM
ON
UWM.user_pseudo_id = UD.user_pseudo_id
AND UWM.feature_date = UD.feature_date
-- UM is joined on feature_date alone (no user key), so every UM row for the
-- date is paired with every user row for that date.
INNER JOIN
`{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_lifetime_metrics` UM
ON
UM.feature_date = UD.feature_date
WHERE
-- Keep only rows for the single inference date (an equality on feature_date,
-- not a training/validation interval).
UD.feature_date = inference_date
-- Named windows shared by the LAST_VALUE calls above. Each one orders its
-- partition by processed_timestamp ascending and frames the whole partition
-- (UNBOUNDED PRECEDING to UNBOUNDED FOLLOWING), so LAST_VALUE yields the value
-- from the row with the greatest processed_timestamp. After that, all joined
-- rows sharing user_pseudo_id and feature_date carry identical values and
-- SELECT DISTINCT collapses them into one.
-- NOTE(review): if two rows in a partition tie on processed_timestamp, which
-- one is "last" is not determined by this ORDER BY -- confirm ties cannot occur.
WINDOW
user_lifetime_dimensions_window AS (PARTITION BY UD.user_pseudo_id, UD.feature_date ORDER BY UD.processed_timestamp ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
user_lifetime_rolling_window AS (PARTITION BY UWM.user_pseudo_id, UWM.feature_date ORDER BY UWM.processed_timestamp ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
user_scoped_lifetime_metrics_window AS (PARTITION BY UM.feature_date ORDER BY UM.processed_timestamp ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
);


Expand Down
Loading

0 comments on commit 18ec970

Please sign in to comment.