Skip to content

Commit

Permalink
removing duplicates and expiration time from views
Browse files Browse the repository at this point in the history
  • Loading branch information
Carlos Timoteo committed Sep 28, 2023
1 parent cb5c665 commit 9773ba3
Show file tree
Hide file tree
Showing 7 changed files with 178 additions and 172 deletions.
20 changes: 10 additions & 10 deletions config/config.yaml.tftpl
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ bigquery:
is_case_insensitive: TRUE
description: "Feature Store dataset for Marketing behavioural modeling"
friendly_name: "Feature Store"
max_time_travel_hours: 48
max_time_travel_hours: 168
default_partition_expiration_days: 365
default_table_expiration_days: 365
purchase_propensity:
Expand All @@ -405,7 +405,7 @@ bigquery:
is_case_insensitive: TRUE
description: "Purchase Propensity Use Case dataset for Marketing behavioural modeling"
friendly_name: "Purchase Propensity Dataset"
max_time_travel_hours: 48
max_time_travel_hours: 168
default_partition_expiration_days: 365
default_table_expiration_days: 365
customer_lifetime_value:
Expand All @@ -416,7 +416,7 @@ bigquery:
is_case_insensitive: TRUE
description: "Customer Lifetime Value Use Case dataset for Marketing behavioural modeling"
friendly_name: "Customer Lifetime Value Dataset"
max_time_travel_hours: 48
max_time_travel_hours: 168
default_partition_expiration_days: 365
default_table_expiration_days: 365
audience_segmentation:
Expand All @@ -427,7 +427,7 @@ bigquery:
is_case_insensitive: TRUE
description: "Audience Segmentation Use Case dataset for Marketing behavioural modeling"
friendly_name: "Audience Segmentation Dataset"
max_time_travel_hours: 48
max_time_travel_hours: 168
default_partition_expiration_days: 365
default_table_expiration_days: 365
table:
Expand Down Expand Up @@ -751,7 +751,7 @@ bigquery:
feature_store_dataset: "feature_store"
mds_project_id: "${project_id}"
mds_dataset: "${mds_dataset}"
expiration_duration_hours: 48
expiration_duration_hours: 168
samples_per_split: 100000
customer_lifetime_value_label:
project_id: "${project_id}"
Expand All @@ -769,7 +769,7 @@ bigquery:
feature_store_dataset: "feature_store"
mds_project_id: "${project_id}"
mds_dataset: "${mds_dataset}"
expiration_duration_hours: 48
expiration_duration_hours: 168
purchase_propensity_label:
project_id: "${project_id}"
dataset: "feature_store"
Expand All @@ -786,7 +786,7 @@ bigquery:
feature_store_dataset: "feature_store"
mds_project_id: "${project_id}"
mds_dataset: "${mds_dataset}"
expiration_duration_hours: 48
expiration_duration_hours: 168
user_dimensions:
project_id: "${project_id}"
dataset: "feature_store"
Expand Down Expand Up @@ -864,23 +864,23 @@ bigquery:
feature_store_project_id: "${project_id}"
feature_store_dataset: "feature_store"
insert_table: "purchase_propensity_inference_preparation"
expiration_duration_hours: 48
expiration_duration_hours: 168
customer_lifetime_value_inference_preparation:
project_id: "${project_id}"
dataset: "customer_lifetime_value"
name: "customer_lifetime_value_inference_preparation"
feature_store_project_id: "${project_id}"
feature_store_dataset: "feature_store"
insert_table: "customer_lifetime_value_inference_preparation"
expiration_duration_hours: 48
expiration_duration_hours: 168
audience_segmentation_inference_preparation:
project_id: "${project_id}"
dataset: "audience_segmentation"
name: "audience_segmentation_inference_preparation"
feature_store_project_id: "${project_id}"
feature_store_dataset: "feature_store"
insert_table: "audience_segmentation_inference_preparation"
expiration_duration_hours: 48
expiration_duration_hours: 168
mds_dataset: "${mds_dataset}"


143 changes: 77 additions & 66 deletions sql/procedure/audience_segmentation_inference_preparation.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -18,71 +18,71 @@ SET inference_date = DATE_SUB(inference_date, INTERVAL 1 DAY);
CREATE TEMP TABLE inference_preparation AS (
SELECT DISTINCT
UD.user_pseudo_id,
UD.user_id,
MAX(UD.user_id) OVER(user_segmentation_dimensions_window) AS user_id,
UD.feature_date,
UD.month_of_the_year,
UD.week_of_the_year,
UD.day_of_the_month,
UD.day_of_week,
UD.device_category,
UD.device_mobile_brand_name,
UD.device_mobile_model_name,
UD.device_os,
UD.device_os_version,
UD.device_language,
UD.device_web_browser,
UD.device_web_browser_version,
UD.geo_sub_continent,
UD.geo_country,
UD.geo_region,
UD.geo_city,
UD.geo_metro,
UD.last_traffic_source_medium,
UD.last_traffic_source_name,
UD.last_traffic_source_source,
UD.first_traffic_source_medium,
UD.first_traffic_source_name,
UD.first_traffic_source_source,
UD.has_signed_in_with_user_id,
UWM.active_users_past_1_7_day,
UWM.active_users_past_8_14_day,
UWM.purchases_past_1_7_day,
UWM.purchases_past_8_14_day,
UWM.visits_past_1_7_day,
UWM.visits_past_8_14_day,
UWM.view_items_past_1_7_day,
UWM.view_items_past_8_14_day,
UWM.add_to_carts_past_1_7_day,
UWM.add_to_carts_past_8_14_day,
UWM.checkouts_past_1_7_day,
UWM.checkouts_past_8_14_day,
UWM.ltv_revenue_past_1_7_day,
UWM.ltv_revenue_past_7_15_day,
UM.purchasers_users,
UM.average_daily_purchasers,
UM.active_users,
UM.DAU,
UM.MAU,
UM.WAU,
UM.dau_per_mau,
UM.dau_per_wau,
UM.wau_per_mau,
UM.users_engagement_duration_seconds,
UM.average_engagement_time,
UM.average_engagement_time_per_session,
UM.average_sessions_per_user,
UM.ARPPU,
UM.ARPU,
UM.average_daily_revenue,
UM.max_daily_revenue,
UM.min_daily_revenue,
UM.new_users,
UM.returning_users,
UM.first_time_purchasers,
UM.first_time_purchaser_conversion,
UM.first_time_purchasers_per_new_user,
UM.avg_user_conversion_rate,
UM.avg_session_conversion_rate,
MAX(UD.month_of_the_year) OVER(user_segmentation_dimensions_window) AS month_of_the_year,
MAX(UD.week_of_the_year) OVER(user_segmentation_dimensions_window) AS week_of_the_year,
MAX(UD.day_of_the_month) OVER(user_segmentation_dimensions_window) AS day_of_the_month,
MAX(UD.day_of_week) OVER(user_segmentation_dimensions_window) AS day_of_week,
MAX(UD.device_category) OVER(user_segmentation_dimensions_window) AS device_category,
MAX(UD.device_mobile_brand_name) OVER(user_segmentation_dimensions_window) AS device_mobile_brand_name,
MAX(UD.device_mobile_model_name) OVER(user_segmentation_dimensions_window) AS device_mobile_model_name,
MAX(UD.device_os) OVER(user_segmentation_dimensions_window) AS device_os,
MAX(UD.device_os_version) OVER(user_segmentation_dimensions_window) AS device_os_version,
MAX(UD.device_language) OVER(user_segmentation_dimensions_window) AS device_language,
MAX(UD.device_web_browser) OVER(user_segmentation_dimensions_window) AS device_web_browser,
MAX(UD.device_web_browser_version) OVER(user_segmentation_dimensions_window) AS device_web_browser_version,
MAX(UD.geo_sub_continent) OVER(user_segmentation_dimensions_window) AS geo_sub_continent,
MAX(UD.geo_country) OVER(user_segmentation_dimensions_window) AS geo_country,
MAX(UD.geo_region) OVER(user_segmentation_dimensions_window) AS geo_region,
MAX(UD.geo_city) OVER(user_segmentation_dimensions_window) AS geo_city,
MAX(UD.geo_metro) OVER(user_segmentation_dimensions_window) AS geo_metro,
MAX(UD.last_traffic_source_medium) OVER(user_segmentation_dimensions_window) AS last_traffic_source_medium,
MAX(UD.last_traffic_source_name) OVER(user_segmentation_dimensions_window) AS last_traffic_source_name,
MAX(UD.last_traffic_source_source) OVER(user_segmentation_dimensions_window) AS last_traffic_source_source,
MAX(UD.first_traffic_source_medium) OVER(user_segmentation_dimensions_window) AS first_traffic_source_medium,
MAX(UD.first_traffic_source_name) OVER(user_segmentation_dimensions_window) AS first_traffic_source_name,
MAX(UD.first_traffic_source_source) OVER(user_segmentation_dimensions_window) AS first_traffic_source_source,
MAX(UD.has_signed_in_with_user_id) OVER(user_segmentation_dimensions_window) AS has_signed_in_with_user_id,
MAX(UWM.active_users_past_1_7_day) OVER(user_lookback_metrics_window) AS active_users_past_1_7_day,
MAX(UWM.active_users_past_8_14_day) OVER(user_lookback_metrics_window) AS active_users_past_8_14_day,
MAX(UWM.purchases_past_1_7_day) OVER(user_lookback_metrics_window) AS purchases_past_1_7_day,
MAX(UWM.purchases_past_8_14_day) OVER(user_lookback_metrics_window) AS purchases_past_8_14_day,
MAX(UWM.visits_past_1_7_day) OVER(user_lookback_metrics_window) AS visits_past_1_7_day,
MAX(UWM.visits_past_8_14_day) OVER(user_lookback_metrics_window) AS visits_past_8_14_day,
MAX(UWM.view_items_past_1_7_day) OVER(user_lookback_metrics_window) AS view_items_past_1_7_day,
MAX(UWM.view_items_past_8_14_day) OVER(user_lookback_metrics_window) AS view_items_past_8_14_day,
MAX(UWM.add_to_carts_past_1_7_day) OVER(user_lookback_metrics_window) AS add_to_carts_past_1_7_day,
MAX(UWM.add_to_carts_past_8_14_day) OVER(user_lookback_metrics_window) AS add_to_carts_past_8_14_day,
MAX(UWM.checkouts_past_1_7_day) OVER(user_lookback_metrics_window) AS checkouts_past_1_7_day,
MAX(UWM.checkouts_past_8_14_day) OVER(user_lookback_metrics_window) AS checkouts_past_8_14_day,
MAX(UWM.ltv_revenue_past_1_7_day) OVER(user_lookback_metrics_window) AS ltv_revenue_past_1_7_day,
MAX(UWM.ltv_revenue_past_7_15_day) OVER(user_lookback_metrics_window) AS ltv_revenue_past_7_15_day,
MAX(UM.purchasers_users) OVER(user_scoped_segmentation_metrics_window) AS purchasers_users,
MAX(UM.average_daily_purchasers) OVER(user_scoped_segmentation_metrics_window) AS average_daily_purchasers,
MAX(UM.active_users) OVER(user_scoped_segmentation_metrics_window) AS active_users,
MAX(UM.DAU) OVER(user_scoped_segmentation_metrics_window) AS DAU,
MAX(UM.MAU) OVER(user_scoped_segmentation_metrics_window) AS MAU,
MAX(UM.WAU) OVER(user_scoped_segmentation_metrics_window) AS WAU,
MAX(UM.dau_per_mau) OVER(user_scoped_segmentation_metrics_window) AS dau_per_mau,
MAX(UM.dau_per_wau) OVER(user_scoped_segmentation_metrics_window) AS dau_per_wau,
MAX(UM.wau_per_mau) OVER(user_scoped_segmentation_metrics_window) AS wau_per_mau,
MAX(UM.users_engagement_duration_seconds) OVER(user_scoped_segmentation_metrics_window) AS users_engagement_duration_seconds,
MAX(UM.average_engagement_time) OVER(user_scoped_segmentation_metrics_window) AS average_engagement_time,
MAX(UM.average_engagement_time_per_session) OVER(user_scoped_segmentation_metrics_window) AS average_engagement_time_per_session,
MAX(UM.average_sessions_per_user) OVER(user_scoped_segmentation_metrics_window) AS average_sessions_per_user,
MAX(UM.ARPPU) OVER(user_scoped_segmentation_metrics_window) AS ARPPU,
MAX(UM.ARPU) OVER(user_scoped_segmentation_metrics_window) AS ARPU,
MAX(UM.average_daily_revenue) OVER(user_scoped_segmentation_metrics_window) AS average_daily_revenue,
MAX(UM.max_daily_revenue) OVER(user_scoped_segmentation_metrics_window) AS max_daily_revenue,
MAX(UM.min_daily_revenue) OVER(user_scoped_segmentation_metrics_window) AS min_daily_revenue,
MAX(UM.new_users) OVER(user_scoped_segmentation_metrics_window) AS new_users,
MAX(UM.returning_users) OVER(user_scoped_segmentation_metrics_window) AS returning_users,
MAX(UM.first_time_purchasers) OVER(user_scoped_segmentation_metrics_window) AS first_time_purchasers,
MAX(UM.first_time_purchaser_conversion) OVER(user_scoped_segmentation_metrics_window) AS first_time_purchaser_conversion,
MAX(UM.first_time_purchasers_per_new_user) OVER(user_scoped_segmentation_metrics_window) AS first_time_purchasers_per_new_user,
MAX(UM.avg_user_conversion_rate) OVER(user_scoped_segmentation_metrics_window) AS avg_user_conversion_rate,
MAX(UM.avg_session_conversion_rate) OVER(user_scoped_segmentation_metrics_window) AS avg_session_conversion_rate
FROM
`{{feature_store_project_id}}.{{feature_store_dataset}}.user_segmentation_dimensions` UD
INNER JOIN
Expand All @@ -96,8 +96,17 @@ CREATE TEMP TABLE inference_preparation AS (
UM.feature_date = UD.feature_date
WHERE
-- Keep only the rows whose feature_date equals the inference date
UD.feature_date = inference_date );
UD.feature_date = inference_date
AND UD.processed_timestamp = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_segmentation_dimensions` WHERE feature_date = inference_date LIMIT 1)
AND UWM.processed_timestamp = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_lookback_metrics` WHERE feature_date = inference_date LIMIT 1)
AND UM.processed_timestamp = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_segmentation_metrics` WHERE feature_date = inference_date LIMIT 1)
WINDOW
user_segmentation_dimensions_window AS (PARTITION BY UD.user_pseudo_id, UD.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
user_lookback_metrics_window AS (PARTITION BY UWM.user_pseudo_id, UWM.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
user_scoped_segmentation_metrics_window AS (PARTITION BY UM.feature_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
);

-- Remove every existing row from the target table ahead of the INSERT INTO
-- that follows; the WHERE TRUE predicate matches all rows.
DELETE FROM `{{project_id}}.{{dataset}}.{{insert_table}}` WHERE TRUE;

INSERT INTO
`{{project_id}}.{{dataset}}.{{insert_table}}` (feature_date,
Expand Down Expand Up @@ -341,7 +350,9 @@ CREATE OR REPLACE VIEW
checkouts_past_1_7_day,
checkouts_past_8_14_day,
ltv_revenue_past_1_7_day,
ltv_revenue_past_7_15_day) OPTIONS( expiration_timestamp=TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL {{expiration_duration_hours}} HOUR),
ltv_revenue_past_7_15_day)
OPTIONS(
--expiration_timestamp=TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL {{expiration_duration_hours}} HOUR),
friendly_name="v_audience_segmentation_inference_15",
description="View Audience Segmentation dataset using 15 days back. View expires after 48h and should run daily.",
labels=[("org_unit",
Expand Down
Loading

0 comments on commit 9773ba3

Please sign in to comment.