Skip to content

Commit

Permalink
Specify the columns in the backfill procedures and make sure the script…
Browse files Browse the repository at this point in the history
…s are executable with a previously created feature table (#254)
  • Loading branch information
kingman authored Nov 26, 2024
1 parent 7b9fb8c commit 92b585a
Show file tree
Hide file tree
Showing 13 changed files with 358 additions and 13 deletions.
8 changes: 7 additions & 1 deletion sql/query/invoke_backfill_churn_propensity_label.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,13 @@ GROUP BY
);

-- Insert data into the target table, combining user information with churn and bounce status
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}`
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` (
processed_timestamp,
feature_date,
user_pseudo_id,
churned,
bounced
)
SELECT DISTINCT
-- Current timestamp as the processing timestamp
CURRENT_TIMESTAMP() AS processed_timestamp,
Expand Down
9 changes: 8 additions & 1 deletion sql/query/invoke_backfill_customer_lifetime_value_label.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,14 @@ CREATE OR REPLACE TEMP TABLE future_revenue_per_user AS (
);

-- Insert data into the target table
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}`
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` (
processed_timestamp,
feature_date,
user_pseudo_id,
pltv_revenue_30_days,
pltv_revenue_90_days,
pltv_revenue_180_days
)
SELECT DISTINCT
-- Current timestamp of the processing
CURRENT_TIMESTAMP() AS processed_timestamp,
Expand Down
21 changes: 20 additions & 1 deletion sql/query/invoke_backfill_purchase_propensity_label.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,26 @@ CREATE OR REPLACE TEMP TABLE future_purchases_per_user AS (
);

-- Inserts data into the target table
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}`
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` (
processed_timestamp,
feature_date,
user_pseudo_id,
purchase_day_1,
purchase_day_2,
purchase_day_3,
purchase_day_4,
purchase_day_5,
purchase_day_6,
purchase_day_7,
purchase_day_8,
purchase_day_9,
purchase_day_10,
purchase_day_11,
purchase_day_12,
purchase_day_13,
purchase_day_14,
purchase_day_15_30
)
SELECT DISTINCT
-- Selects the current timestamp and assigns it to the column processed_timestamp
CURRENT_TIMESTAMP() AS processed_timestamp,
Expand Down
26 changes: 25 additions & 1 deletion sql/query/invoke_backfill_user_dimensions.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,31 @@ CREATE OR REPLACE TEMP TABLE events_users as (
;

-- Inserting aggregated user data into the target table.
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}`
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` (
processed_timestamp,
feature_date,
user_pseudo_id,
user_id,
user_ltv_revenue,
device_category,
device_mobile_brand_name,
device_mobile_model_name,
device_os,
device_language,
device_web_browser,
geo_sub_continent,
geo_country,
geo_region,
geo_city,
geo_metro,
last_traffic_source_medium,
last_traffic_source_name,
last_traffic_source_source,
first_traffic_source_medium,
first_traffic_source_name,
first_traffic_source_source,
has_signed_in_with_user_id
)
SELECT DISTINCT
-- Timestamp of the data processing
CURRENT_TIMESTAMP() AS processed_timestamp,
Expand Down
26 changes: 25 additions & 1 deletion sql/query/invoke_backfill_user_lifetime_dimensions.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,31 @@ CREATE OR REPLACE TEMP TABLE events_users as (
-- This code block inserts data into the specified table, combining information from the "events_users" table
-- and the "user_dimensions_event_session_scoped" table.
-- It aggregates user-level features for each user and date.
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}`
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` (
processed_timestamp,
feature_date,
user_pseudo_id,
user_id,
user_ltv_revenue,
device_category,
device_mobile_brand_name,
device_mobile_model_name,
device_os,
device_language,
device_web_browser,
geo_sub_continent,
geo_country,
geo_region,
geo_city,
geo_metro,
last_traffic_source_medium,
last_traffic_source_name,
last_traffic_source_source,
first_traffic_source_medium,
first_traffic_source_name,
first_traffic_source_source,
has_signed_in_with_user_id
)
SELECT DISTINCT
-- The current timestamp.
CURRENT_TIMESTAMP() AS processed_timestamp,
Expand Down
20 changes: 19 additions & 1 deletion sql/query/invoke_backfill_user_lookback_metrics.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,25 @@ AND D.device_os IS NOT NULL
-- This code is part of a larger process for building a machine learning model that predicts
-- user behavior based on their past activity. The features generated by this code can be used
-- as input to the model, helping it learn patterns and make predictions.
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}`
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` (
processed_timestamp,
feature_date,
user_pseudo_id,
active_users_past_1_7_day,
active_users_past_8_14_day,
purchases_past_1_7_day,
purchases_past_8_14_day,
visits_past_1_7_day,
visits_past_8_14_day,
view_items_past_1_7_day,
view_items_past_8_14_day,
add_to_carts_past_1_7_day,
add_to_carts_past_8_14_day,
checkouts_past_1_7_day,
checkouts_past_8_14_day,
ltv_revenue_past_1_7_day,
ltv_revenue_past_7_15_day
)
SELECT DISTINCT
-- Timestamp indicating when the data was processed
CURRENT_TIMESTAMP() AS processed_timestamp,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,50 @@ AND D.device_os IS NOT NULL
-- This code is part of a larger process for building a machine learning model that predicts
-- user behavior based on their past activity. The features generated by this code can be used
-- as input to the model, helping it learn patterns and make predictions.
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}`
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` (
processed_timestamp,
feature_date,
user_pseudo_id,
active_users_past_1_30_day,
active_users_past_30_60_day,
active_users_past_60_90_day,
active_users_past_90_120_day,
active_users_past_120_150_day,
active_users_past_150_180_day,
purchases_past_1_30_day,
purchases_past_30_60_day,
purchases_past_60_90_day,
purchases_past_90_120_day,
purchases_past_120_150_day,
purchases_past_150_180_day,
visits_past_1_30_day,
visits_past_30_60_day,
visits_past_60_90_day,
visits_past_90_120_day,
visits_past_120_150_day,
visits_past_150_180_day,
view_items_past_1_30_day,
view_items_past_30_60_day,
view_items_past_60_90_day,
view_items_past_90_120_day,
view_items_past_120_150_day,
view_items_past_150_180_day,
add_to_carts_past_1_30_day,
add_to_carts_past_30_60_day,
add_to_carts_past_60_90_day,
add_to_carts_past_90_120_day,
add_to_carts_past_120_150_day,
add_to_carts_past_150_180_day,
checkouts_past_1_30_day,
checkouts_past_30_60_day,
checkouts_past_60_90_day,
checkouts_past_90_120_day,
checkouts_past_120_150_day,
checkouts_past_150_180_day,
ltv_revenue_past_1_30_day,
ltv_revenue_past_30_90_day,
ltv_revenue_past_90_180_day
)
SELECT DISTINCT
-- This selects the current timestamp and assigns it to the column processed_timestamp.
CURRENT_TIMESTAMP() AS processed_timestamp,
Expand Down
60 changes: 59 additions & 1 deletion sql/query/invoke_backfill_user_rolling_window_metrics.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,65 @@ CREATE OR REPLACE TEMP TABLE events_users as (
-- table and several temporary tables containing rolling window features. The resulting data
-- represents user-level features for each user and date, capturing their past activity within
-- different time windows.
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}`
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` (
processed_timestamp,
feature_date,
user_pseudo_id,
active_users_past_1_day,
active_users_past_2_day,
active_users_past_3_day,
active_users_past_4_day,
active_users_past_5_day,
active_users_past_6_day,
active_users_past_7_day,
active_users_past_8_14_day,
active_users_past_15_30_day,
purchases_past_1_day,
purchases_past_2_day,
purchases_past_3_day,
purchases_past_4_day,
purchases_past_5_day,
purchases_past_6_day,
purchases_past_7_day,
purchases_past_8_14_day,
purchases_past_15_30_day,
visits_past_1_day,
visits_past_2_day,
visits_past_3_day,
visits_past_4_day,
visits_past_5_day,
visits_past_6_day,
visits_past_7_day,
visits_past_8_14_day,
visits_past_15_30_day,
view_items_past_1_day,
view_items_past_2_day,
view_items_past_3_day,
view_items_past_4_day,
view_items_past_5_day,
view_items_past_6_day,
view_items_past_7_day,
view_items_past_8_14_day,
view_items_past_15_30_day,
add_to_carts_past_1_day,
add_to_carts_past_2_day,
add_to_carts_past_3_day,
add_to_carts_past_4_day,
add_to_carts_past_5_day,
add_to_carts_past_6_day,
add_to_carts_past_7_day,
add_to_carts_past_8_14_day,
add_to_carts_past_15_30_day,
checkouts_past_1_day,
checkouts_past_2_day,
checkouts_past_3_day,
checkouts_past_4_day,
checkouts_past_5_day,
checkouts_past_6_day,
checkouts_past_7_day,
checkouts_past_8_14_day,
checkouts_past_15_30_day
)
SELECT DISTINCT
-- This selects the current timestamp and assigns it to the column processed_timestamp.
CURRENT_TIMESTAMP() AS processed_timestamp,
Expand Down
30 changes: 29 additions & 1 deletion sql/query/invoke_backfill_user_scoped_lifetime_metrics.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,35 @@ CREATE OR REPLACE TEMP TABLE first_purchasers as (
);

-- This SQL code calculates various user engagement and revenue metrics at a daily level and inserts the results into a target table. It leverages several temporary tables created earlier in the script to aggregate data efficiently.
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}`
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` (
processed_timestamp,
feature_date,
lifetime_purchasers_users,
lifetime_average_daily_purchasers,
lifetime_active_users,
lifetime_DAU,
lifetime_MAU,
lifetime_WAU,
lifetime_dau_per_mau,
lifetime_dau_per_wau,
lifetime_wau_per_mau,
lifetime_users_engagement_duration_seconds,
lifetime_average_engagement_time,
lifetime_average_engagement_time_per_session,
lifetime_average_sessions_per_user,
lifetime_ARPPU,
lifetime_ARPU,
lifetime_average_daily_revenue,
lifetime_max_daily_revenue,
lifetime_min_daily_revenue,
lifetime_new_users,
lifetime_returning_users,
lifetime_first_time_purchasers,
lifetime_first_time_purchaser_conversion,
lifetime_first_time_purchasers_per_new_user,
lifetime_avg_user_conversion_rate,
lifetime_avg_session_conversion_rate
)
SELECT
-- Records the current timestamp when the query is executed.
CURRENT_TIMESTAMP() AS processed_timestamp,
Expand Down
30 changes: 29 additions & 1 deletion sql/query/invoke_backfill_user_scoped_metrics.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,35 @@ CREATE OR REPLACE TEMP TABLE new_users_ as (
);

-- Insert data into the target table after calculating various user engagement and revenue metrics.
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}`
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` (
processed_timestamp,
feature_date,
purchasers_users,
average_daily_purchasers,
active_users,
DAU,
MAU,
WAU,
dau_per_mau,
dau_per_wau,
wau_per_mau,
users_engagement_duration_seconds,
average_engagement_time,
average_engagement_time_per_session,
average_sessions_per_user,
ARPPU,
ARPU,
average_daily_revenue,
max_daily_revenue,
min_daily_revenue,
new_users,
returning_users,
first_time_purchasers,
first_time_purchaser_conversion,
first_time_purchasers_per_new_user,
avg_user_conversion_rate,
avg_session_conversion_rate
)
SELECT DISTINCT
-- Record the current timestamp when the query is executed.
CURRENT_TIMESTAMP() AS processed_timestamp,
Expand Down
30 changes: 29 additions & 1 deletion sql/query/invoke_backfill_user_scoped_segmentation_metrics.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,35 @@ GROUP BY feature_date
);

-- This SQL code calculates various user engagement and revenue metrics at a daily level and inserts the results into a target table. It leverages several temporary tables created earlier in the script to aggregate data efficiently.
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}`
INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` (
processed_timestamp,
feature_date,
purchasers_users,
average_daily_purchasers,
active_users,
DAU,
MAU,
WAU,
dau_per_mau,
dau_per_wau,
wau_per_mau,
users_engagement_duration_seconds,
average_engagement_time,
average_engagement_time_per_session,
average_sessions_per_user,
ARPPU,
ARPU,
average_daily_revenue,
max_daily_revenue,
min_daily_revenue,
new_users,
returning_users,
first_time_purchasers,
first_time_purchaser_conversion,
first_time_purchasers_per_new_user,
avg_user_conversion_rate,
avg_session_conversion_rate
)
SELECT
-- Records the current timestamp when the query is executed.
CURRENT_TIMESTAMP() AS processed_timestamp,
Expand Down
Loading

0 comments on commit 92b585a

Please sign in to comment.