Commit
fixing missing features and tables
Carlos Timoteo committed Dec 18, 2024
1 parent b31ea28 commit ec8bc04
Showing 11 changed files with 255 additions and 8 deletions.
@@ -787,6 +787,42 @@ resource "google_bigquery_routine" "user_rolling_window_metrics" {
}
}

# This data source reads the contents of a local SQL file named user_rolling_window_lead_metrics.sql and
# exposes it as the attribute data.local_file.user_rolling_window_lead_metrics_file.content.
# The SQL file is expected to contain the definition of a BigQuery procedure named user_rolling_window_lead_metrics.
data "local_file" "user_rolling_window_lead_metrics_file" {
filename = "${local.sql_dir}/procedure/user_rolling_window_lead_metrics.sql"
}

# The user_rolling_window_lead_metrics procedure is designed to prepare the features for the Lead Score Propensity model.
#
# The procedure is typically invoked before training the Lead Score Propensity model to ensure that the features data
# is in the correct format and ready for training.
resource "google_bigquery_routine" "user_rolling_window_lead_metrics" {
# Both branches of this conditional are identical; the expression only forces a dependency on
# null_resource.check_bigquery_api so the BigQuery API check runs before this routine is created.
project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id
dataset_id = google_bigquery_dataset.feature_store.dataset_id
routine_id = "user_rolling_window_lead_metrics"
routine_type = "PROCEDURE"
language = "SQL"
definition_body = data.local_file.user_rolling_window_lead_metrics_file.content
description = "User-per-day granularity level metrics. Run this procedure daily. Metrics calculated using a rolling window operation."
arguments {
name = "input_date"
mode = "INOUT"
data_type = jsonencode({ "typeKind" : "DATE" })
}
arguments {
name = "end_date"
mode = "INOUT"
data_type = jsonencode({ "typeKind" : "DATE" })
}
arguments {
name = "rows_added"
mode = "OUT"
data_type = jsonencode({ "typeKind" : "INT64" })
}
}
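For context, a routine registered with these argument modes is invoked from a BigQuery script roughly as follows. This is a minimal sketch: `my-project.feature_store` is a placeholder path, and BigQuery requires INOUT and OUT arguments to be passed as variables rather than literals.

-- Hypothetical invocation; replace my-project.feature_store with the real project and dataset.
DECLARE input_date DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY);
DECLARE end_date DATE DEFAULT CURRENT_DATE();
DECLARE rows_added INT64;

CALL `my-project.feature_store.user_rolling_window_lead_metrics`(input_date, end_date, rows_added);

-- rows_added now holds the row count the procedure reported through its OUT argument.
SELECT rows_added;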

# This resource reads the contents of a local SQL file named user_scoped_lifetime_metrics.sql
data "local_file" "user_scoped_lifetime_metrics_file" {
filename = "${local.sql_dir}/procedure/user_scoped_lifetime_metrics.sql"
@@ -1119,6 +1155,20 @@ resource "google_bigquery_routine" "invoke_backfill_user_rolling_window_metrics"
description = "Procedure that backfills the user_rolling_window_metrics feature table. Run this procedure occasionally before training the models."
}

data "local_file" "invoke_backfill_user_rolling_window_lead_metrics_file" {
filename = "${local.sql_dir}/query/invoke_backfill_user_rolling_window_lead_metrics.sql"
}

resource "google_bigquery_routine" "invoke_backfill_user_rolling_window_lead_metrics" {
project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id
dataset_id = google_bigquery_dataset.feature_store.dataset_id
routine_id = "invoke_backfill_user_rolling_window_lead_metrics"
routine_type = "PROCEDURE"
language = "SQL"
definition_body = data.local_file.invoke_backfill_user_rolling_window_lead_metrics_file.content
description = "Procedure that backfills the user_rolling_window_lead_metrics feature table. Run this procedure occasionally before training the models."
}
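Assuming the backfill procedure takes no arguments (its signature lives in the SQL file, which this diff does not show), a one-off run is a plain CALL:

-- Hypothetical one-off backfill; project and dataset names are placeholders.
CALL `my-project.feature_store.invoke_backfill_user_rolling_window_lead_metrics`();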


data "local_file" "invoke_backfill_user_scoped_lifetime_metrics_file" {
filename = "${local.sql_dir}/query/invoke_backfill_user_scoped_lifetime_metrics.sql"
@@ -1543,6 +1593,20 @@ resource "google_bigquery_routine" "invoke_user_rolling_window_metrics" {
}


data "local_file" "invoke_user_rolling_window_lead_metrics_file" {
filename = "${local.sql_dir}/query/invoke_user_rolling_window_lead_metrics.sql"
}

resource "google_bigquery_routine" "invoke_user_rolling_window_lead_metrics" {
project = null_resource.check_bigquery_api.id != "" ? local.feature_store_project_id : local.feature_store_project_id
dataset_id = google_bigquery_dataset.feature_store.dataset_id
routine_id = "invoke_user_rolling_window_lead_metrics"
routine_type = "PROCEDURE"
language = "SQL"
definition_body = data.local_file.invoke_user_rolling_window_lead_metrics_file.content
description = "Procedure that invokes the user_rolling_window_lead_metrics table. Daily granularity level. Run this procedure daily before running prediction pipelines."
}

data "local_file" "invoke_user_scoped_lifetime_metrics_file" {
filename = "${local.sql_dir}/query/invoke_user_scoped_lifetime_metrics.sql"
}
@@ -1636,8 +1700,8 @@ resource "null_resource" "create_gemini_model" {
# The lifecycle block is used to configure the lifecycle of the table. The ignore_changes and prevent_destroy
# attributes are commented out below, so Terraform may update the table and, if necessary, destroy it.
lifecycle {
ignore_changes = all
prevent_destroy = true
#ignore_changes = all
#prevent_destroy = true
}

depends_on = [
25 changes: 25 additions & 0 deletions infrastructure/terraform/modules/feature-store/bigquery-tables.tf
@@ -330,6 +330,31 @@ resource "google_bigquery_table" "user_rolling_window_metrics" {
}
}

# This resource creates a BigQuery table named user_rolling_window_lead_metrics
# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id.
resource "google_bigquery_table" "user_rolling_window_lead_metrics" {
project = google_bigquery_dataset.feature_store.project
dataset_id = google_bigquery_dataset.feature_store.dataset_id
table_id = local.config_bigquery.table.user_rolling_window_lead_metrics.table_name
description = local.config_bigquery.table.user_rolling_window_lead_metrics.table_description

# The deletion_protection attribute specifies whether the table should be protected from deletion. In this case, it's set to false, which means that the table can be deleted.
deletion_protection = false
labels = {
version = "prod"
}

# The schema attribute specifies the schema of the table. In this case, the schema is defined in the JSON file.
schema = file("${local.sql_dir}/schema/table/user_rolling_window_lead_metrics.json")

# The lifecycle block is used to configure the lifecycle of the table. In this case, the ignore_changes attribute is set to all, which means that Terraform will ignore
# any changes to the table and will not attempt to update the table. The prevent_destroy attribute is set to true, which means that Terraform will prevent the table from being destroyed.
lifecycle {
ignore_changes = all
prevent_destroy = true
}
}
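The referenced JSON schema file is not part of this diff. Judging from the columns the procedures reference elsewhere in this commit, the table's shape corresponds roughly to the DDL below; the INT64 types and the exact column list are assumptions, and the authoritative definition remains the JSON file.

-- Illustrative sketch only; the real schema is the user_rolling_window_lead_metrics.json file under ${local.sql_dir}/schema/table/.
CREATE TABLE `my-project.feature_store.user_rolling_window_lead_metrics` (
processed_timestamp TIMESTAMP,
feature_date DATE,
user_pseudo_id STRING,
scroll_50_past_1_day INT64,
file_download_past_1_day INT64,
recipe_add_to_list_past_1_day INT64
-- ...and so on: one INT64 column per tracked event for each 1- to 5-day window
);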

# This resource creates a BigQuery table named user_scoped_lifetime_metrics
# in the dataset specified by google_bigquery_dataset.feature_store.dataset_id.
resource "google_bigquery_table" "user_scoped_lifetime_metrics" {
2 changes: 2 additions & 0 deletions python/activation/main.py
@@ -62,6 +62,7 @@ def _add_argparse_args(cls, parser):
- purchase-propensity-15-15
- purchase-propensity-15-7
- churn-propensity-30-15
- lead-score-propensity-5-1
activation_type_configuration: The GCS path to the configuration file for all activation types.
"""

@@ -110,6 +111,7 @@ def _add_argparse_args(cls, parser):
purchase-propensity-15-15
purchase-propensity-15-7
churn-propensity-30-15
lead-score-propensity-5-1
''',
required=True
)
1 change: 1 addition & 0 deletions python/ga4_setup/setup.py
@@ -276,6 +276,7 @@ def create_custom_dimensions(configuration: map):
create_custom_dimensions_for('CLTV', ['cltv_decile'], existing_dimensions, configuration)
create_custom_dimensions_for('Auto Audience Segmentation', ['a_a_s_prediction'], existing_dimensions, configuration)
create_custom_dimensions_for('Churn Propensity', ['c_p_prediction', 'c_p_decile'], existing_dimensions, configuration)
create_custom_dimensions_for('Lead Score Propensity', ['l_s_p_prediction', 'l_s_p_decile'], existing_dimensions, configuration)



10 changes: 5 additions & 5 deletions sql/procedure/lead_score_propensity_inference_preparation.sqlx
@@ -14,15 +14,15 @@

DECLARE lastest_processed_time_ud TIMESTAMP;
DECLARE lastest_processed_time_useam TIMESTAMP;
DECLARE lastest_processed_time_uwm TIMESTAMP;
DECLARE lastest_processed_time_uwlm TIMESTAMP;
DECLARE lastest_processed_time_um TIMESTAMP;

-- Setting procedure to look back from the day before `inference_date`
SET inference_date = DATE_SUB(inference_date, INTERVAL 1 DAY);

SET lastest_processed_time_ud = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_dimensions` WHERE feature_date = inference_date LIMIT 1);
SET lastest_processed_time_useam = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_session_event_aggregated_metrics` WHERE feature_date = inference_date LIMIT 1);
SET lastest_processed_time_uwm = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_rolling_window_metrics` WHERE feature_date = inference_date LIMIT 1);
SET lastest_processed_time_uwlm = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_rolling_window_lead_metrics` WHERE feature_date = inference_date LIMIT 1);
SET lastest_processed_time_um = (SELECT MAX(processed_timestamp) FROM `{{feature_store_project_id}}.{{feature_store_dataset}}.user_scoped_metrics` WHERE feature_date = inference_date LIMIT 1);

CREATE OR REPLACE TEMP TABLE inference_preparation_ud as (
@@ -89,9 +89,9 @@ WINDOW
CREATE OR REPLACE TEMP TABLE inference_preparation_uwlm as (
SELECT DISTINCT
-- User pseudo id
UWM.user_pseudo_id,
UWLM.user_pseudo_id,
-- Feature date
UWM.feature_date,
UWLM.feature_date,
-- Calculate the maximum value for each metric over the window
MAX(UWLM.scroll_50_past_1_day) OVER(user_rolling_lead_window) AS scroll_50_past_1_day,
-- Maximum active users in the past 2 days
@@ -181,7 +181,7 @@ ON
WHERE
-- Filter for the features in the inference date
UWLM.feature_date = inference_date
AND UWLM.processed_timestamp = lastest_processed_time_uwm
AND UWLM.processed_timestamp = lastest_processed_time_uwlm
WINDOW
user_rolling_lead_window AS (PARTITION BY UWLM.user_pseudo_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
);
65 changes: 65 additions & 0 deletions sql/procedure/lead_score_propensity_training_preparation.sqlx
@@ -126,6 +126,16 @@ CREATE OR REPLACE TEMP TABLE training_preparation_uwlm as (
MAX(UWLM.view_search_results_past_4_day) OVER(user_rolling_lead_window) AS view_search_results_past_4_day,
-- Maximum view_search_results in the past 5 days
MAX(UWLM.view_search_results_past_5_day) OVER(user_rolling_lead_window) AS view_search_results_past_5_day,
-- Maximum file_download in the past 1 day
MAX(UWLM.file_download_past_1_day) OVER(user_rolling_lead_window) AS file_download_past_1_day,
-- Maximum file_download in the past 2 days
MAX(UWLM.file_download_past_2_day) OVER(user_rolling_lead_window) AS file_download_past_2_day,
-- Maximum file_download in the past 3 days
MAX(UWLM.file_download_past_3_day) OVER(user_rolling_lead_window) AS file_download_past_3_day,
-- Maximum file_download in the past 4 days
MAX(UWLM.file_download_past_4_day) OVER(user_rolling_lead_window) AS file_download_past_4_day,
-- Maximum file_download in the past 5 days
MAX(UWLM.file_download_past_5_day) OVER(user_rolling_lead_window) AS file_download_past_5_day,
-- Maximum recipe_add_to_list in the past 1 day
MAX(UWLM.recipe_add_to_list_past_1_day) OVER(user_rolling_lead_window) AS recipe_add_to_list_past_1_day,
-- Maximum recipe_add_to_list in the past 2 days
@@ -240,6 +250,11 @@ CREATE OR REPLACE TEMP TABLE training_preparation as (
UWLM.view_search_results_past_3_day,
UWLM.view_search_results_past_4_day,
UWLM.view_search_results_past_5_day,
UWLM.file_download_past_1_day,
UWLM.file_download_past_2_day,
UWLM.file_download_past_3_day,
UWLM.file_download_past_4_day,
UWLM.file_download_past_5_day,
UWLM.recipe_add_to_list_past_1_day,
UWLM.recipe_add_to_list_past_2_day,
UWLM.recipe_add_to_list_past_3_day,
@@ -325,6 +340,11 @@ CREATE OR REPLACE TEMP TABLE DataForTargetTable AS(
view_search_results_past_3_day,
view_search_results_past_4_day,
view_search_results_past_5_day,
file_download_past_1_day,
file_download_past_2_day,
file_download_past_3_day,
file_download_past_4_day,
file_download_past_5_day,
recipe_add_to_list_past_1_day,
recipe_add_to_list_past_2_day,
recipe_add_to_list_past_3_day,
@@ -400,6 +420,11 @@ CREATE OR REPLACE TABLE `{{project_id}}.{{dataset}}.lead_score_propensity_traini
LAST_VALUE(view_search_results_past_3_day) OVER(PARTITION BY user_pseudo_id, feature_date ORDER BY feature_date DESC) AS view_search_results_past_3_day,
LAST_VALUE(view_search_results_past_4_day) OVER(PARTITION BY user_pseudo_id, feature_date ORDER BY feature_date DESC) AS view_search_results_past_4_day,
LAST_VALUE(view_search_results_past_5_day) OVER(PARTITION BY user_pseudo_id, feature_date ORDER BY feature_date DESC) AS view_search_results_past_5_day,
LAST_VALUE(file_download_past_1_day) OVER(PARTITION BY user_pseudo_id, feature_date ORDER BY feature_date DESC) AS file_download_past_1_day,
LAST_VALUE(file_download_past_2_day) OVER(PARTITION BY user_pseudo_id, feature_date ORDER BY feature_date DESC) AS file_download_past_2_day,
LAST_VALUE(file_download_past_3_day) OVER(PARTITION BY user_pseudo_id, feature_date ORDER BY feature_date DESC) AS file_download_past_3_day,
LAST_VALUE(file_download_past_4_day) OVER(PARTITION BY user_pseudo_id, feature_date ORDER BY feature_date DESC) AS file_download_past_4_day,
LAST_VALUE(file_download_past_5_day) OVER(PARTITION BY user_pseudo_id, feature_date ORDER BY feature_date DESC) AS file_download_past_5_day,
LAST_VALUE(recipe_add_to_list_past_1_day) OVER(PARTITION BY user_pseudo_id, feature_date ORDER BY feature_date DESC) AS recipe_add_to_list_past_1_day,
LAST_VALUE(recipe_add_to_list_past_2_day) OVER(PARTITION BY user_pseudo_id, feature_date ORDER BY feature_date DESC) AS recipe_add_to_list_past_2_day,
LAST_VALUE(recipe_add_to_list_past_3_day) OVER(PARTITION BY user_pseudo_id, feature_date ORDER BY feature_date DESC) AS recipe_add_to_list_past_3_day,
@@ -473,6 +498,11 @@ CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_lead_score_propensity_train
view_search_results_past_3_day,
view_search_results_past_4_day,
view_search_results_past_5_day,
file_download_past_1_day,
file_download_past_2_day,
file_download_past_3_day,
file_download_past_4_day,
file_download_past_5_day,
recipe_add_to_list_past_1_day,
recipe_add_to_list_past_2_day,
recipe_add_to_list_past_3_day,
@@ -550,6 +580,11 @@ SELECT DISTINCT
view_search_results_past_3_day,
view_search_results_past_4_day,
view_search_results_past_5_day,
file_download_past_1_day,
file_download_past_2_day,
file_download_past_3_day,
file_download_past_4_day,
file_download_past_5_day,
recipe_add_to_list_past_1_day,
recipe_add_to_list_past_2_day,
recipe_add_to_list_past_3_day,
@@ -617,6 +652,11 @@ SELECT DISTINCT
view_search_results_past_3_day,
view_search_results_past_4_day,
view_search_results_past_5_day,
file_download_past_1_day,
file_download_past_2_day,
file_download_past_3_day,
file_download_past_4_day,
file_download_past_5_day,
recipe_add_to_list_past_1_day,
recipe_add_to_list_past_2_day,
recipe_add_to_list_past_3_day,
@@ -696,6 +736,11 @@ CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_lead_score_propensity_train
view_search_results_past_3_day,
view_search_results_past_4_day,
view_search_results_past_5_day,
file_download_past_1_day,
file_download_past_2_day,
file_download_past_3_day,
file_download_past_4_day,
file_download_past_5_day,
recipe_add_to_list_past_1_day,
recipe_add_to_list_past_2_day,
recipe_add_to_list_past_3_day,
@@ -767,6 +812,11 @@ SELECT DISTINCT
view_search_results_past_3_day,
view_search_results_past_4_day,
view_search_results_past_5_day,
file_download_past_1_day,
file_download_past_2_day,
file_download_past_3_day,
file_download_past_4_day,
file_download_past_5_day,
recipe_add_to_list_past_1_day,
recipe_add_to_list_past_2_day,
recipe_add_to_list_past_3_day,
@@ -833,6 +883,11 @@ SELECT DISTINCT
view_search_results_past_3_day,
view_search_results_past_4_day,
view_search_results_past_5_day,
file_download_past_1_day,
file_download_past_2_day,
file_download_past_3_day,
file_download_past_4_day,
file_download_past_5_day,
recipe_add_to_list_past_1_day,
recipe_add_to_list_past_2_day,
recipe_add_to_list_past_3_day,
@@ -908,6 +963,11 @@ CREATE OR REPLACE VIEW `{{project_id}}.{{dataset}}.v_lead_score_propensity_train
view_search_results_past_3_day,
view_search_results_past_4_day,
view_search_results_past_5_day,
file_download_past_1_day,
file_download_past_2_day,
file_download_past_3_day,
file_download_past_4_day,
file_download_past_5_day,
recipe_add_to_list_past_1_day,
recipe_add_to_list_past_2_day,
recipe_add_to_list_past_3_day,
@@ -979,6 +1039,11 @@ SELECT DISTINCT
view_search_results_past_3_day,
view_search_results_past_4_day,
view_search_results_past_5_day,
file_download_past_1_day,
file_download_past_2_day,
file_download_past_3_day,
file_download_past_4_day,
file_download_past_5_day,
recipe_add_to_list_past_1_day,
recipe_add_to_list_past_2_day,
recipe_add_to_list_past_3_day,
12 changes: 11 additions & 1 deletion sql/procedure/user_rolling_window_lead_metrics.sqlx
@@ -414,7 +414,7 @@ WHEN MATCHED THEN
I.recipe_add_to_menu_past_2_day = T.recipe_add_to_menu_past_2_day,
I.recipe_add_to_menu_past_3_day = T.recipe_add_to_menu_past_3_day,
I.recipe_add_to_menu_past_4_day = T.recipe_add_to_menu_past_4_day,
I.recipe_add_to_menu_past_5_day = T.recipe_add_to_menu_past_5_day,
I.recipe_add_to_menu_past_5_day = T.recipe_add_to_menu_past_5_day
WHEN NOT MATCHED THEN
INSERT
(processed_timestamp,
@@ -435,6 +435,11 @@ WHEN NOT MATCHED THEN
view_search_results_past_3_day,
view_search_results_past_4_day,
view_search_results_past_5_day,
file_download_past_1_day,
file_download_past_2_day,
file_download_past_3_day,
file_download_past_4_day,
file_download_past_5_day,
recipe_add_to_list_past_1_day,
recipe_add_to_list_past_2_day,
recipe_add_to_list_past_3_day,
@@ -479,6 +484,11 @@ WHEN NOT MATCHED THEN
T.view_search_results_past_3_day,
T.view_search_results_past_4_day,
T.view_search_results_past_5_day,
T.file_download_past_1_day,
T.file_download_past_2_day,
T.file_download_past_3_day,
T.file_download_past_4_day,
T.file_download_past_5_day,
T.recipe_add_to_list_past_1_day,
T.recipe_add_to_list_past_2_day,
T.recipe_add_to_list_past_3_day,
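The one-line change in the first hunk of this file removes a trailing comma after the last assignment in the UPDATE SET list; a comma immediately before the WHEN NOT MATCHED clause is a MERGE syntax error in BigQuery. A minimal sketch of the corrected shape, with illustrative table and column names:

-- Illustrative MERGE skeleton; table and column names are placeholders.
MERGE `my-project.feature_store.target_table` I
USING `my-project.feature_store.staging_table` T
ON I.user_pseudo_id = T.user_pseudo_id
WHEN MATCHED THEN
UPDATE SET
I.metric_a = T.metric_a,
I.metric_b = T.metric_b -- no trailing comma before the next WHEN clause
WHEN NOT MATCHED THEN
INSERT (user_pseudo_id, metric_a, metric_b)
VALUES (T.user_pseudo_id, T.metric_a, T.metric_b);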
Expand Up @@ -324,6 +324,11 @@ INSERT INTO `{{project_id}}.{{dataset}}.{{insert_table}}` (
view_search_results_past_3_day,
view_search_results_past_4_day,
view_search_results_past_5_day,
file_download_past_1_day,
file_download_past_2_day,
file_download_past_3_day,
file_download_past_4_day,
file_download_past_5_day,
recipe_add_to_list_past_1_day,
recipe_add_to_list_past_2_day,
recipe_add_to_list_past_3_day,