diff --git a/config/config.yaml.tftpl b/config/config.yaml.tftpl index c191513c..37d2f25c 100644 --- a/config/config.yaml.tftpl +++ b/config/config.yaml.tftpl @@ -16,11 +16,11 @@ google_cloud_project: project_id: "${project_id}" project_name: "${project_name}" project_number: "${project_number}" - region: "us-central1" + region: "${cloud_region}" cloud_build: project_id: "${project_id}" - region: "us-central1" + region: "${cloud_region}" github: owner: "${pipelines_github_owner}" repo_name: "${pipelines_github_repo}" @@ -29,8 +29,8 @@ cloud_build: _REPOSITORY_GCP_PROJECT: "${project_id}" _REPOSITORY_NAME: "github_${pipelines_github_owner}_${pipelines_github_repo}" _REPOSITORY_BRANCH: "main" - _GCR_HOSTNAME: "us-central1-docker.pkg.dev" - _BUILD_REGION: "us-central1" + _GCR_HOSTNAME: "${cloud_region}-docker.pkg.dev" + _BUILD_REGION: "${cloud_region}" container: builder: @@ -42,17 +42,17 @@ container: from_image: "wbsouza/zetasql-formatter:latest" base_image_name: "zetasql-formatter" base_image_prefix: "propensity-modeling" - container_registry_hostname: "us-central1-docker.pkg.dev" - container_registry_region: "us-central1" + container_registry_hostname: "${cloud_region}-docker.pkg.dev" + container_registry_region: "${cloud_region}" artifact_registry: pipelines_repo: name: "pipelines-repo" - region: "us-central1" + region: "${cloud_region}" project_id: "${project_id}" pipelines_docker_repo: name: "pipelines-docker-repo" - region: "us-central1" + region: "${cloud_region}" project_id: "${project_id}" dataflow: @@ -68,7 +68,7 @@ vertex_ai: project_id: "${project_id}" service_account_id: "vertex-pipelines-sa" service_account: "vertex-pipelines-sa@${project_id}.iam.gserviceaccount.com" - region: "us-central1" + region: "${cloud_region}" bucket_name: "${project_id}-pipelines" root_path: "gs://${project_id}-pipelines/pipelines/" @@ -169,7 +169,7 @@ vertex_ai: state: ACTIVE # possible states ACTIVE or PAUSED pipeline_parameters: project: "${project_id}" - location: 
"us-central1" + location: "${cloud_region}" root_dir: "gs://${project_id}-pipelines/propensity-training" transformations: "gs://${project_id}-pipelines/propensity-training/transformations_config_{timestamp}.json" train_budget_milli_node_hours: 1000 # 1 hour @@ -220,7 +220,7 @@ vertex_ai: state: ACTIVE # possible states ACTIVE or PAUSED pipeline_parameters: project_id: "${project_id}" - location: "us-central1" + location: "${cloud_region}" job_name_prefix: "propensity-prediction-pl-" model_display_name: "propensity-training-pl-model" # must match the model name defined in the training pipeline. for now it is {NAME_OF_PIPELINE}-model model_metric_name: "logLoss" @@ -314,7 +314,7 @@ vertex_ai: state: ACTIVE # possible states ACTIVE or PAUSED pipeline_parameters: project: "${project_id}" - location: "us-central1" + location: "${cloud_region}" root_dir: "gs://${project_id}-pipelines/clv-training" transformations: "gs://${project_id}-pipelines/clv-training/transformations_config_{timestamp}.json" train_budget_milli_node_hours: 1000 # 1 hour @@ -361,7 +361,7 @@ vertex_ai: state: ACTIVE # possible states ACTIVE or PAUSED pipeline_parameters: project_id: "${project_id}" - location: "us-central1" + location: "${cloud_region}" job_name_prefix: "clv-prediction-pl-" model_display_name: "clv-training-pl-model" # must match the model name defined in the training pipeline. for now it is {NAME_OF_PIPELINE}-model model_metric_name: "meanAbsoluteError" #'rootMeanSquaredError', 'meanAbsoluteError', 'meanAbsolutePercentageError', 'rSquared', 'rootMeanSquaredLogError' diff --git a/infrastructure/terraform/README.md b/infrastructure/terraform/README.md index 004879c3..f00fbc02 100644 --- a/infrastructure/terraform/README.md +++ b/infrastructure/terraform/README.md @@ -84,6 +84,16 @@ installation. vim ${TERRAFORM_RUN_DIR}/terraform.tfvars ``` + **Note:** The variable `google_default_region` determines the region where the resources are hosted. 
The variable default value is `us-central1`, based on your data residency requirements, you should change the variable value by adding the following in your `terraform.tfvars` file: + ``` + google_default_region = "[specific Google Cloud region of choice]" + ``` + **Note:** The variable `destination_data_location` determines the location for the data stored in BigQuery. You have the choice to either store the data in a single region by assigning a value such as + * `us-central1`, `europe-west1`, `asia-east1` etc. + + or in a multi-region by assigning a value such as + * `US` or `EU` + 1. Run Terraform to create resources: ```bash diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 91dfc436..6f18b34f 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -23,6 +23,8 @@ data "google_project" "feature_store_project" { module "data_store" { source = "./modules/data-store" + google_default_region = var.google_default_region + source_ga4_export_project_id = var.source_ga4_export_project_id source_ga4_export_dataset = var.source_ga4_export_dataset source_ads_export_data = var.source_ads_export_data @@ -63,6 +65,7 @@ resource "local_file" "feature_store_configuration" { project_id = var.feature_store_project_id project_name = data.google_project.feature_store_project.name project_number = data.google_project.feature_store_project.number + cloud_region = var.google_default_region mds_dataset = "${var.mds_dataset_prefix}_${local.mds_dataset_suffix}" pipelines_github_owner = var.pipelines_github_owner pipelines_github_repo = var.pipelines_github_repo @@ -113,6 +116,7 @@ module "feature_store" { enabled = var.deploy_feature_store count = var.deploy_feature_store ? 1 : 0 project_id = var.feature_store_project_id + region = var.google_default_region sql_dir_input = null_resource.generate_sql_queries.id != "" ?
"${local.source_root_dir}/sql" : "" } @@ -128,6 +132,7 @@ module "activation" { source = "./modules/activation" project_id = var.activation_project_id location = var.google_default_region + data_location = var.destination_data_location trigger_function_location = var.google_default_region poetry_cmd = var.poetry_cmd ga4_measurement_id = var.ga4_measurement_id diff --git a/infrastructure/terraform/modules/activation/main.tf b/infrastructure/terraform/modules/activation/main.tf index 3a9b583e..753299e0 100644 --- a/infrastructure/terraform/modules/activation/main.tf +++ b/infrastructure/terraform/modules/activation/main.tf @@ -72,7 +72,7 @@ module "bigquery" { dataset_name = local.app_prefix description = "activation appliction logs" project_id = var.project_id - location = "US" + location = var.data_location default_table_expiration_ms = 360000000 } diff --git a/infrastructure/terraform/modules/activation/variables.tf b/infrastructure/terraform/modules/activation/variables.tf index fa097d0e..ef2bfc79 100644 --- a/infrastructure/terraform/modules/activation/variables.tf +++ b/infrastructure/terraform/modules/activation/variables.tf @@ -22,6 +22,11 @@ variable "location" { type = string } +variable "data_location" { + description = "Data storage region for activation data" + type = string +} + variable "artifact_repository_id" { description = "Container repository id" type = string diff --git a/infrastructure/terraform/modules/data-store/variables.tf b/infrastructure/terraform/modules/data-store/variables.tf index 43bbc9d2..3633635c 100644 --- a/infrastructure/terraform/modules/data-store/variables.tf +++ b/infrastructure/terraform/modules/data-store/variables.tf @@ -28,7 +28,6 @@ variable "data_processing_project_id" { } variable "google_default_region" { - default = "us-central1" description = "The default Google Cloud region." 
type = string } @@ -114,7 +113,7 @@ variable "source_ga4_export_dataset" { variable "source_ads_export_data" { description = "List of BigQuery's Ads Data Transfer datasets" - type = list(object({ + type = list(object({ project = string dataset = string table_suffix = string diff --git a/infrastructure/terraform/modules/feature-store/variables.tf b/infrastructure/terraform/modules/feature-store/variables.tf index 5b6c8ae7..86b52474 100644 --- a/infrastructure/terraform/modules/feature-store/variables.tf +++ b/infrastructure/terraform/modules/feature-store/variables.tf @@ -25,7 +25,6 @@ variable "enabled" { variable "region" { description = "feature store region" type = string - default = "us-central1" } variable "project_id" { diff --git a/python/pipelines/components/bigquery/component.py b/python/pipelines/components/bigquery/component.py index 3bfed940..c99f164a 100644 --- a/python/pipelines/components/bigquery/component.py +++ b/python/pipelines/components/bigquery/component.py @@ -46,7 +46,7 @@ def bq_stored_procedure_exec( client = bigquery.Client( project=project, - # location=location + location=location ) params = [] @@ -95,7 +95,7 @@ def bq_clustering_exec( client = bigquery.Client( project=project_id, - # location=location + location=location ) model_bq_name = f"{model_name_bq_prefix}_{str(int(datetime.now().timestamp()))}" @@ -125,7 +125,7 @@ def bq_clustering_exec( client = bigquery.Client( project=project_id, - # location=location + location=location ) query_job = client.query( @@ -157,7 +157,7 @@ def bq_evaluate( client = bigquery.Client( project=project, - # location=location + location=location ) query_job = client.query( @@ -222,7 +222,7 @@ def list(cls): # Construct a BigQuery client object. 
client = bigquery.Client( project=project_id, - # location=location + location=location ) # TODO(developer): Set dataset_id to the ID of the dataset that contains @@ -354,7 +354,7 @@ def bq_flatten_tabular_binary_prediction_table( # Construct a BigQuery client object. client = bigquery.Client( project=project_id, - # location=location + location=location ) # Inspect the metadata set on destination_table and predictions_table @@ -437,7 +437,7 @@ def bq_flatten_tabular_regression_table( # Construct a BigQuery client object. client = bigquery.Client( project=project_id, - # location=location + location=location ) # Inspect the metadata set on destination_table and predictions_table @@ -508,7 +508,7 @@ def bq_flatten_kmeans_prediction_table( # Construct a BigQuery client object. client = bigquery.Client( project=project_id, - # location=location + location=location ) # Make an API request.