Skip to content

Commit

Permalink
Merge branch 'GoogleCloudPlatform:main' into improve-backfill
Browse files Browse the repository at this point in the history
  • Loading branch information
chmstimoteo authored Sep 21, 2023
2 parents d316ff3 + 5aa6df0 commit 12ed204
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 25 deletions.
26 changes: 13 additions & 13 deletions config/config.yaml.tftpl
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ google_cloud_project:
project_id: "${project_id}"
project_name: "${project_name}"
project_number: "${project_number}"
region: "us-central1"
region: "${cloud_region}"

cloud_build:
project_id: "${project_id}"
region: "us-central1"
region: "${cloud_region}"
github:
owner: "${pipelines_github_owner}"
repo_name: "${pipelines_github_repo}"
Expand All @@ -29,8 +29,8 @@ cloud_build:
_REPOSITORY_GCP_PROJECT: "${project_id}"
_REPOSITORY_NAME: "github_${pipelines_github_owner}_${pipelines_github_repo}"
_REPOSITORY_BRANCH: "main"
_GCR_HOSTNAME: "us-central1-docker.pkg.dev"
_BUILD_REGION: "us-central1"
_GCR_HOSTNAME: "${cloud_region}-docker.pkg.dev"
_BUILD_REGION: "${cloud_region}"

container:
builder:
Expand All @@ -42,17 +42,17 @@ container:
from_image: "wbsouza/zetasql-formatter:latest"
base_image_name: "zetasql-formatter"
base_image_prefix: "propensity-modeling"
container_registry_hostname: "us-central1-docker.pkg.dev"
container_registry_region: "us-central1"
container_registry_hostname: "${cloud_region}-docker.pkg.dev"
container_registry_region: "${cloud_region}"

artifact_registry:
pipelines_repo:
name: "pipelines-repo"
region: "us-central1"
region: "${cloud_region}"
project_id: "${project_id}"
pipelines_docker_repo:
name: "pipelines-docker-repo"
region: "us-central1"
region: "${cloud_region}"
project_id: "${project_id}"

dataflow:
Expand All @@ -68,7 +68,7 @@ vertex_ai:
project_id: "${project_id}"
service_account_id: "vertex-pipelines-sa"
service_account: "vertex-pipelines-sa@${project_id}.iam.gserviceaccount.com"
region: "us-central1"
region: "${cloud_region}"
bucket_name: "${project_id}-pipelines"
root_path: "gs://${project_id}-pipelines/pipelines/"

Expand Down Expand Up @@ -169,7 +169,7 @@ vertex_ai:
state: ACTIVE # possible states ACTIVE or PAUSED
pipeline_parameters:
project: "${project_id}"
location: "us-central1"
location: "${cloud_region}"
root_dir: "gs://${project_id}-pipelines/propensity-training"
transformations: "gs://${project_id}-pipelines/propensity-training/transformations_config_{timestamp}.json"
train_budget_milli_node_hours: 1000 # 1 hour
Expand Down Expand Up @@ -220,7 +220,7 @@ vertex_ai:
state: ACTIVE # possible states ACTIVE or PAUSED
pipeline_parameters:
project_id: "${project_id}"
location: "us-central1"
location: "${cloud_region}"
job_name_prefix: "propensity-prediction-pl-"
model_display_name: "propensity-training-pl-model" # must match the model name defined in the training pipeline. for now it is {NAME_OF_PIPELINE}-model
model_metric_name: "logLoss"
Expand Down Expand Up @@ -314,7 +314,7 @@ vertex_ai:
state: ACTIVE # possible states ACTIVE or PAUSED
pipeline_parameters:
project: "${project_id}"
location: "us-central1"
location: "${cloud_region}"
root_dir: "gs://${project_id}-pipelines/clv-training"
transformations: "gs://${project_id}-pipelines/clv-training/transformations_config_{timestamp}.json"
train_budget_milli_node_hours: 1000 # 1 hour
Expand Down Expand Up @@ -361,7 +361,7 @@ vertex_ai:
state: ACTIVE # possible states ACTIVE or PAUSED
pipeline_parameters:
project_id: "${project_id}"
location: "us-central1"
location: "${cloud_region}"
job_name_prefix: "clv-prediction-pl-"
model_display_name: "clv-training-pl-model" # must match the model name defined in the training pipeline. for now it is {NAME_OF_PIPELINE}-model
model_metric_name: "meanAbsoluteError" #'rootMeanSquaredError', 'meanAbsoluteError', 'meanAbsolutePercentageError', 'rSquared', 'rootMeanSquaredLogError'
Expand Down
10 changes: 10 additions & 0 deletions infrastructure/terraform/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@ installation.
vim ${TERRAFORM_RUN_DIR}/terraform.tfvars
```
**Note:** The variable `google_default_region` determines the region where the resources are hosted. Its default value is `us-central1`; if your data residency requirements call for a different region, change it by adding the following to your `terraform.tfvars` file:
```
google_default_region = "[specific Google Cloud region of choice]"
```
**Note:** The variable `destination_data_location` determines the location of the data stored in BigQuery. You can either store the data in a single region by assigning a value such as
* `us-central1`, `europe-west1`, `asia-east1`, etc.
or in a multi-region by assigning a value such as
* `US` or `EU`
1. Run Terraform to create resources:
```bash
Expand Down
5 changes: 5 additions & 0 deletions infrastructure/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ data "google_project" "feature_store_project" {
module "data_store" {
source = "./modules/data-store"

google_default_region = var.google_default_region

source_ga4_export_project_id = var.source_ga4_export_project_id
source_ga4_export_dataset = var.source_ga4_export_dataset
source_ads_export_data = var.source_ads_export_data
Expand Down Expand Up @@ -63,6 +65,7 @@ resource "local_file" "feature_store_configuration" {
project_id = var.feature_store_project_id
project_name = data.google_project.feature_store_project.name
project_number = data.google_project.feature_store_project.number
cloud_region = var.google_default_region
mds_dataset = "${var.mds_dataset_prefix}_${local.mds_dataset_suffix}"
pipelines_github_owner = var.pipelines_github_owner
pipelines_github_repo = var.pipelines_github_repo
Expand Down Expand Up @@ -113,6 +116,7 @@ module "feature_store" {
enabled = var.deploy_feature_store
count = var.deploy_feature_store ? 1 : 0
project_id = var.feature_store_project_id
region = var.google_default_region
sql_dir_input = null_resource.generate_sql_queries.id != "" ? "${local.source_root_dir}/sql" : ""
}

Expand All @@ -128,6 +132,7 @@ module "activation" {
source = "./modules/activation"
project_id = var.activation_project_id
location = var.google_default_region
data_location = var.destination_data_location
trigger_function_location = var.google_default_region
poetry_cmd = var.poetry_cmd
ga4_measurement_id = var.ga4_measurement_id
Expand Down
2 changes: 1 addition & 1 deletion infrastructure/terraform/modules/activation/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ module "bigquery" {
dataset_name = local.app_prefix
description = "activation appliction logs"
project_id = var.project_id
location = "US"
location = var.data_location
default_table_expiration_ms = 360000000
}

Expand Down
5 changes: 5 additions & 0 deletions infrastructure/terraform/modules/activation/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ variable "location" {
type = string
}

# Location of the BigQuery dataset created by the activation module; the
# module passes this value as `location` to its bigquery submodule.
# Required (no default). The root module supplies it from
# `destination_data_location`, which the README documents as either a single
# region (e.g. `us-central1`) or a multi-region (e.g. `US`, `EU`).
variable "data_location" {
description = "Data storage region for activation data"
type = string
}

variable "artifact_repository_id" {
description = "Container repository id"
type = string
Expand Down
3 changes: 1 addition & 2 deletions infrastructure/terraform/modules/data-store/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ variable "data_processing_project_id" {
}

variable "google_default_region" {
default = "us-central1"
description = "The default Google Cloud region."
type = string
}
Expand Down Expand Up @@ -114,7 +113,7 @@ variable "source_ga4_export_dataset" {

variable "source_ads_export_data" {
description = "List of BigQuery's Ads Data Transfer datasets"
type = list(object({
type = list(object({
project = string
dataset = string
table_suffix = string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ variable "enabled" {
variable "region" {
description = "feature store region"
type = string
default = "us-central1"
}

variable "project_id" {
Expand Down
16 changes: 8 additions & 8 deletions python/pipelines/components/bigquery/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def bq_stored_procedure_exec(

client = bigquery.Client(
project=project,
# location=location
location=location
)

params = []
Expand Down Expand Up @@ -95,7 +95,7 @@ def bq_clustering_exec(

client = bigquery.Client(
project=project_id,
# location=location
location=location
)

model_bq_name = f"{model_name_bq_prefix}_{str(int(datetime.now().timestamp()))}"
Expand Down Expand Up @@ -125,7 +125,7 @@ def bq_clustering_exec(

client = bigquery.Client(
project=project_id,
# location=location
location=location
)

query_job = client.query(
Expand Down Expand Up @@ -157,7 +157,7 @@ def bq_evaluate(

client = bigquery.Client(
project=project,
# location=location
location=location
)

query_job = client.query(
Expand Down Expand Up @@ -222,7 +222,7 @@ def list(cls):
# Construct a BigQuery client object.
client = bigquery.Client(
project=project_id,
# location=location
location=location
)

# TODO(developer): Set dataset_id to the ID of the dataset that contains
Expand Down Expand Up @@ -354,7 +354,7 @@ def bq_flatten_tabular_binary_prediction_table(
# Construct a BigQuery client object.
client = bigquery.Client(
project=project_id,
# location=location
location=location
)

# Inspect the metadata set on destination_table and predictions_table
Expand Down Expand Up @@ -437,7 +437,7 @@ def bq_flatten_tabular_regression_table(
# Construct a BigQuery client object.
client = bigquery.Client(
project=project_id,
# location=location
location=location
)

# Inspect the metadata set on destination_table and predictions_table
Expand Down Expand Up @@ -508,7 +508,7 @@ def bq_flatten_kmeans_prediction_table(
# Construct a BigQuery client object.
client = bigquery.Client(
project=project_id,
# location=location
location=location
)

# Make an API request.
Expand Down

0 comments on commit 12ed204

Please sign in to comment.