Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Property id in resources for multi property support #245

Merged
7 changes: 3 additions & 4 deletions docs/data_store.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,11 @@ To deploy the Marketing Data Store, follow the pre-requisites and instructions i
Next, after creating the Terraform variables file by making a copy from the template, set the Terraform variables to create the environments you need for Dataform.

```bash
create_dev_environment = false
create_staging_environment = false
create_prod_environment = true
deploy_dataform = true
property_id = "PROPERTY_ID"
```

When the `create_dev_environment` variable is set to `true`, a development environment will be created. When the `create_staging_environment` variable is set to `true`, a staging environment will be created. When the `create_prod_environment` variable is set to `true`, a production environment will be created.
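When the `deploy_dataform` variable is set to `true`, a Dataform workspace will be created. The `property_id` variable is your Google Analytics 4 property ID; it identifies the Marketing Data Store deployment and is used in the names of the resources created for it.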

![Dataform Repository](images/data_store_dataform_github_repository.png)
After deploying the Marketing Data Store, the repository called `marketing_analytics` is created in Dataform.
Expand Down
5 changes: 1 addition & 4 deletions infrastructure/cloudshell/terraform-template.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,7 @@
tf_state_project_id = "${MAJ_DEFAULT_PROJECT_ID}"
google_default_region = "${MAJ_DEFAULT_REGION}"

create_dev_environment = false
create_staging_environment = false
create_prod_environment = true

deploy_dataform = true
deploy_activation = true
deploy_feature_store = true
deploy_pipelines = true
Expand Down
21 changes: 6 additions & 15 deletions infrastructure/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ locals {
# The uv_run_alias is the alias of the uv run command.
uv_run_alias = "${var.uv_cmd} run"
# The mds_dataset_suffix is the suffix of the marketing data store dataset.
mds_dataset_suffix = var.create_staging_environment ? "staging" : var.create_dev_environment ? "dev" : "prod"
mds_dataset_suffix = var.property_id
# The project_toml_file_path is the path to the project.toml file.
project_toml_file_path = "${local.source_root_dir}/pyproject.toml"
# The project_toml_content_hash is the hash of the project.toml file.
Expand Down Expand Up @@ -284,8 +284,7 @@ resource "null_resource" "check_iam_api" {
# Create the data store module.
# The data store module creates the marketing data store in BigQuery, creates the ETL pipeline in Dataform
# for the marketing data from Google Ads and Google Analytics.
# The data store is created only if the `create_prod_environment`, `create_staging_environment`
# or `create_dev_environment` variable is set to true in the terraform.tfvars file.
# The data store is created only if the `deploy_dataform` variable is set to true in the terraform.tfvars file.
# The data store is created in the `data_project_id` project.
module "data_store" {
# The source directory of the data store module.
Expand Down Expand Up @@ -317,18 +316,10 @@ module "data_store" {
dataform_github_repo = var.dataform_github_repo
dataform_github_token = var.dataform_github_token

# The create_dev_environment is set in the terraform.tfvars file.
# The create_dev_environment determines if the dev environment is created.
# When the value is true, the dev environment is created.
# The create_staging_environment is set in the terraform.tfvars file.
# The create_staging_environment determines if the staging environment is created.
# When the value is true, the staging environment is created.
# The create_prod_environment is set in the terraform.tfvars file.
# The create_prod_environment determines if the prod environment is created.
# When the value is true, the prod environment is created.
create_dev_environment = var.create_dev_environment
create_staging_environment = var.create_staging_environment
create_prod_environment = var.create_prod_environment
# The deploy_dataform determines if dataform is created.
# When the value is true, the dataform environment is created.
deploy_dataform = var.deploy_dataform
property_id = var.property_id

# The dev_data_project_id is the project ID of where the dev datasets will be created.
# If not provided, data_project_id will be used.
Expand Down
79 changes: 4 additions & 75 deletions infrastructure/terraform/modules/data-store/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -29,90 +29,19 @@ provider "google" {
region = var.google_default_region
}

# This module sets up a Dataform workflow environment for the "dev" environment.
module "dataform-workflow-dev" {
  # The count argument specifies how many instances of the module should be created.
  # It is set to var.create_dev_environment ? 1 : 0, so the module is instantiated
  # only when the var.create_dev_environment variable is set to `true`.
  # Check the terraform.tfvars file for more information.
  count = var.create_dev_environment ? 1 : 0
  # The path to the Terraform module that will be used to create the Dataform workflow environment.
  source = "../dataform-workflow"

  # NOTE(review): referencing null_resource.check_dataform_api here appears intended to
  # order this module after the Dataform API check -- confirm against that resource.
  project_id = null_resource.check_dataform_api.id != "" ? module.data_processing_project_services.project_id : data.google_project.data_processing.project_id
  # The name of the Dataform workflow environment.
  environment = "dev"
  region      = var.google_default_region
  # The ID of the Dataform repository that will be used by the Dataform workflow environment.
  dataform_repository_id = google_dataform_repository.marketing-analytics.id
  # A list of tags that will be used to filter the Dataform files that are included in the Dataform workflow environment.
  includedTags = ["ga4"]

  source_ga4_export_project_id         = var.source_ga4_export_project_id
  source_ga4_export_dataset            = var.source_ga4_export_dataset
  ga4_incremental_processing_days_back = var.ga4_incremental_processing_days_back
  source_ads_export_data               = var.source_ads_export_data
  # Use the dev-specific project when one is provided; otherwise fall back to data_project_id.
  # (Previously the "true" branch selected var.staging_data_project_id, which sent dev
  # output to the staging project, or to an empty project ID when staging was unset.)
  destination_bigquery_project_id = length(var.dev_data_project_id) > 0 ? var.dev_data_project_id : var.data_project_id
  # Use the dev-specific location when one is provided; otherwise fall back to destination_data_location.
  destination_bigquery_dataset_location = length(var.dev_destination_data_location) > 0 ? var.dev_destination_data_location : var.destination_data_location

  # The daily schedule for running the Dataform workflow.
  # Depending on the hour that your Google Analytics 4 BigQuery Export is set,
  # you may have to change this to execute at a later time of the day.
  # Observe that the GA4 BigQuery Export Schedule documentation
  # https://support.google.com/analytics/answer/9358801?hl=en#:~:text=A%20full%20export%20of%20data,(see%20Streaming%20export%20below).
  # Check https://crontab.guru/#0_5-23/4_*_*_* to see next execution times.
  daily_schedule = "0 5-23/4 * * *"
  time_zone      = var.time_zone
}

# This module sets up a Dataform workflow environment for the "staging" environment.
module "dataform-workflow-staging" {
  # The count argument specifies how many instances of the module should be created.
  # It is set to var.create_staging_environment ? 1 : 0, so the module is instantiated
  # only when the var.create_staging_environment variable is set to `true`.
  # Check the terraform.tfvars file for more information.
  count = var.create_staging_environment ? 1 : 0
  # The path to the Terraform module that will be used to create the Dataform workflow environment.
  source = "../dataform-workflow"

  # NOTE(review): referencing null_resource.check_dataform_api here appears intended to
  # order this module after the Dataform API check -- confirm against that resource.
  project_id = null_resource.check_dataform_api.id != "" ? module.data_processing_project_services.project_id : data.google_project.data_processing.project_id
  # The name of the Dataform workflow environment.
  environment = "staging"
  region      = var.google_default_region
  # The ID of the Dataform repository that will be used by the Dataform workflow environment.
  dataform_repository_id = google_dataform_repository.marketing-analytics.id
  # A list of tags that will be used to filter the Dataform files that are included in the Dataform workflow environment.
  includedTags = ["ga4"]

  source_ga4_export_project_id = var.source_ga4_export_project_id
  source_ga4_export_dataset    = var.source_ga4_export_dataset
  # Forward the configured look-back window, as the "dev" instance of this module does,
  # so the staging workflow does not silently run with the child module's default.
  ga4_incremental_processing_days_back = var.ga4_incremental_processing_days_back
  source_ads_export_data               = var.source_ads_export_data
  # Use the staging-specific project when one is provided; otherwise fall back to data_project_id.
  destination_bigquery_project_id = length(var.staging_data_project_id) > 0 ? var.staging_data_project_id : var.data_project_id
  # Use the staging-specific location when one is provided; otherwise fall back to destination_data_location.
  destination_bigquery_dataset_location = length(var.staging_destination_data_location) > 0 ? var.staging_destination_data_location : var.destination_data_location

  # The daily schedule for running the Dataform workflow.
  # Depending on the hour that your Google Analytics 4 BigQuery Export is set,
  # you may have to change this to execute at a later time of the day.
  # Observe that the GA4 BigQuery Export Schedule documentation
  # https://support.google.com/analytics/answer/9358801?hl=en#:~:text=A%20full%20export%20of%20data,(see%20Streaming%20export%20below).
  # Check https://crontab.guru/#0_5-23/4_*_*_* to see next execution times.
  daily_schedule = "0 5-23/4 * * *"
  time_zone      = var.time_zone
}

# This module sets up a Dataform workflow environment for the "prod" environment.
module "dataform-workflow-prod" {
# The count argument specifies how many instances of the module should be created.
# In this case, it's set to var.create_prod_environment ? 1 : 0, which means that
# the module will be created only if the var.create_prod_environment variable is set to `true`.
# In this case, it's set to var.deploy_dataform ? 1 : 0, which means that
# the module will be created only if the var.deploy_dataform variable is set to `true`.
# Check the terraform.tfvars file for more information.
count = var.create_prod_environment ? 1 : 0
count = var.deploy_dataform ? 1 : 0
# the path to the Terraform module that will be used to create the Dataform workflow environment.
source = "../dataform-workflow"

project_id = null_resource.check_dataform_api.id != "" ? module.data_processing_project_services.project_id : data.google_project.data_processing.project_id
# The name of the Dataform workflow environment.
environment = "prod"
property_id = var.property_id
region = var.google_default_region
dataform_repository_id = google_dataform_repository.marketing-analytics.id

Expand Down
22 changes: 8 additions & 14 deletions infrastructure/terraform/modules/data-store/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,6 @@ variable "dataform_github_token" {
type = string
}

# Boolean switch for the "dev" Dataform workflow environment.
# It gates the `dataform-workflow-dev` module through that module's `count` argument.
variable "create_dev_environment" {
  type        = bool
  default     = true
  description = "Indicates that a development environment needs to be created"
}

variable "dev_data_project_id" {
description = "Project ID of where the dev datasets will created. If not provided, data_project_id will be used."
type = string
Expand All @@ -65,12 +59,6 @@ variable "dev_destination_data_location" {
default = ""
}

# Boolean switch for the "staging" Dataform workflow environment.
# It gates the `dataform-workflow-staging` module through that module's `count` argument.
variable "create_staging_environment" {
  type        = bool
  default     = true
  description = "Indicates that a staging environment needs to be created"
}

variable "staging_data_project_id" {
description = "Project ID of where the staging datasets will created. If not provided, data_project_id will be used."
type = string
Expand All @@ -83,12 +71,18 @@ variable "staging_destination_data_location" {
default = ""
}

variable "create_prod_environment" {
description = "Indicates that a production environment needs to be created"
variable "deploy_dataform" {
description = "Indicates that a dataform workspace needs to be created"
type = bool
default = true
}

# Google Analytics 4 property identifier for this data store deployment.
# It is forwarded to the dataform-workflow module as its `property_id` input.
variable "property_id" {
  type        = string
  default     = ""
  description = "Google Analytics 4 Property id to create an MDS for it"
}

variable "prod_data_project_id" {
description = "Project ID of where the prod datasets will created. If not provided, data_project_id will be used."
type = string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ locals {
# This resource creates a workflow that runs the Dataform incremental pipeline.
resource "google_workflows_workflow" "dataform-incremental-workflow" {
project = null_resource.check_workflows_api.id != "" ? module.data_processing_project_services.project_id : var.project_id
name = "dataform-${var.environment}-incremental"
name = "dataform-${var.property_id}-incremental"
region = var.region
description = "Dataform incremental workflow for ${var.environment} environment"
description = "Dataform incremental workflow for ${var.property_id} ga4 property"
service_account = google_service_account.workflow-dataform.email
# The source code includes the following steps:
# Init: This step initializes the workflow by assigning the value of the dataform_repository_id variable to the repository variable.
Expand All @@ -49,7 +49,7 @@ main:
defaultDatabase: ${var.destination_bigquery_project_id}
defaultLocation: ${var.destination_bigquery_dataset_location}
vars:
env: ${var.environment}
ga4_property_id: '${var.property_id}'
ga4_export_project: ${var.source_ga4_export_project_id}
ga4_export_dataset: ${var.source_ga4_export_dataset}
ga4_incremental_processing_days_back: '${var.ga4_incremental_processing_days_back}'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
# This creates a Cloud Scheduler job that triggers the Dataform incremental workflow on a daily schedule.
resource "google_cloud_scheduler_job" "daily-dataform-increments" {
project = module.data_processing_project_services.project_id
name = "daily-dataform-${var.environment}"
description = "Daily Dataform ${var.environment} environment incremental update"
name = "daily-dataform-${var.property_id}"
description = "Daily Dataform ${var.property_id} property export incremental update"
# The schedule attribute specifies the schedule for the job. In this case, the job is scheduled to run daily at the specified times.
schedule = var.daily_schedule
time_zone = var.time_zone
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ resource "google_service_account" "scheduler" {
]

project = null_resource.check_cloudscheduler_api.id != "" ? module.data_processing_project_services.project_id : var.project_id
account_id = "workflow-scheduler-${var.environment}"
display_name = "Service Account to schedule Dataform workflows in ${var.environment}"
account_id = "workflow-scheduler-${var.property_id}"
display_name = "Service Account to schedule Dataform workflows in ${var.property_id}"
}

locals {
scheduler_sa = "workflow-scheduler-${var.environment}@${module.data_processing_project_services.project_id}.iam.gserviceaccount.com"
workflows_sa = "workflow-dataform-${var.environment}@${module.data_processing_project_services.project_id}.iam.gserviceaccount.com"
scheduler_sa = "workflow-scheduler-${var.property_id}@${module.data_processing_project_services.project_id}.iam.gserviceaccount.com"
workflows_sa = "workflow-dataform-${var.property_id}@${module.data_processing_project_services.project_id}.iam.gserviceaccount.com"
}

# Wait for the scheduler service account to be created
Expand Down Expand Up @@ -74,8 +74,8 @@ resource "google_service_account" "workflow-dataform" {
]

project = null_resource.check_workflows_api.id != "" ? module.data_processing_project_services.project_id : var.project_id
account_id = "workflow-dataform-${var.environment}"
display_name = "Service Account to run Dataform workflows in ${var.environment}"
account_id = "workflow-dataform-${var.property_id}"
display_name = "Service Account to run Dataform workflows in ${var.property_id}"
}

# Wait for the workflows service account to be created
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ variable "region" {
type = string
}

variable "environment" {
variable "property_id" {
type = string
}

Expand Down
6 changes: 2 additions & 4 deletions infrastructure/terraform/terraform-sample.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,7 @@

tf_state_project_id = "Google Cloud project where the terraform state file is stored"

create_dev_environment = false
create_staging_environment = false
create_prod_environment = true

deploy_dataform = true
deploy_activation = true
deploy_feature_store = true
deploy_pipelines = true
Expand All @@ -28,6 +25,7 @@ deploy_monitoring = true
#################### DATA VARIABLES #################################

data_project_id = "Project id where the MDS datasets will be created"
property_id = "Google Analytics 4 property id to identify a unique MDS deployment"
destination_data_location = "BigQuery location (either regional or multi-regional) for the MDS BigQuery datasets."
data_processing_project_id = "Project id where the Dataform will be installed and run"
source_ga4_export_project_id = "Project id which contains the GA4 export dataset"
Expand Down
26 changes: 10 additions & 16 deletions infrastructure/terraform/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,6 @@ variable "pipelines_github_owner" {
default = "temporarily unused"
}

# Root-level switch for the development environment.
# The value is handed to the data store module as `create_dev_environment`.
variable "create_dev_environment" {
  type        = bool
  default     = true
  description = "Indicates that a development environment needs to be created"
}

variable "dev_data_project_id" {
description = "Project ID of where the dev datasets will created. If not provided, data_project_id will be used."
type = string
Expand All @@ -99,12 +93,6 @@ variable "dev_destination_data_location" {
default = ""
}

# Root-level switch for the staging environment.
# The value is handed to the data store module as `create_staging_environment`.
variable "create_staging_environment" {
  type        = bool
  default     = true
  description = "Indicates that a staging environment needs to be created"
}

variable "staging_data_project_id" {
description = "Project ID of where the staging datasets will created. If not provided, data_project_id will be used."
type = string
Expand All @@ -117,10 +105,10 @@ variable "staging_destination_data_location" {
default = ""
}

variable "create_prod_environment" {
description = "Indicates that a production environment needs to be created"
type = bool
default = true
# Google Analytics 4 property identifier for this deployment.
# It is passed to the data store module as `property_id` and is also used as the
# marketing data store dataset suffix (`local.mds_dataset_suffix`).
variable "property_id" {
  type        = string
  default     = ""
  description = "Google Analytics 4 Property ID to install the MDS"
}

variable "prod_data_project_id" {
Expand Down Expand Up @@ -189,6 +177,12 @@ variable "ga4_measurement_secret" {
sensitive = true
}

# Switch for the Dataform deployment.
# The value is passed to the data store module as `deploy_dataform`, where it gates
# creation of the Dataform workflow module through `count`.
# NOTE(review): the data-store module declares its own default of `true` for this
# variable, but the root always passes a value, so this default (`false`) is the one
# that takes effect when the tfvars file omits it -- confirm that is intended.
variable "deploy_dataform" {
  description = "Toggler for dataform module"
  type        = bool
  default     = false
}

variable "deploy_activation" {
description = "Toggler for activation module"
type = bool
Expand Down
Loading