Commit 9bfd681

Merge branch 'main' into feature/auto-audience-segmentation

zvizdo committed Oct 10, 2023
2 parents d061114 + a506012

Showing 11 changed files with 1,908 additions and 72 deletions.

config/config.yaml.tftpl (4 changes: 2 additions & 2 deletions)

@@ -189,7 +189,7 @@ vertex_ai:
   # data_source_bigquery_table_path: "bq://${project_id}.purchase_propensity.v_purchase_propensity_training_15_7"
   # data_source_bigquery_table_path: "bq://${project_id}.purchase_propensity.v_purchase_propensity_training_15_15"
   data_source_bigquery_table_path: "bq://${project_id}.purchase_propensity.v_purchase_propensity_training_30_15"
-  data_source_bigquery_table_schema: "sql/schema/table/purchase_propensity_training_preparation.json"
+  data_source_bigquery_table_schema: "../sql/schema/table/purchase_propensity_training_preparation.json"
   dataflow_service_account: "df-worker@${project_id}.iam.gserviceaccount.com"
   timestamp_split_key: null
   stratified_split_key: null
@@ -355,7 +355,7 @@ vertex_ai:
   data_source_csv_filenames: null
   optimization_objective: minimize-mae # minimize-mae | minimize-rmse | minimize-rmsle
   data_source_bigquery_table_path: "bq://${project_id}.customer_lifetime_value.v_customer_lifetime_value_training_180_30"
-  data_source_bigquery_table_schema: "sql/schema/table/customer_lifetime_value_training_preparation.json"
+  data_source_bigquery_table_schema: "../sql/schema/table/customer_lifetime_value_training_preparation.json"
   dataflow_service_account: "df-worker@${project_id}.iam.gserviceaccount.com"
   timestamp_split_key: null
   stratified_split_key: null
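Both hunks make the same fix: the table-schema paths gain a leading "../", presumably so they resolve relative to the working directory of the component that consumes the rendered config rather than the repository root. For context, the .tftpl extension marks this file as a Terraform template; the local_file.feature_store_configuration resource in the next file appears to render it. A minimal sketch of that pattern, assuming templatefile() and an illustrative variable set (the repo's exact arguments are not shown here):

# Render config.yaml.tftpl into a concrete config.yaml, substituting
# template variables such as ${project_id}.
resource "local_file" "feature_store_configuration" {
  filename = "${local.source_root_dir}/config/config.yaml"
  content = templatefile("${local.source_root_dir}/config/config.yaml.tftpl", {
    project_id = var.project_id
  })
}
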
infrastructure/terraform/main.tf (51 changes: 37 additions & 14 deletions)

@@ -57,6 +57,17 @@ locals {
   config_file_name   = "config"
   poetry_run_alias   = "${var.poetry_cmd} run"
   mds_dataset_suffix = var.create_prod_environment ? "prod" : var.create_dev_environment ? "dev" : "staging"
+
+  project_toml_file_path    = "${local.source_root_dir}/pyproject.toml"
+  project_toml_content_hash = filesha512(local.project_toml_file_path)
+
+  generated_sql_queries_directory_path = "${local.source_root_dir}/sql/query"
+  generated_sql_queries_fileset        = [for f in fileset(local.generated_sql_queries_directory_path, "*.sql") : "${local.generated_sql_queries_directory_path}/${f}"]
+  generated_sql_queries_content_hash   = sha512(join("", [for f in local.generated_sql_queries_fileset : fileexists(f) ? filesha512(f) : sha512("file-not-found")]))
+
+  generated_sql_procedures_directory_path = "${local.source_root_dir}/sql/procedure"
+  generated_sql_procedures_fileset        = [for f in fileset(local.generated_sql_procedures_directory_path, "*.sql") : "${local.generated_sql_procedures_directory_path}/${f}"]
+  generated_sql_procedures_content_hash   = sha512(join("", [for f in local.generated_sql_procedures_fileset : fileexists(f) ? filesha512(f) : sha512("file-not-found")]))
 }

 resource "local_file" "feature_store_configuration" {
@@ -75,39 +86,51 @@ resource "local_file" "feature_store_configuration" {
 }

 resource "null_resource" "poetry_install" {
+  triggers = {
+    create_command       = "${var.poetry_cmd} install"
+    source_contents_hash = local.project_toml_content_hash
+  }
+
   provisioner "local-exec" {
-    command     = "${var.poetry_cmd} install"
+    when        = create
+    command     = self.triggers.create_command
     working_dir = local.source_root_dir
   }
 }

 resource "null_resource" "generate_sql_queries" {

   triggers = {
+    create_command = <<-EOT
+    ${local.poetry_run_alias} inv apply-env-variables-queries --env-name=${local.config_file_name}
+    ${local.poetry_run_alias} inv apply-env-variables-procedures --env-name=${local.config_file_name}
+    EOT
+
+    destroy_command = <<-EOT
+    rm -f sql/query/*.sql
+    rm -f sql/procedure/*.sql
+    EOT
+
     working_dir = local.source_root_dir

     poetry_installed = null_resource.poetry_install.id

     source_contents_hash = local_file.feature_store_configuration.content_sha512
+    destination_queries_hash = local.generated_sql_queries_content_hash
+    destination_procedures_hash = local.generated_sql_procedures_content_hash
   }

   provisioner "local-exec" {
-    command = <<-EOT
-    ${local.poetry_run_alias} inv apply-env-variables-queries --env-name=${local.config_file_name}
-    ${local.poetry_run_alias} inv apply-env-variables-procedures --env-name=${local.config_file_name}
-    EOT
+    when        = create
+    command     = self.triggers.create_command
     working_dir = self.triggers.working_dir
   }

   provisioner "local-exec" {
     when        = destroy
-    command     = <<-EOT
-    rm sql/query/*.sql
-    rm sql/procedure/*.sql
-    EOT
+    command     = self.triggers.destroy_command
     working_dir = self.triggers.working_dir
   }

   depends_on = [
     local_file.feature_store_configuration,
     null_resource.poetry_install
   ]
 }
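
Routing the commands through triggers is not just tidiness. Terraform only lets a when = destroy provisioner reference self, count.index, and each.key, so anything the destroy step needs must be captured in triggers at create time; as a side effect, the create path now re-runs whenever one of the content hashes above changes. The cleanup also gains rm -f, so destroy no longer fails when the generated files are already gone. A self-contained sketch of the constraint (names are illustrative):

variable "output_dir" {
  type    = string
  default = "/tmp/generated"
}

resource "null_resource" "cleanup_example" {
  triggers = {
    # Captured at create time so the destroy provisioner can read it later.
    cleanup_command = "rm -f ${var.output_dir}/*.tmp"
  }

  provisioner "local-exec" {
    when    = destroy
    command = self.triggers.cleanup_command # destroy-time code may only read from self
  }
}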

module "feature_store" {
infrastructure/terraform/modules/activation/main.tf (97 changes: 65 additions & 32 deletions)

@@ -39,6 +39,10 @@ locals {

 }

+data "google_project" "activation_project" {
+  project_id = var.project_id
+}
+
module "project_services" {
source = "terraform-google-modules/project-factory/google//modules/project_services"
version = "14.1.0"
@@ -62,6 +66,8 @@ module "project_services" {
"storage.googleapis.com",
"datapipelines.googleapis.com",
"analyticsadmin.googleapis.com",
"eventarc.googleapis.com",
"run.googleapis.com",
]
}
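
The two new APIs line up with the function migration below: 2nd-gen Cloud Functions are deployed on Cloud Run (run.googleapis.com) and receive Pub/Sub events through Eventarc (eventarc.googleapis.com), so both services must be enabled before the google_cloudfunctions2_function resource can deploy.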

@@ -351,46 +357,73 @@ resource "google_storage_bucket_object" "activation_trigger_archive" {
   bucket = module.function_bucket.name
 }

-resource "google_cloudfunctions_function" "activation_trigger_cf" {
-  name    = "activation-trigger"
-  project = var.project_id
-  region  = var.trigger_function_location
-  runtime = "python311"
-
-  available_memory_mb   = 256
-  max_instances         = 3
-  source_archive_bucket = module.function_bucket.name
-  source_archive_object = google_storage_bucket_object.activation_trigger_archive.name
-  event_trigger {
-    event_type = "google.pubsub.topic.publish"
-    resource   = google_pubsub_topic.activation_trigger.name
-  }
-  timeout               = 60
-  entry_point           = "subscribe"
-  service_account_email = module.trigger_function_account.email
-
-  environment_variables = {
-    ACTIVATION_PROJECT            = var.project_id
-    ACTIVATION_REGION             = var.location
-    ACTIVATION_TYPE_CONFIGURATION = "gs://${module.pipeline_bucket.name}/${google_storage_bucket_object.activation_type_configuration_file.output_name}"
-    TEMPLATE_FILE_GCS_LOCATION    = "gs://${module.pipeline_bucket.name}/dataflow/templates/${local.activation_container_image_id}.json"
-    PIPELINE_TEMP_LOCATION        = "gs://${module.pipeline_bucket.name}/tmp/"
-    LOG_DATA_SET                  = module.bigquery.bigquery_dataset.dataset_id
-    PIPELINE_WORKER_EMAIL         = module.pipeline_service_account.email
-  }
-
-  secret_environment_variables {
-    key     = "GA4_MEASUREMENT_ID"
-    secret  = split("/", module.secret_manager.secret_names[0])[3]
-    version = split("/", module.secret_manager.secret_versions[0])[5]
-  }
-
-  secret_environment_variables {
-    key     = "GA4_MEASUREMENT_SECRET"
-    secret  = split("/", module.secret_manager.secret_names[1])[3]
-    version = split("/", module.secret_manager.secret_versions[1])[5]
-  }
-}
+resource "google_cloudfunctions2_function" "activation_trigger_cf" {
+  name     = "activation-trigger"
+  project  = var.project_id
+  location = var.trigger_function_location
+
+  build_config {
+    runtime = "python311"
+    source {
+      storage_source {
+        bucket = module.function_bucket.name
+        object = google_storage_bucket_object.activation_trigger_archive.name
+      }
+    }
+    entry_point = "subscribe"
+  }
+
+  event_trigger {
+    event_type     = "google.cloud.pubsub.topic.v1.messagePublished"
+    pubsub_topic   = google_pubsub_topic.activation_trigger.id
+    retry_policy   = "RETRY_POLICY_DO_NOT_RETRY"
+    trigger_region = var.trigger_function_location
+  }
+
+  service_config {
+    available_memory      = "256M"
+    max_instance_count    = 3
+    timeout_seconds       = 60
+    ingress_settings      = "ALLOW_INTERNAL_ONLY"
+    service_account_email = module.trigger_function_account.email
+
+    environment_variables = {
+      ACTIVATION_PROJECT            = var.project_id
+      ACTIVATION_REGION             = var.location
+      ACTIVATION_TYPE_CONFIGURATION = "gs://${module.pipeline_bucket.name}/${google_storage_bucket_object.activation_type_configuration_file.output_name}"
+      TEMPLATE_FILE_GCS_LOCATION    = "gs://${module.pipeline_bucket.name}/dataflow/templates/${local.activation_container_image_id}.json"
+      PIPELINE_TEMP_LOCATION        = "gs://${module.pipeline_bucket.name}/tmp/"
+      LOG_DATA_SET                  = module.bigquery.bigquery_dataset.dataset_id
+      PIPELINE_WORKER_EMAIL         = module.pipeline_service_account.email
+    }
+
+    secret_environment_variables {
+      project_id = var.project_id
+      key        = "GA4_MEASUREMENT_ID"
+      secret     = split("/", module.secret_manager.secret_names[0])[3]
+      version    = split("/", module.secret_manager.secret_versions[0])[5]
+    }
+
+    secret_environment_variables {
+      project_id = var.project_id
+      key        = "GA4_MEASUREMENT_SECRET"
+      secret     = split("/", module.secret_manager.secret_names[1])[3]
+      version    = split("/", module.secret_manager.secret_versions[1])[5]
+    }
+  }
+
+  lifecycle {
+    ignore_changes = [build_config[0].source[0].storage_source[0].generation]
+  }
+
+  depends_on = [
+    module.project_services
+  ]
+}
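
Read as a before/after, the gen1-to-gen2 migration re-homes each argument rather than changing behavior:

- region becomes location; runtime, entry_point, and the source archive move into build_config, with the bucket/object pair nested under source.storage_source.
- available_memory_mb = 256 becomes service_config.available_memory = "256M", max_instances becomes max_instance_count, and timeout becomes timeout_seconds; the function also becomes internal-only via ingress_settings = "ALLOW_INTERNAL_ONLY".
- Environment and secret variables move into service_config, and each secret_environment_variables block now names its project_id explicitly. The split("/", ...)[3] and [5] expressions pick the secret name and version number out of Secret Manager resource names of the form projects/*/secrets/*/versions/*.
- The trigger switches from the legacy google.pubsub.topic.publish event on a topic name to the Eventarc CloudEvent type google.cloud.pubsub.topic.v1.messagePublished on the topic's full resource id, with an explicit retry_policy and trigger_region.
- lifecycle.ignore_changes on the source object's generation stops Terraform from redeploying the function on every run just because the archive was re-uploaded to GCS under a new object generation.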

module "add_invoker_binding" {
source = "terraform-google-modules/gcloud/google"
version = "3.1.2"

platform = "linux"

create_cmd_body = "functions add-invoker-policy-binding ${google_cloudfunctions2_function.activation_trigger_cf.name} --project=${google_cloudfunctions2_function.activation_trigger_cf.project} --region=${google_cloudfunctions2_function.activation_trigger_cf.location} --member=\"serviceAccount:${data.google_project.activation_project.number}[email protected]\""
destroy_cmd_body = "functions remove-invoker-policy-binding ${google_cloudfunctions2_function.activation_trigger_cf.name} --project=${google_cloudfunctions2_function.activation_trigger_cf.project} --region=${google_cloudfunctions2_function.activation_trigger_cf.location} --member=\"serviceAccount:${data.google_project.activation_project.number}[email protected]\""
}
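
Because a 2nd-gen function is served by an underlying Cloud Run service, whatever identity delivers its events needs roles/run.invoker on that service. The Eventarc Pub/Sub trigger above does not set its own service account, so delivery defaults to the project's Compute Engine default service account, and that is the member the gcloud functions add-invoker-policy-binding call grants. A possible native-Terraform equivalent (a sketch, not what the repo uses; it assumes the underlying Cloud Run service shares the function's name):

resource "google_cloud_run_service_iam_member" "trigger_invoker" {
  project  = google_cloudfunctions2_function.activation_trigger_cf.project
  location = google_cloudfunctions2_function.activation_trigger_cf.location
  service  = google_cloudfunctions2_function.activation_trigger_cf.name
  role     = "roles/run.invoker"
  member   = "serviceAccount:${data.google_project.activation_project.number}[email protected]"
}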