From ab4072227fe00ac6a9be8fadafa70fb215f35f0d Mon Sep 17 00:00:00 2001 From: Doc Ritezel Date: Tue, 1 Jul 2025 09:05:17 -0700 Subject: [PATCH] Use workload identity federation in composer --- .../publish_open_data/publish_california_open_data.yml | 5 ----- .../validate_gtfs_schedule.yml | 7 ------- iac/cal-itp-data-infra-staging/composer/us/environment.tf | 2 +- iac/cal-itp-data-infra-staging/composer/us/kubernetes.tf | 5 ++--- iac/cal-itp-data-infra-staging/composer/us/variables.tf | 6 +++--- iac/cal-itp-data-infra/composer/us/environment.tf | 2 +- iac/cal-itp-data-infra/composer/us/kubernetes.tf | 3 +-- iac/cal-itp-data-infra/composer/us/variables.tf | 7 ++++--- warehouse/profiles.yml | 8 -------- 9 files changed, 12 insertions(+), 33 deletions(-) diff --git a/airflow/dags/publish_open_data/publish_california_open_data.yml b/airflow/dags/publish_open_data/publish_california_open_data.yml index 3e450063da..feb17790f8 100644 --- a/airflow/dags/publish_open_data/publish_california_open_data.yml +++ b/airflow/dags/publish_open_data/publish_california_open_data.yml @@ -14,15 +14,10 @@ is_delete_operator_pod: true get_logs: true env_vars: - GOOGLE_APPLICATION_CREDENTIALS: /secrets/jobs-data/service_account.json CALITP_BUCKET__DBT_ARTIFACTS: "{{ env_var('CALITP_BUCKET__DBT_ARTIFACTS') }}" CALITP_BUCKET__PUBLISH: "{{ env_var('CALITP_BUCKET__PUBLISH') }}" secrets: - - deploy_type: volume - deploy_target: /secrets/jobs-data/ - secret: jobs-data - key: service-account.json - deploy_type: env deploy_target: CALITP_CKAN_GTFS_SCHEDULE_KEY secret: jobs-data diff --git a/airflow/dags/unzip_and_validate_gtfs_schedule_hourly/validate_gtfs_schedule.yml b/airflow/dags/unzip_and_validate_gtfs_schedule_hourly/validate_gtfs_schedule.yml index e7b07f59a6..0ac26f34c3 100644 --- a/airflow/dags/unzip_and_validate_gtfs_schedule_hourly/validate_gtfs_schedule.yml +++ b/airflow/dags/unzip_and_validate_gtfs_schedule_hourly/validate_gtfs_schedule.yml @@ -17,19 +17,12 @@ is_delete_operator_pod: 
true get_logs: true env_vars: - GOOGLE_APPLICATION_CREDENTIALS: /secrets/jobs-data/service_account.json AIRFLOW_ENV: "{{ env_var('AIRFLOW_ENV') }}" CALITP_USER: "{{ env_var('CALITP_USER') }}" CALITP_BUCKET__GTFS_SCHEDULE_RAW: "{{ env_var('CALITP_BUCKET__GTFS_SCHEDULE_RAW') }}" CALITP_BUCKET__GTFS_SCHEDULE_VALIDATION_HOURLY: "{{ env_var('CALITP_BUCKET__GTFS_SCHEDULE_VALIDATION_HOURLY') }}" GRAAS_SERVER_URL: "{{ env_var('GRAAS_SERVER_URL') }}" -secrets: - - deploy_type: volume - deploy_target: /secrets/jobs-data/ - secret: jobs-data - key: service_account.json - k8s_resources: request_memory: 5.0Gi request_cpu: 1 diff --git a/iac/cal-itp-data-infra-staging/composer/us/environment.tf b/iac/cal-itp-data-infra-staging/composer/us/environment.tf index 28cce09716..84197a7707 100644 --- a/iac/cal-itp-data-infra-staging/composer/us/environment.tf +++ b/iac/cal-itp-data-infra-staging/composer/us/environment.tf @@ -61,7 +61,7 @@ resource "google_composer_environment" "calitp-staging-composer" { "POD_LOCATION" = "us-west2", "POD_CLUSTER_NAME" = data.terraform_remote_state.gke.outputs.google_container_cluster_airflow-jobs-staging_name, "POD_SECRETS_NAMESPACE" = local.namespace, - "SERVICE_ACCOUNT_NAME" = local.service_account_name, + "SERVICE_ACCOUNT_NAME" = local.kubernetes_service_account, "CALITP_BUCKET__AGGREGATOR_SCRAPER" = "gs://${data.terraform_remote_state.gcs.outputs.google_storage_bucket_calitp-staging-aggregator-scraper_name}", "CALITP_BUCKET__AIRTABLE" = "gs://${data.terraform_remote_state.gcs.outputs.google_storage_bucket_calitp-staging-airtable_name}", "CALITP_BUCKET__AMPLITUDE_BENEFITS_EVENTS" = "gs://${data.terraform_remote_state.gcs.outputs.google_storage_bucket_calitp-staging-amplitude-benefits-events_name}", diff --git a/iac/cal-itp-data-infra-staging/composer/us/kubernetes.tf b/iac/cal-itp-data-infra-staging/composer/us/kubernetes.tf index 5fa8ea4655..d1771745d2 100644 --- a/iac/cal-itp-data-infra-staging/composer/us/kubernetes.tf +++ 
b/iac/cal-itp-data-infra-staging/composer/us/kubernetes.tf @@ -22,8 +22,7 @@ resource "kubernetes_secret" "composer" { namespace = local.namespace } data = { - "service_account.json" = base64decode(google_service_account_key.composer.private_key) - transitland-api-key = data.kubernetes_secret.composer.data.transitland-api-key + transitland-api-key = data.kubernetes_secret.composer.data.transitland-api-key } } @@ -38,7 +37,7 @@ resource "kubernetes_priority_class" "dbt-high-priority" { resource "kubernetes_service_account" "composer-service-account" { metadata { - name = local.service_account_name + name = local.kubernetes_service_account namespace = local.namespace annotations = { "iam.gke.io/gcp-service-account" = data.terraform_remote_state.iam.outputs.google_service_account_composer-service-account_email diff --git a/iac/cal-itp-data-infra-staging/composer/us/variables.tf b/iac/cal-itp-data-infra-staging/composer/us/variables.tf index 1c4830c488..8fec60c736 100644 --- a/iac/cal-itp-data-infra-staging/composer/us/variables.tf +++ b/iac/cal-itp-data-infra-staging/composer/us/variables.tf @@ -1,7 +1,7 @@ locals { - namespace = "airflow-jobs" - secret = "jobs-data" - service_account_name = "composer-service-account" + namespace = "airflow-jobs" + secret = "jobs-data" + kubernetes_service_account = "composer-service-account" # This regular expression corresponds to the Python package name specification # https://packaging.python.org/en/latest/specifications/name-normalization/ diff --git a/iac/cal-itp-data-infra/composer/us/environment.tf b/iac/cal-itp-data-infra/composer/us/environment.tf index 62d0dc53ab..96b1aa7ff1 100644 --- a/iac/cal-itp-data-infra/composer/us/environment.tf +++ b/iac/cal-itp-data-infra/composer/us/environment.tf @@ -61,7 +61,7 @@ resource "google_composer_environment" "calitp-composer" { "POD_LOCATION" = "us-west2", "POD_CLUSTER_NAME" = data.terraform_remote_state.gke.outputs.google_container_cluster_airflow-jobs_name, "POD_SECRETS_NAMESPACE" = 
local.namespace, - "SERVICE_ACCOUNT_NAME" = local.service_account_name, + "SERVICE_ACCOUNT_NAME" = local.kubernetes_service_account, "CALITP_BUCKET__AGGREGATOR_SCRAPER" = "gs://${data.terraform_remote_state.gcs.outputs.google_storage_bucket_calitp-aggregator-scraper_name}", "CALITP_BUCKET__AIRTABLE" = "gs://${data.terraform_remote_state.gcs.outputs.google_storage_bucket_calitp-airtable_name}", "CALITP_BUCKET__AMPLITUDE_BENEFITS_EVENTS" = "gs://${data.terraform_remote_state.gcs.outputs.google_storage_bucket_calitp-amplitude-benefits-events_name}", diff --git a/iac/cal-itp-data-infra/composer/us/kubernetes.tf b/iac/cal-itp-data-infra/composer/us/kubernetes.tf index ede1dd51af..6d5ecfc8b1 100644 --- a/iac/cal-itp-data-infra/composer/us/kubernetes.tf +++ b/iac/cal-itp-data-infra/composer/us/kubernetes.tf @@ -24,7 +24,6 @@ resource "kubernetes_secret" "composer" { data = { calitp-ckan-gtfs-schedule-key = data.kubernetes_secret.composer.data.calitp-ckan-gtfs-schedule-key - "service_account.json" = base64decode(google_service_account_key.composer.private_key) transitland-api-key = data.kubernetes_secret.composer.data.transitland-api-key } } @@ -40,7 +39,7 @@ resource "kubernetes_priority_class" "dbt-high-priority" { resource "kubernetes_service_account" "composer-service-account" { metadata { - name = local.service_account_name + name = local.kubernetes_service_account namespace = local.namespace annotations = { "iam.gke.io/gcp-service-account" = data.terraform_remote_state.iam.outputs.google_service_account_composer-service-account_email diff --git a/iac/cal-itp-data-infra/composer/us/variables.tf b/iac/cal-itp-data-infra/composer/us/variables.tf index ca988d599e..88812bbc06 100644 --- a/iac/cal-itp-data-infra/composer/us/variables.tf +++ b/iac/cal-itp-data-infra/composer/us/variables.tf @@ -1,7 +1,8 @@ locals { - namespace = "airflow-jobs" - secret = "jobs-data" - service_account_name = "composer-service-account" + namespace = "airflow-jobs" + secret = "jobs-data" + 
kubernetes_service_account = "composer-service-account" + # This regular expression corresponds to the Python package name specification # https://packaging.python.org/en/latest/specifications/name-normalization/ python_package_regex = "(?P[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9])(?P.*)" diff --git a/warehouse/profiles.yml b/warehouse/profiles.yml index f18d307259..535ed47eb4 100644 --- a/warehouse/profiles.yml +++ b/warehouse/profiles.yml @@ -26,10 +26,6 @@ calitp_warehouse: spark.executor.instances: "4" # dbt defaults to 2 spark.executor.memory: 4g spark.dynamicAllocation.maxExecutors: "16" - prod_service_account: - <<: *prod - method: service-account - keyfile: "{{ env_var('BIGQUERY_KEYFILE_LOCATION', '/secrets/jobs-data/service-account.json') }}" staging: &staging <<: *prod @@ -37,7 +33,3 @@ calitp_warehouse: database: cal-itp-data-infra-staging schema: staging gcs_bucket: test-calitp-dbt-python-models - staging_service_account: - <<: *staging - method: service-account - keyfile: "{{ env_var('BIGQUERY_KEYFILE_LOCATION', '/secrets/jobs-data/service-account.json') }}"