Add Terraform configuration for AWS Batch #34

Merged 4 commits on May 21, 2022
2 changes: 2 additions & 0 deletions .env.sample
@@ -4,3 +4,5 @@ FLYWHEEL_GROUP=""
ORTHANC_CREDENTIALS=""
ORTHANC_HOST=""
ORTHANC_PORT=80
PHI_DATA_BUCKET_NAME="phi-data-bucket"
SUBJECT_ID_MAPPING_PATH="s3://phi-data-bucket/subject_id_mapping.csv"
2 changes: 2 additions & 0 deletions README.md
@@ -31,6 +31,8 @@ Then, customize its contents with a text editor:
- For `FLYWHEEL_GROUP`, specify either `d3b` or an alternative group created for testing (e.g., your name).
- For `ORTHANC_CREDENTIALS`, use your Orthanc username and password specified like `username:password`.
- For `ORTHANC_HOST`, specify the hostname (minus `http(s)://`) that you use to access Orthanc.
- For `PHI_DATA_BUCKET_NAME`, specify the name of the bucket where the ETL should back up NIfTI files.
- For `SUBJECT_ID_MAPPING_PATH`, specify the [path](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html) to the CSV file containing subject ID mappings.

Next, run `update` to build the container image and initialize the database:

65 changes: 65 additions & 0 deletions deployment/README.md
@@ -0,0 +1,65 @@
# Deployment

- [AWS Credentials](#aws-credentials)
- [Publish Container Images](#publish-container-images)
- [Terraform](#terraform)
- [Database Migrations](#database-migrations)

## AWS Credentials

Follow [these](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-sso.html#sso-configure-profile) instructions to configure a named AWS profile (the resulting profile is sketched after the list below):

- Use https://d-906762f877.awsapps.com/start as the SSO start URL.
- Use `us-east-1` as the SSO region.
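
For reference, the resulting profile in `~/.aws/config` looks roughly like the sketch below. The profile name, account ID, and role name are placeholders; only the start URL and SSO region above are prescribed:

```console
$ cat ~/.aws/config
[profile my-profile]
sso_start_url  = https://d-906762f877.awsapps.com/start
sso_region     = us-east-1
sso_account_id = 111111111111
sso_role_name  = ExampleRole
region         = us-east-1
output         = json
```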

Use the `aws sso login` command to refresh your login if your credentials expire:

```console
$ aws sso login --profile my-profile
```

## Publish Container Images

Build a container image for the Python application (`cibuild`) and publish it to Amazon ECR (`cipublish`):

```console
$ ./scripts/cibuild
...
=> => naming to docker.io/library/image-deid-etl:da845bf
$ ./scripts/cipublish
```

## Terraform

Launch an instance of the included Terraform container image:

```console
$ docker-compose -f docker-compose.ci.yml run --rm terraform
bash-5.1#
```

Once inside the context of the container image, set `GIT_COMMIT` to the tag of a published container image (e.g., `da845bf`):

```console
bash-5.1# export GIT_COMMIT=da845bf
```

Finally, use `infra` to generate and apply a Terraform plan:

```console
bash-5.1# ./scripts/infra plan
bash-5.1# ./scripts/infra apply
```

## Database Migrations

Execute database migrations by submitting a Batch job that invokes the application's `initdb` command (a CLI equivalent is sketched after the console steps below):

- Select the most recent job definition for [jobImageDeidEtl](https://console.aws.amazon.com/batch/home?region=us-east-1#job-definition).
- Select **Submit new job**.
- Select the following:
- **Name**: Any one-off description of the work you're performing, e.g.: `initialize-the-database`.
- **Job queue**: `queueImageDeidEtl`.
- **Command**: `image-deid-etl initdb`.
- Click **Submit**.
- Monitor the log output of the submitted job by viewing the job detail and clicking the link under **Log group name**.
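
The same migration can also be kicked off from the command line. This is only a sketch: it assumes the job definition and queue names shown above (`jobImageDeidEtl`, `queueImageDeidEtl`) and an AWS profile named `my-profile`:

```console
$ aws batch submit-job \
    --profile my-profile \
    --job-name initialize-the-database \
    --job-queue queueImageDeidEtl \
    --job-definition jobImageDeidEtl \
    --container-overrides '{"command": ["image-deid-etl", "initdb"]}'
```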
112 changes: 112 additions & 0 deletions deployment/terraform/batch.tf
@@ -0,0 +1,112 @@
#
# Security Group resources
#
resource "aws_security_group" "batch" {
name_prefix = "sgBatchContainerInstance-"
vpc_id = var.vpc_id

tags = {
Name = "sgBatchContainerInstance"
}

lifecycle {
create_before_destroy = true
}
}

#
# Batch resources
#
resource "aws_launch_template" "default" {
name_prefix = "ltBatchContainerInstance-"

block_device_mappings {
device_name = "/dev/xvda"

ebs {
volume_size = var.batch_root_block_device_size
volume_type = var.batch_root_block_device_type
}
}

user_data = base64encode(file("cloud-config/batch-container-instance"))
}

resource "aws_batch_compute_environment" "default" {
compute_environment_name_prefix = "batch${local.short}-"
Contributor Author:

The `compute_environment_name_prefix`, combined with the `lifecycle` meta-argument, allows you to change the CE without Terraform getting stuck.

Terraform will detach the deposed CE from the job queue, and queued jobs will hang in the RUNNABLE status. Then, once the new CE is attached to the job queue, those jobs will start running again.

type = "MANAGED"
state = "ENABLED"
service_role = aws_iam_role.batch_service_role.arn

compute_resources {
type = "SPOT"
allocation_strategy = var.batch_spot_fleet_allocation_strategy
bid_percentage = var.batch_spot_fleet_bid_percentage
Comment on lines +42 to +44
Contributor Author:

I've been using the capacity-optimized allocation strategy since it was introduced in 2019.

At a high level, the capacity-optimized strategy will try to get the smallest instance type that will fit your workload, but if none are available at your spot bid percentage, it will grab a larger instance type so that jobs don't sit in the queue.

Since we supply this via an input variable, we can make adjustments by modifying the `terraform.tfvars` file.
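
For illustration, such an adjustment in `terraform.tfvars` might look like the sketch below; the variable names come from the `compute_resources` block above, and the values are examples only:

```hcl
# Hypothetical terraform.tfvars entries; values are examples, not the deployed settings.
batch_spot_fleet_allocation_strategy = "SPOT_CAPACITY_OPTIMIZED"
batch_spot_fleet_bid_percentage      = 60
```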


ec2_configuration {
image_type = "ECS_AL2"
}
Comment on lines +46 to +48
Contributor Author:

This tells Batch to use the latest version of the ECS-optimized Amazon Linux 2 AMI.


ec2_key_pair = aws_key_pair.bastion.key_name

min_vcpus = var.batch_min_vcpus
max_vcpus = var.batch_max_vcpus

launch_template {
launch_template_id = aws_launch_template.default.id
version = aws_launch_template.default.latest_version
}

spot_iam_fleet_role = aws_iam_role.spot_fleet_service_role.arn
instance_role = aws_iam_instance_profile.ecs_instance_role.arn

instance_type = var.batch_instance_types

security_group_ids = [aws_security_group.batch.id]
subnets = var.vpc_private_subnet_ids

tags = {
Name = "BatchWorker"
Project = var.project
Environment = var.environment
}
}

depends_on = [aws_iam_role_policy_attachment.batch_service_role_policy]

lifecycle {
create_before_destroy = true
}
}

resource "aws_batch_job_queue" "default" {
name = "queue${local.short}"
priority = 1
state = "ENABLED"
compute_environments = [aws_batch_compute_environment.default.arn]
}

resource "aws_batch_job_definition" "default" {
name = "job${local.short}"
type = "container"

container_properties = templatefile("${path.module}/job-definitions/image-deid-etl.json.tmpl", {
image_url = "${module.ecr.repository_url}:${var.image_tag}"

image_deid_etl_vcpus = var.image_deid_etl_vcpus
image_deid_etl_memory = var.image_deid_etl_memory

database_url = "postgresql://${var.rds_database_username}:${var.rds_database_password}@${module.database.hostname}:${module.database.port}/${var.rds_database_name}"

flywheel_api_key = var.flywheel_api_key
flywheel_group = var.flywheel_group
orthanc_credentials = var.orthanc_credentials
orthanc_host = var.orthanc_host
orthanc_port = var.orthanc_port

phi_data_bucket_name = var.d3b_phi_data_bucket_name
subject_id_mapping_path = var.subject_id_mapping_path

image_deid_etl_log_level = var.image_deid_etl_log_level
})
}
18 changes: 18 additions & 0 deletions deployment/terraform/cloud-config/batch-container-instance
@@ -0,0 +1,18 @@
Content-Type: multipart/mixed; boundary="==BOUNDARY=="
MIME-Version: 1.0

--==BOUNDARY==
Content-Type: text/cloud-boothook; charset="us-ascii"

# Manually mount unformatted instance store volumes. Mounting in a cloud-boothook
# makes it more likely the drive is mounted before the Docker daemon and ECS agent
# start, which helps mitigate potential race conditions.
#
# See:
# - https://docs.aws.amazon.com/AmazonECS/latest/developerguide/bootstrap_container_instance.html#bootstrap_docker_daemon
# - https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/amazon-linux-ami-basics.html#supported-user-data-formats
mkfs.ext4 -E nodiscard /dev/nvme1n1
mkdir -p /media/ephemeral0
mount -t ext4 -o defaults,nofail,discard /dev/nvme1n1 /media/ephemeral0

--==BOUNDARY==
Comment on lines +1 to +18
Contributor Author:

We're using the c5d, m5d, and z1d instance types, which all have NVMe SSD ephemeral storage. These instances allow for super-fast ephemeral (temporary) I/O, which is great for staging files in the ETL and performing many reads/writes (e.g., the conversion process).

The instance store volumes are unformatted, so you need to initialize them whenever a container instance comes online.

Unfortunately, we can't take advantage of ephemeral storage just yet. As it's currently designed, the ETL will write files to the current working directory (which is /usr/local/src), so we need to add a mechanism that tells the ETL to perform operations in an alternative directory (e.g., /media/ephemeral0).

Member:

> As it's currently designed, the ETL will write files to the current working directory (which is /usr/local/src), so we need to add a mechanism that tells the ETL to perform operations in an alternative directory (e.g., /media/ephemeral0).

Is there an issue open for this mechanism yet?

Contributor Author:

Thank you for prompting me. I should've opened an issue when I encountered this. I opened #35.

6 changes: 2 additions & 4 deletions deployment/terraform/database.tf
@@ -7,7 +7,7 @@ resource "aws_db_subnet_group" "default" {
subnet_ids = var.vpc_private_subnet_ids

tags = {
Name = "dbsngDatabaseServer"
Name = "dbsngDatabaseServer"
}
}

@@ -57,9 +57,7 @@ resource "aws_db_parameter_group" "default" {
}

tags = {
Name = "dbpgDatabaseServer"
Project = var.project
Environment = var.environment
Name = "dbpgDatabaseServer"
Contributor Author:

Formatting tweaks that can be ignored.

}

lifecycle {
62 changes: 62 additions & 0 deletions deployment/terraform/firewall.tf
@@ -11,6 +11,16 @@ resource "aws_security_group_rule" "bastion_rds_egress" {
source_security_group_id = module.database.database_security_group_id
}

resource "aws_security_group_rule" "bastion_ssh_egress" {
type = "egress"
from_port = 22
to_port = 22
protocol = "tcp"

security_group_id = aws_security_group.bastion.id
source_security_group_id = aws_security_group.batch.id
}

#
# RDS security group resources
#
@@ -24,3 +34,55 @@ resource "aws_security_group_rule" "rds_bastion_ingress" {
source_security_group_id = aws_security_group.bastion.id
}

resource "aws_security_group_rule" "rds_batch_ingress" {
type = "ingress"
from_port = module.database.port
to_port = module.database.port
protocol = "tcp"

security_group_id = module.database.database_security_group_id
source_security_group_id = aws_security_group.batch.id
}

#
# Batch container instance security group resources
#
resource "aws_security_group_rule" "batch_http_egress" {
type = "egress"
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]

security_group_id = aws_security_group.batch.id
}

resource "aws_security_group_rule" "batch_https_egress" {
type = "egress"
from_port = 443
to_port = 443
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]

security_group_id = aws_security_group.batch.id
}

resource "aws_security_group_rule" "batch_rds_egress" {
type = "egress"
from_port = module.database.port
to_port = module.database.port
protocol = "tcp"

security_group_id = aws_security_group.batch.id
source_security_group_id = module.database.database_security_group_id
}

resource "aws_security_group_rule" "batch_bastion_ingress" {
type = "ingress"
from_port = 22
to_port = 22
protocol = "tcp"

security_group_id = aws_security_group.batch.id
source_security_group_id = aws_security_group.bastion.id
}