From bbf64a1ac1c0cc0091ba2785955923a05f477b23 Mon Sep 17 00:00:00 2001 From: Marko Malenic Date: Wed, 27 Nov 2024 15:32:31 +1100 Subject: [PATCH 1/3] feat(data-migrate): add data migrate fargate tasks --- config/config.ts | 2 + config/constants.ts | 7 +- config/stacks/dataMigrate.ts | 36 +++++ config/stacks/fileManager.ts | 18 ++- .../stacks/data-migrate/.dockerignore | 5 + .../stateless/stacks/data-migrate/.gitignore | 2 + .../stateless/stacks/data-migrate/Dockerfile | 10 ++ .../stateless/stacks/data-migrate/Makefile | 16 ++ .../stateless/stacks/data-migrate/README.md | 39 +++++ .../data-migrate/data_mover/__init__.py | 0 .../stacks/data-migrate/data_mover/cli.py | 58 ++++++++ .../data-migrate/data_mover/data_mover.py | 56 +++++++ .../stacks/data-migrate/deploy/stack.ts | 140 ++++++++++++++++++ .../stateless/stacks/data-migrate/poetry.lock | 58 ++++++++ .../stacks/data-migrate/pyproject.toml | 21 +++ .../statelessStackCollectionClass.ts | 7 + 16 files changed, 468 insertions(+), 7 deletions(-) create mode 100644 config/stacks/dataMigrate.ts create mode 100644 lib/workload/stateless/stacks/data-migrate/.dockerignore create mode 100644 lib/workload/stateless/stacks/data-migrate/.gitignore create mode 100644 lib/workload/stateless/stacks/data-migrate/Dockerfile create mode 100644 lib/workload/stateless/stacks/data-migrate/Makefile create mode 100644 lib/workload/stateless/stacks/data-migrate/README.md create mode 100644 lib/workload/stateless/stacks/data-migrate/data_mover/__init__.py create mode 100644 lib/workload/stateless/stacks/data-migrate/data_mover/cli.py create mode 100644 lib/workload/stateless/stacks/data-migrate/data_mover/data_mover.py create mode 100644 lib/workload/stateless/stacks/data-migrate/deploy/stack.ts create mode 100644 lib/workload/stateless/stacks/data-migrate/poetry.lock create mode 100644 lib/workload/stateless/stacks/data-migrate/pyproject.toml diff --git a/config/config.ts b/config/config.ts index f71c5a715..a3ebb4437 100644 --- a/config/config.ts +++ b/config/config.ts @@ -64,6 +64,7 @@ import { } from './stacks/oraCompressionPipelineManager'; import { getOraDecompressionManagerStackProps } from './stacks/oraDecompressionPipelineManager'; import { getPgDDProps } from './stacks/pgDD'; +import { getDataMigrateStackProps } from './stacks/dataMigrate'; interface EnvironmentConfig { name: string; @@ -131,6 +132,7 @@ export const getEnvironmentConfig = (stage: AppStage): EnvironmentConfig | null workflowManagerStackProps: getWorkflowManagerStackProps(stage), stackyMcStackFaceProps: getGlueStackProps(stage), fmAnnotatorProps: getFmAnnotatorProps(), + dataMigrateProps: getDataMigrateStackProps(stage), pgDDProps: getPgDDProps(stage), }, }; diff --git a/config/constants.ts b/config/constants.ts index 1d58cbfce..900b3a961 100644 --- a/config/constants.ts +++ b/config/constants.ts @@ -86,6 +86,11 @@ export const icav2PipelineCacheBucket: Record = { [AppStage.PROD]: 'pipeline-prod-cache-503977275616-ap-southeast-2', }; +// The test inventory bucket for dev. +export const fileManagerInventoryBucket: Record = { + [AppStage.BETA]: 'filemanager-inventory-test', +}; + // The archive bucket. Noting that this is only present for prod data. 
export const icav2ArchiveAnalysisBucket: Record = { [AppStage.PROD]: 'archive-prod-analysis-503977275616-ap-southeast-2', @@ -143,8 +148,8 @@ export const eventDlqNameFMAnnotator = 'orcabus-event-dlq-fmannotator'; export const serviceUserSecretName = 'orcabus/token-service-user'; // pragma: allowlist secret export const jwtSecretName = 'orcabus/token-service-jwt'; // pragma: allowlist secret export const icaAccessTokenSecretName = 'IcaSecretsPortal'; // pragma: allowlist secret - export const fileManagerIngestRoleName = 'orcabus-file-manager-ingest-role'; +export const dataMoverRoleName = 'orcabus-data-mover-role'; /* Resources required for BaseSpace TES stack diff --git a/config/stacks/dataMigrate.ts b/config/stacks/dataMigrate.ts new file mode 100644 index 000000000..97918abac --- /dev/null +++ b/config/stacks/dataMigrate.ts @@ -0,0 +1,36 @@ +import { + AppStage, + vpcProps, + oncoanalyserBucket, + icav2PipelineCacheBucket, + dataMoverRoleName, + icav2ArchiveAnalysisBucket, + icav2ArchiveFastqBucket, + fileManagerInventoryBucket, + logsApiGatewayConfig, +} from '../constants'; +import { DataMigrateStackProps } from '../../lib/workload/stateless/stacks/data-migrate/deploy/stack'; + +export const getDataMigrateStackProps = (stage: AppStage): DataMigrateStackProps => { + // For dev/staging we can write to any bucket that is also readable. + let writeToBuckets = [oncoanalyserBucket[stage], icav2PipelineCacheBucket[stage]]; + switch (stage) { + case AppStage.BETA: + // For dev additionally, write to the filemanager inventory bucket for testing. + writeToBuckets.push(fileManagerInventoryBucket[stage]); + break; + case AppStage.PROD: + // For prod, we only allow writing to the archive buckets, nothing else. + writeToBuckets = [icav2ArchiveAnalysisBucket[stage], icav2ArchiveFastqBucket[stage]]; + break; + } + + return { + vpcProps, + dataMoverRoleName, + deleteFromBuckets: [oncoanalyserBucket[stage], icav2PipelineCacheBucket[stage]], + readFromBuckets: [oncoanalyserBucket[stage], icav2PipelineCacheBucket[stage]], + writeToBuckets, + logRetention: logsApiGatewayConfig[stage].retention, + }; +}; diff --git a/config/stacks/fileManager.ts b/config/stacks/fileManager.ts index 0492a588e..e7c634257 100644 --- a/config/stacks/fileManager.ts +++ b/config/stacks/fileManager.ts @@ -1,20 +1,26 @@ import { FilemanagerConfig } from '../../lib/workload/stateless/stacks/filemanager/deploy/stack'; import { AppStage, + cognitoApiGatewayConfig, computeSecurityGroupName, + corsAllowOrigins, databasePort, dbClusterEndpointHostParameterName, eventSourceQueueName, - vpcProps, - oncoanalyserBucket, - icav2PipelineCacheBucket, fileManagerIngestRoleName, + fileManagerInventoryBucket, + icav2PipelineCacheBucket, logsApiGatewayConfig, - cognitoApiGatewayConfig, - corsAllowOrigins, + oncoanalyserBucket, + vpcProps, } from '../constants'; export const getFileManagerStackProps = (stage: AppStage): FilemanagerConfig => { + const inventorySourceBuckets = []; + if (stage == AppStage.BETA) { + inventorySourceBuckets.push(fileManagerInventoryBucket[stage]); + } + return { securityGroupName: computeSecurityGroupName, vpcProps, @@ -22,7 +28,7 @@ export const getFileManagerStackProps = (stage: AppStage): FilemanagerConfig => databaseClusterEndpointHostParameter: dbClusterEndpointHostParameterName, port: databasePort, migrateDatabase: true, - inventorySourceBuckets: ['filemanager-inventory-test'], + inventorySourceBuckets, eventSourceBuckets: [oncoanalyserBucket[stage], icav2PipelineCacheBucket[stage]], fileManagerRoleName: 
fileManagerIngestRoleName, apiGatewayCognitoProps: { diff --git a/lib/workload/stateless/stacks/data-migrate/.dockerignore b/lib/workload/stateless/stacks/data-migrate/.dockerignore new file mode 100644 index 000000000..fde686449 --- /dev/null +++ b/lib/workload/stateless/stacks/data-migrate/.dockerignore @@ -0,0 +1,5 @@ +deploy +.gitignore +README.md +.ruff_cache +response.josn diff --git a/lib/workload/stateless/stacks/data-migrate/.gitignore b/lib/workload/stateless/stacks/data-migrate/.gitignore new file mode 100644 index 000000000..85b343a77 --- /dev/null +++ b/lib/workload/stateless/stacks/data-migrate/.gitignore @@ -0,0 +1,2 @@ +.ruff_cache +response.json diff --git a/lib/workload/stateless/stacks/data-migrate/Dockerfile b/lib/workload/stateless/stacks/data-migrate/Dockerfile new file mode 100644 index 000000000..342fbaebf --- /dev/null +++ b/lib/workload/stateless/stacks/data-migrate/Dockerfile @@ -0,0 +1,10 @@ +FROM public.ecr.aws/docker/library/python:3.13 + +RUN apt update -y && apt install -y awscli && pip3 install poetry + +WORKDIR /app + +COPY . . +RUN poetry install --no-interaction --no-root + +ENTRYPOINT ["poetry", "run", "dm"] diff --git a/lib/workload/stateless/stacks/data-migrate/Makefile b/lib/workload/stateless/stacks/data-migrate/Makefile new file mode 100644 index 000000000..a452fae16 --- /dev/null +++ b/lib/workload/stateless/stacks/data-migrate/Makefile @@ -0,0 +1,16 @@ +.PHONY: * + +install: + @poetry update + +lint: install + @poetry run ruff format . + +check: lint + @poetry run ruff check . + +dm: install + @poetry run dm $(COMMAND) + +clean: + rm -rf data && rm -rf .ruff_cache diff --git a/lib/workload/stateless/stacks/data-migrate/README.md b/lib/workload/stateless/stacks/data-migrate/README.md new file mode 100644 index 000000000..efc568c8f --- /dev/null +++ b/lib/workload/stateless/stacks/data-migrate/README.md @@ -0,0 +1,39 @@ +# Data migrate + +A service to migrate data between locations. + +## Data mover + +Locally, use the data mover to move or copy data between locations: + +```sh +poetry run dm move --source --destination +``` + +This command is also deployed as a fargate task which can move data between the +cache bucket and archive bucket: + +Note, if you are deploying this stack manually there may be an issue with docker +failing to login in. A seperate entry for the dev ecr might need to be created in +`~/.docker/config`: + +```json +{ + "auths": { + "843407916570.dkr.ecr.ap-southeast-2.amazonaws.com": {} + } +} +``` + +## Local development + +This project uses [poetry] to manage dependencies. 
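+
+A minimal local run (the bucket paths below are placeholders) installs the
+dependencies and invokes the `dm` entry point defined in `pyproject.toml`:
+
+```sh
+poetry install
+poetry run dm copy --source s3://example-source/prefix --destination s3://example-destination/prefix
+```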
+ +Run the linter and formatter: + +``` +make check +``` + +[poetry]: https://python-poetry.org/ +[env-example]: .env.example diff --git a/lib/workload/stateless/stacks/data-migrate/data_mover/__init__.py b/lib/workload/stateless/stacks/data-migrate/data_mover/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/workload/stateless/stacks/data-migrate/data_mover/cli.py b/lib/workload/stateless/stacks/data-migrate/data_mover/cli.py new file mode 100644 index 000000000..4829281fc --- /dev/null +++ b/lib/workload/stateless/stacks/data-migrate/data_mover/cli.py @@ -0,0 +1,58 @@ +import logging + +import click + +from data_mover.data_mover import DataMover + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + + +@click.group() +def cli(): + pass + + +@cli.command() +@click.option( + "--source", + required=True, + help="The source to copy from.", +) +@click.option( + "--destination", + required=True, + help="The destination to copy to.", +) +def move(source, destination): + """ + Copy files from the source to the destination and delete the source if successful. + This command calls `aws s3 sync` directly, so it expects the AWS cli to be installed. + """ + dm = DataMover(source, destination, logger=logger) + dm.sync() + dm.delete() + + +@cli.command() +@click.option( + "--source", + required=True, + help="The source to copy from.", +) +@click.option( + "--destination", + required=True, + help="The destination to copy to.", +) +def copy(source, destination): + """ + Copy files from the source to the destination and keep the source if successful. + This command calls `aws s3 sync` directly, so it expects the AWS cli to be installed. + """ + dm = DataMover(source, destination, logger=logger) + dm.sync() + + +if __name__ == "__main__": + cli() diff --git a/lib/workload/stateless/stacks/data-migrate/data_mover/data_mover.py b/lib/workload/stateless/stacks/data-migrate/data_mover/data_mover.py new file mode 100644 index 000000000..be24f4aa0 --- /dev/null +++ b/lib/workload/stateless/stacks/data-migrate/data_mover/data_mover.py @@ -0,0 +1,56 @@ +import logging +import subprocess + + +class DataMover: + """ + A class to manage moving data. + """ + + def __init__( + self, + source: str, + destination: str, + repeat: int = 2, + # 12 hours + timeout: int = 43200, + logger: logging.Logger = logging.getLogger(__name__), + ): + self.source = source + self.destination = destination + self.repeat = repeat + self.timeout = timeout + self.logger = logger + + def sync(self): + """ + Sync destination and source. + """ + self.logger.info( + f"syncing {self.repeat} times from {self.source} to {self.destination}" + ) + + out = None + for _ in range(self.repeat): + out = subprocess.run( + ["aws", "s3", "sync", self.source, self.destination], + check=True, + # 1 day + timeout=self.timeout, + ) + + if out.stdout is not None: + raise Exception("failed to sync - non-empty output") + + def delete(self): + """ + Delete the source. 
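+
+        This calls `aws s3 rm --recursive` on the source, so like `sync` it expects
+        the AWS CLI to be installed (the Dockerfile installs it via apt).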
+ """ + self.logger.info(f"deleting files from {self.source}") + + subprocess.run( + ["aws", "s3", "rm", "--recursive", self.source], + check=True, + # 1 day + timeout=self.timeout, + ) diff --git a/lib/workload/stateless/stacks/data-migrate/deploy/stack.ts b/lib/workload/stateless/stacks/data-migrate/deploy/stack.ts new file mode 100644 index 000000000..5f432ac64 --- /dev/null +++ b/lib/workload/stateless/stacks/data-migrate/deploy/stack.ts @@ -0,0 +1,140 @@ +import { Duration, Stack, StackProps } from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import path from 'path'; +import { ISecurityGroup, IVpc, SecurityGroup, Vpc, VpcLookupOptions } from 'aws-cdk-lib/aws-ec2'; +import { PolicyStatement, Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam'; +import { + AssetImage, + Cluster, + ContainerDefinition, + CpuArchitecture, + FargateTaskDefinition, + LogDriver, +} from 'aws-cdk-lib/aws-ecs'; +import { Platform } from 'aws-cdk-lib/aws-ecr-assets'; +import { LogGroup, RetentionDays } from 'aws-cdk-lib/aws-logs'; +import { EcsFargateLaunchTarget, EcsRunTask } from 'aws-cdk-lib/aws-stepfunctions-tasks'; +import { IntegrationPattern } from 'aws-cdk-lib/aws-stepfunctions'; + +/** + * Props for the data migrate stack. + */ +export type DataMigrateStackProps = { + /** + * Props to lookup the VPC with. + */ + vpcProps: VpcLookupOptions; + /** + * The name of the role for the data mover. + */ + dataMoverRoleName?: string; + /** + * Define the buckets that the mover is allowed to read from. + */ + readFromBuckets: string[]; + /** + * Define the buckets that the mover is allowed to write to. + */ + writeToBuckets: string[]; + /** + * Define the buckets that the mover is allowed to delete from after copying. + */ + deleteFromBuckets: string[]; + /** + * How long to keep logs for. + */ + logRetention: RetentionDays; +}; + +/** + * Deploy the data migrate stack. + */ +export class DataMigrateStack extends Stack { + private readonly vpc: IVpc; + private readonly role: Role; + private readonly cluster: Cluster; + + constructor(scope: Construct, id: string, props: StackProps & DataMigrateStackProps) { + super(scope, id, props); + + this.role = new Role(this, 'Role', { + assumedBy: new ServicePrincipal('ecs-tasks.amazonaws.com'), + description: props?.description ?? 
'Fargate execution role', + roleName: props?.dataMoverRoleName, + maxSessionDuration: Duration.hours(12), + }); + this.addPoliciesForBuckets(this.role, props.readFromBuckets, [ + 's3:ListBucket', + 's3:GetObject', + 's3:GetObjectVersion', + 's3:GetObjectTagging', + 's3:GetObjectVersionTagging', + ]); + this.addPoliciesForBuckets(this.role, props.writeToBuckets, ['s3:PutObject']); + this.addPoliciesForBuckets(this.role, props.deleteFromBuckets, ['s3:DeleteObject']); + + this.vpc = Vpc.fromLookup(this, 'MainVpc', props.vpcProps); + this.cluster = new Cluster(this, 'FargateCluster', { + vpc: this.vpc, + enableFargateCapacityProviders: true, + }); + + const entry = path.join(__dirname, '..'); + const taskDefinition = new FargateTaskDefinition(this, 'TaskDefinition', { + runtimePlatform: { + cpuArchitecture: CpuArchitecture.X86_64, + }, + cpu: 256, + memoryLimitMiB: 1024, + taskRole: this.role, + family: 'orcabus-data-migrate-mover', + }); + const container = taskDefinition.addContainer('DataMoverContainer', { + stopTimeout: Duration.seconds(120), + image: new AssetImage(entry, { + platform: Platform.LINUX_AMD64, + }), + readonlyRootFilesystem: true, + logging: LogDriver.awsLogs({ + streamPrefix: 'data-migrate', + logRetention: props.logRetention, + }), + }); + + const securityGroup = new SecurityGroup(this, 'SecurityGroup', { + vpc: this.vpc, + allowAllOutbound: true, + description: 'Security group that allows a filemanager Lambda function to egress out.', + }); + + new EcsRunTask(this, 'Run the data mover', { + cluster: this.cluster, + taskDefinition: taskDefinition, + launchTarget: EcsFargateLaunchTarget, + securityGroups: {}, + subnets: { + subnetType: props.vpcSubnetSelection, + }, + resultPath: '$.dataMoverResult', + containerOverrides: [ + { + containerDefinition: container, + }, + ], + }); + } + + /** + * Add policies to the role. + */ + addPoliciesForBuckets(role: Role, buckets: string[], actions: string[]) { + buckets.map((bucket) => { + role.addToPolicy( + new PolicyStatement({ + actions, + resources: [`arn:aws:s3:::${bucket}`, `arn:aws:s3:::${bucket}/*`], + }) + ); + }); + } +} diff --git a/lib/workload/stateless/stacks/data-migrate/poetry.lock b/lib/workload/stateless/stacks/data-migrate/poetry.lock new file mode 100644 index 000000000..aa083a96f --- /dev/null +++ b/lib/workload/stateless/stacks/data-migrate/poetry.lock @@ -0,0 +1,58 @@ +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. + +[[package]] +name = "click" +version = "8.1.7" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "ruff" +version = "0.7.4" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.7.4-py3-none-linux_armv6l.whl", hash = "sha256:a4919925e7684a3f18e18243cd6bea7cfb8e968a6eaa8437971f681b7ec51478"}, + {file = "ruff-0.7.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:cfb365c135b830778dda8c04fb7d4280ed0b984e1aec27f574445231e20d6c63"}, + {file = "ruff-0.7.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:63a569b36bc66fbadec5beaa539dd81e0527cb258b94e29e0531ce41bacc1f20"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d06218747d361d06fd2fdac734e7fa92df36df93035db3dc2ad7aa9852cb109"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e0cea28d0944f74ebc33e9f934238f15c758841f9f5edd180b5315c203293452"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80094ecd4793c68b2571b128f91754d60f692d64bc0d7272ec9197fdd09bf9ea"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:997512325c6620d1c4c2b15db49ef59543ef9cd0f4aa8065ec2ae5103cedc7e7"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00b4cf3a6b5fad6d1a66e7574d78956bbd09abfd6c8a997798f01f5da3d46a05"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7dbdc7d8274e1422722933d1edddfdc65b4336abf0b16dfcb9dedd6e6a517d06"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e92dfb5f00eaedb1501b2f906ccabfd67b2355bdf117fea9719fc99ac2145bc"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3bd726099f277d735dc38900b6a8d6cf070f80828877941983a57bca1cd92172"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:2e32829c429dd081ee5ba39aef436603e5b22335c3d3fff013cd585806a6486a"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:662a63b4971807623f6f90c1fb664613f67cc182dc4d991471c23c541fee62dd"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:876f5e09eaae3eb76814c1d3b68879891d6fde4824c015d48e7a7da4cf066a3a"}, + {file = "ruff-0.7.4-py3-none-win32.whl", hash = "sha256:75c53f54904be42dd52a548728a5b572344b50d9b2873d13a3f8c5e3b91f5cac"}, + {file = "ruff-0.7.4-py3-none-win_amd64.whl", hash = "sha256:745775c7b39f914238ed1f1b0bebed0b9155a17cd8bc0b08d3c87e4703b990d6"}, + {file = "ruff-0.7.4-py3-none-win_arm64.whl", hash = "sha256:11bff065102c3ae9d3ea4dc9ecdfe5a5171349cdd0787c1fc64761212fc9cf1f"}, + {file = "ruff-0.7.4.tar.gz", hash = "sha256:cd12e35031f5af6b9b93715d8c4f40360070b2041f81273d0527683d5708fce2"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.13" +content-hash = "9b96100033c44eace497bce068a383bd909a99bfa30f7fe92858a09d7369bce9" diff --git a/lib/workload/stateless/stacks/data-migrate/pyproject.toml b/lib/workload/stateless/stacks/data-migrate/pyproject.toml new file mode 100644 index 000000000..742bc776e --- /dev/null +++ 
b/lib/workload/stateless/stacks/data-migrate/pyproject.toml @@ -0,0 +1,21 @@ +[tool.poetry] +name = "data-migrate" +version = "0.1.0" +description = "A service to migrate data between locations and buckets." +authors = ["Marko malenic "] +readme = "README.md" +packages = [{ include = "data_mover" }] + +[tool.poetry.dependencies] +python = "^3.13" +click = "^8" + +[tool.poetry.group.dev.dependencies] +ruff = "^0.7" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry.scripts] +dm = "data_mover.cli:cli" diff --git a/lib/workload/stateless/statelessStackCollectionClass.ts b/lib/workload/stateless/statelessStackCollectionClass.ts index d10167501..34f494332 100644 --- a/lib/workload/stateless/statelessStackCollectionClass.ts +++ b/lib/workload/stateless/statelessStackCollectionClass.ts @@ -80,6 +80,7 @@ import { OraDecompressionManagerStackProps, } from './stacks/ora-decompression-manager/deploy'; import { PgDDStack, PgDDStackProps } from './stacks/pg-dd/deploy/stack'; +import { DataMigrateStack, DataMigrateStackProps } from './stacks/data-migrate/deploy/stack'; export interface StatelessStackCollectionProps { metadataManagerStackProps: MetadataManagerStackProps; @@ -105,6 +106,7 @@ export interface StatelessStackCollectionProps { workflowManagerStackProps: WorkflowManagerStackProps; stackyMcStackFaceProps: GlueStackProps; fmAnnotatorProps: FMAnnotatorConfigurableProps; + dataMigrateProps: DataMigrateStackProps; pgDDProps?: PgDDStackProps; } @@ -133,6 +135,7 @@ export class StatelessStackCollection { readonly workflowManagerStack: Stack; readonly stackyMcStackFaceStack: Stack; readonly fmAnnotator: Stack; + readonly dataMigrate: Stack; readonly pgDDStack: Stack; constructor( @@ -312,6 +315,10 @@ export class StatelessStackCollection { ...statelessConfiguration.fmAnnotatorProps, domainName: fileManagerStack.domainName, }); + this.fmAnnotator = new DataMigrateStack(scope, 'DataMigrateStack', { + ...this.createTemplateProps(env, 'DataMigrateStack'), + ...statelessConfiguration.dataMigrateProps, + }); if (statelessConfiguration.pgDDProps) { this.pgDDStack = new PgDDStack(scope, 'PgDDStack', { From fd1ca12c5e16ae42d4a8cd4bb6b4fd8c06fea52d Mon Sep 17 00:00:00 2001 From: Marko Malenic Date: Thu, 28 Nov 2024 16:38:14 +1100 Subject: [PATCH 2/3] feat(data-migrate): add executable step functions for fargate task --- .../stateless/stacks/data-migrate/README.md | 24 ++-- .../stacks/data-migrate/data_mover/cli.py | 22 +++- .../data-migrate/data_mover/data_mover.py | 35 ++++- .../stacks/data-migrate/deploy/stack.ts | 76 +++++++++-- .../stateless/stacks/data-migrate/poetry.lock | 121 +++++++++++++++++- .../stacks/data-migrate/pyproject.toml | 2 + .../statelessStackCollectionClass.ts | 2 +- test/stateless/deployment.test.ts | 52 ++++++++ 8 files changed, 299 insertions(+), 35 deletions(-) diff --git a/lib/workload/stateless/stacks/data-migrate/README.md b/lib/workload/stateless/stacks/data-migrate/README.md index efc568c8f..a17fec6eb 100644 --- a/lib/workload/stateless/stacks/data-migrate/README.md +++ b/lib/workload/stateless/stacks/data-migrate/README.md @@ -10,19 +10,17 @@ Locally, use the data mover to move or copy data between locations: poetry run dm move --source --destination ``` -This command is also deployed as a fargate task which can move data between the -cache bucket and archive bucket: - -Note, if you are deploying this stack manually there may be an issue with docker -failing to login in. 
A seperate entry for the dev ecr might need to be created in -`~/.docker/config`: - -```json -{ - "auths": { - "843407916570.dkr.ecr.ap-southeast-2.amazonaws.com": {} - } -} +This command is also deployed as a fargate task triggered by step functions. +The step functions expects as input a JSON which specifies the command (move or copy), +a source and a destination. For example, to move a specified `portalRunId` into the archive +bucket (this is probably easier in the step functions console): + +```sh +export ARN=$(aws stepfunctions list-state-machines | jq -r '.stateMachines | .[] | select(.name == "orcabus-data-migrate-mover") | .stateMachineArn') +export COMMAND="move" +export SOURCE="s3://umccr-temp-dev/move_test_2" +export DESTINATION="s3://filemanager-inventory-test/move_test_2" +aws stepfunctions start-execution --state-machine-arn $ARN --input "{\"command\" : \"$COMMAND\", \"source\": \"$SOURCE\", \"destination\": \"$DESTINATION\" }" ``` ## Local development diff --git a/lib/workload/stateless/stacks/data-migrate/data_mover/cli.py b/lib/workload/stateless/stacks/data-migrate/data_mover/cli.py index 4829281fc..2c3d7ba78 100644 --- a/lib/workload/stateless/stacks/data-migrate/data_mover/cli.py +++ b/lib/workload/stateless/stacks/data-migrate/data_mover/cli.py @@ -1,10 +1,11 @@ import logging +import sys import click from data_mover.data_mover import DataMover -logger = logging.getLogger(__name__) +logger = logging.getLogger() logger.setLevel(logging.DEBUG) @@ -29,9 +30,10 @@ def move(source, destination): Copy files from the source to the destination and delete the source if successful. This command calls `aws s3 sync` directly, so it expects the AWS cli to be installed. """ - dm = DataMover(source, destination, logger=logger) - dm.sync() - dm.delete() + data_mover = DataMover(source, destination, logger=logger) + data_mover.sync() + data_mover.delete() + data_mover.send_output() @cli.command() @@ -50,9 +52,15 @@ def copy(source, destination): Copy files from the source to the destination and keep the source if successful. This command calls `aws s3 sync` directly, so it expects the AWS cli to be installed. 
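+    When the `DM_TASK_TOKEN` environment variable is set (as it is in the Fargate
+    task), the captured output is reported back to Step Functions on completion.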
""" - dm = DataMover(source, destination, logger=logger) - dm.sync() + data_mover = DataMover(source, destination, logger=logger) + data_mover.sync() + data_mover.send_output() if __name__ == "__main__": - cli() + try: + cli(standalone_mode=False) + sys.exit(0) + except Exception as e: + DataMover.send_failure(str(e)) + sys.exit(1) diff --git a/lib/workload/stateless/stacks/data-migrate/data_mover/data_mover.py b/lib/workload/stateless/stacks/data-migrate/data_mover/data_mover.py index be24f4aa0..7b616bde0 100644 --- a/lib/workload/stateless/stacks/data-migrate/data_mover/data_mover.py +++ b/lib/workload/stateless/stacks/data-migrate/data_mover/data_mover.py @@ -1,6 +1,11 @@ +import json import logging +import os import subprocess +import boto3 +from mypy_boto3_stepfunctions import SFNClient + class DataMover: """ @@ -21,6 +26,7 @@ def __init__( self.repeat = repeat self.timeout = timeout self.logger = logger + self.output = "" def sync(self): """ @@ -38,6 +44,9 @@ def sync(self): # 1 day timeout=self.timeout, ) + self.logger.info(out.stdout) + + self.output += out.stdout or "" if out.stdout is not None: raise Exception("failed to sync - non-empty output") @@ -48,9 +57,33 @@ def delete(self): """ self.logger.info(f"deleting files from {self.source}") - subprocess.run( + out = subprocess.run( ["aws", "s3", "rm", "--recursive", self.source], check=True, # 1 day timeout=self.timeout, ) + self.logger.info(out.stdout) + + self.output += out.stdout or "" + + def send_output(self): + """ + Send successful task response with the output. + """ + task_token = os.getenv("DM_TASK_TOKEN") + if task_token is not None: + client: SFNClient = boto3.client("stepfunctions") + client.send_task_success( + taskToken=task_token, output=json.dumps(self.output) + ) + + @staticmethod + def send_failure(error: str): + """ + Send a failed task response. + """ + task_token = os.getenv("DM_TASK_TOKEN") + if task_token is not None: + client: SFNClient = boto3.client("stepfunctions") + client.send_task_failure(taskToken=task_token, error=error) diff --git a/lib/workload/stateless/stacks/data-migrate/deploy/stack.ts b/lib/workload/stateless/stacks/data-migrate/deploy/stack.ts index 5f432ac64..e92438725 100644 --- a/lib/workload/stateless/stacks/data-migrate/deploy/stack.ts +++ b/lib/workload/stateless/stacks/data-migrate/deploy/stack.ts @@ -1,20 +1,28 @@ import { Duration, Stack, StackProps } from 'aws-cdk-lib'; import { Construct } from 'constructs'; import path from 'path'; -import { ISecurityGroup, IVpc, SecurityGroup, Vpc, VpcLookupOptions } from 'aws-cdk-lib/aws-ec2'; +import { IVpc, SecurityGroup, SubnetType, Vpc, VpcLookupOptions } from 'aws-cdk-lib/aws-ec2'; import { PolicyStatement, Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam'; import { AssetImage, Cluster, - ContainerDefinition, CpuArchitecture, FargateTaskDefinition, LogDriver, } from 'aws-cdk-lib/aws-ecs'; import { Platform } from 'aws-cdk-lib/aws-ecr-assets'; -import { LogGroup, RetentionDays } from 'aws-cdk-lib/aws-logs'; +import { RetentionDays } from 'aws-cdk-lib/aws-logs'; import { EcsFargateLaunchTarget, EcsRunTask } from 'aws-cdk-lib/aws-stepfunctions-tasks'; -import { IntegrationPattern } from 'aws-cdk-lib/aws-stepfunctions'; +import { + ChainDefinitionBody, + IntegrationPattern, + JsonPath, + Pass, + StateMachine, + Succeed, + Timeout, +} from 'aws-cdk-lib/aws-stepfunctions'; +import { startExecution } from 'aws-cdk-lib/custom-resources/lib/provider-framework/runtime/outbound'; /** * Props for the data migrate stack. 
@@ -63,6 +71,13 @@ export class DataMigrateStack extends Stack { roleName: props?.dataMoverRoleName, maxSessionDuration: Duration.hours(12), }); + this.role.addToPolicy( + new PolicyStatement({ + resources: ['*'], + actions: ['states:SendTaskSuccess', 'states:SendTaskFailure', 'states:SendTaskHeartbeat'], + }) + ); + this.addPoliciesForBuckets(this.role, props.readFromBuckets, [ 's3:ListBucket', 's3:GetObject', @@ -70,16 +85,24 @@ export class DataMigrateStack extends Stack { 's3:GetObjectTagging', 's3:GetObjectVersionTagging', ]); - this.addPoliciesForBuckets(this.role, props.writeToBuckets, ['s3:PutObject']); + this.addPoliciesForBuckets(this.role, props.writeToBuckets, [ + 's3:PutObject', + 's3:PutObjectTagging', + 's3:PutObjectVersionTagging', + // The bucket being written to also needs to be listed for sync to work. + 's3:ListBucket', + ]); this.addPoliciesForBuckets(this.role, props.deleteFromBuckets, ['s3:DeleteObject']); this.vpc = Vpc.fromLookup(this, 'MainVpc', props.vpcProps); this.cluster = new Cluster(this, 'FargateCluster', { vpc: this.vpc, enableFargateCapacityProviders: true, + containerInsights: true, }); const entry = path.join(__dirname, '..'); + const name = 'orcabus-data-migrate-mover'; const taskDefinition = new FargateTaskDefinition(this, 'TaskDefinition', { runtimePlatform: { cpuArchitecture: CpuArchitecture.X86_64, @@ -87,7 +110,7 @@ export class DataMigrateStack extends Stack { cpu: 256, memoryLimitMiB: 1024, taskRole: this.role, - family: 'orcabus-data-migrate-mover', + family: name, }); const container = taskDefinition.addContainer('DataMoverContainer', { stopTimeout: Duration.seconds(120), @@ -104,24 +127,53 @@ export class DataMigrateStack extends Stack { const securityGroup = new SecurityGroup(this, 'SecurityGroup', { vpc: this.vpc, allowAllOutbound: true, - description: 'Security group that allows a filemanager Lambda function to egress out.', }); - new EcsRunTask(this, 'Run the data mover', { + const startState = new Pass(this, 'StartState'); + const getCommand = new Pass(this, 'GetCommand', { + parameters: { + commands: JsonPath.array( + JsonPath.stringAt('$.command'), + '--source', + JsonPath.stringAt('$.source'), + '--destination', + JsonPath.stringAt('$.destination') + ), + }, + }); + // Todo take input from ArchiveData event portalRunId as command. + const task = new EcsRunTask(this, 'RunDataMover', { cluster: this.cluster, + taskTimeout: Timeout.duration(Duration.hours(12)), + integrationPattern: IntegrationPattern.WAIT_FOR_TASK_TOKEN, taskDefinition: taskDefinition, - launchTarget: EcsFargateLaunchTarget, - securityGroups: {}, + launchTarget: new EcsFargateLaunchTarget(), + securityGroups: [securityGroup], subnets: { - subnetType: props.vpcSubnetSelection, + subnetType: SubnetType.PRIVATE_WITH_EGRESS, }, - resultPath: '$.dataMoverResult', containerOverrides: [ { containerDefinition: container, + command: JsonPath.listAt('$.commands'), + environment: [ + { + name: 'DM_TASK_TOKEN', + value: JsonPath.stringAt('$$.Task.Token'), + }, + ], }, ], }); + // Todo output a complete event. 
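+    // The chain assembled below is StartState (Pass) -> GetCommand (Pass, builds the
+    // CLI arguments from the input JSON) -> RunDataMover (EcsRunTask, which waits for
+    // the container's task token) -> SuccessState.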
+ const finish = new Succeed(this, 'SuccessState'); + + new StateMachine(this, 'StateMachine', { + stateMachineName: name, + definitionBody: ChainDefinitionBody.fromChainable( + startState.next(getCommand).next(task).next(finish) + ), + }); } /** diff --git a/lib/workload/stateless/stacks/data-migrate/poetry.lock b/lib/workload/stateless/stacks/data-migrate/poetry.lock index aa083a96f..2b73ac6a9 100644 --- a/lib/workload/stateless/stacks/data-migrate/poetry.lock +++ b/lib/workload/stateless/stacks/data-migrate/poetry.lock @@ -1,5 +1,43 @@ # This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +[[package]] +name = "boto3" +version = "1.35.71" +description = "The AWS SDK for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "boto3-1.35.71-py3-none-any.whl", hash = "sha256:e2969a246bb3208122b3c349c49cc6604c6fc3fc2b2f65d99d3e8ccd745b0c16"}, + {file = "boto3-1.35.71.tar.gz", hash = "sha256:3ed7172b3d4fceb6218bb0ec3668c4d40c03690939c2fca4f22bb875d741a07f"}, +] + +[package.dependencies] +botocore = ">=1.35.71,<1.36.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.10.0,<0.11.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.35.71" +description = "Low-level, data-driven core of boto 3." +optional = false +python-versions = ">=3.8" +files = [ + {file = "botocore-1.35.71-py3-none-any.whl", hash = "sha256:fc46e7ab1df3cef66dfba1633f4da77c75e07365b36f03bd64a3793634be8fc1"}, + {file = "botocore-1.35.71.tar.gz", hash = "sha256:f9fa058e0393660c3fe53c1e044751beb64b586def0bd2212448a7c328b0cbba"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""} + +[package.extras] +crt = ["awscrt (==0.22.0)"] + [[package]] name = "click" version = "8.1.7" @@ -25,6 +63,42 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + +[[package]] +name = "mypy-boto3-stepfunctions" +version = "1.35.68" +description = "Type annotations for boto3 SFN 1.35.68 service generated with mypy-boto3-builder 8.3.1" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mypy_boto3_stepfunctions-1.35.68-py3-none-any.whl", hash = "sha256:ceea974dd9f779f19042dc6ce6ac64955036be9b825512a6a33eecb7d682f43c"}, + {file = "mypy_boto3_stepfunctions-1.35.68.tar.gz", hash = "sha256:4abef0d339463ebe612836f42154a092e95eed025baca9a15be1286cc9a90434"}, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + [[package]] name = "ruff" version = "0.7.4" @@ 
-52,7 +126,52 @@ files = [ {file = "ruff-0.7.4.tar.gz", hash = "sha256:cd12e35031f5af6b9b93715d8c4f40360070b2041f81273d0527683d5708fce2"}, ] +[[package]] +name = "s3transfer" +version = "0.10.4" +description = "An Amazon S3 Transfer Manager" +optional = false +python-versions = ">=3.8" +files = [ + {file = "s3transfer-0.10.4-py3-none-any.whl", hash = "sha256:244a76a24355363a68164241438de1b72f8781664920260c48465896b712a41e"}, + {file = "s3transfer-0.10.4.tar.gz", hash = "sha256:29edc09801743c21eb5ecbc617a152df41d3c287f67b615f73e5f750583666a7"}, +] + +[package.dependencies] +botocore = ">=1.33.2,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "urllib3" +version = "2.2.3" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, + {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + [metadata] lock-version = "2.0" python-versions = "^3.13" -content-hash = "9b96100033c44eace497bce068a383bd909a99bfa30f7fe92858a09d7369bce9" +content-hash = "8a4cdac00df82a72437dab5405390d84e9c0f8b6085336bfc6a699f57ca2b7c1" diff --git a/lib/workload/stateless/stacks/data-migrate/pyproject.toml b/lib/workload/stateless/stacks/data-migrate/pyproject.toml index 742bc776e..f65816583 100644 --- a/lib/workload/stateless/stacks/data-migrate/pyproject.toml +++ b/lib/workload/stateless/stacks/data-migrate/pyproject.toml @@ -8,6 +8,8 @@ packages = [{ include = "data_mover" }] [tool.poetry.dependencies] python = "^3.13" +boto3 = "^1" +mypy-boto3-stepfunctions = "^1" click = "^8" [tool.poetry.group.dev.dependencies] diff --git a/lib/workload/stateless/statelessStackCollectionClass.ts b/lib/workload/stateless/statelessStackCollectionClass.ts index 34f494332..eadd7c7b6 100644 --- a/lib/workload/stateless/statelessStackCollectionClass.ts +++ b/lib/workload/stateless/statelessStackCollectionClass.ts @@ -315,7 +315,7 @@ export class StatelessStackCollection { ...statelessConfiguration.fmAnnotatorProps, domainName: fileManagerStack.domainName, }); - this.fmAnnotator = new DataMigrateStack(scope, 'DataMigrateStack', { + this.dataMigrate = new DataMigrateStack(scope, 'DataMigrateStack', { ...this.createTemplateProps(env, 'DataMigrateStack'), ...statelessConfiguration.dataMigrateProps, }); diff --git a/test/stateless/deployment.test.ts b/test/stateless/deployment.test.ts index d6836e5b0..ee2bfec80 100644 --- a/test/stateless/deployment.test.ts +++ b/test/stateless/deployment.test.ts @@ -217,6 +217,58 @@ function applyNagSuppression(stackId: string, stack: Stack) { ); break; + case 'DataMigrateStack': + NagSuppressions.addResourceSuppressions( + stack, + [ + { + id: 'AwsSolutions-IAM5', + reason: "'*' is required to 
access buckets for moving data.", + appliesTo: [ + 'Resource::arn:aws:s3:::org.umccr.data.oncoanalyser/*', + 'Resource::arn:aws:s3:::pipeline-prod-cache-503977275616-ap-southeast-2/*', + 'Resource::arn:aws:s3:::archive-prod-analysis-503977275616-ap-southeast-2/*', + 'Resource::arn:aws:s3:::archive-prod-fastq-503977275616-ap-southeast-2/*', + ], + }, + ], + true + ); + NagSuppressions.addResourceSuppressionsByPath( + stack, + `/DataMigrateStack/StateMachine/Role/DefaultPolicy/Resource`, + [ + { + id: 'AwsSolutions-IAM5', + reason: '* is required to SendTaskSuccess/SendTaskFailure', + }, + ], + true + ); + NagSuppressions.addResourceSuppressionsByPath( + stack, + `/DataMigrateStack/Role/DefaultPolicy/Resource`, + [ + { + id: 'AwsSolutions-IAM5', + reason: '* is required to SendTaskSuccess/SendTaskFailure', + }, + ], + true + ); + NagSuppressions.addResourceSuppressionsByPath( + stack, + `/DataMigrateStack/TaskDefinition/ExecutionRole/DefaultPolicy/Resource`, + [ + { + id: 'AwsSolutions-IAM5', + reason: '* is required to SendTaskSuccess/SendTaskFailure', + }, + ], + true + ); + break; + default: break; } From 94711c10379323205dbe678ce4c25cd5d4bce6f1 Mon Sep 17 00:00:00 2001 From: Marko Malenic Date: Thu, 28 Nov 2024 16:45:51 +1100 Subject: [PATCH 3/3] style(data-migrate): make bucket permissions clearer --- config/stacks/dataMigrate.ts | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/config/stacks/dataMigrate.ts b/config/stacks/dataMigrate.ts index 97918abac..d3036a22f 100644 --- a/config/stacks/dataMigrate.ts +++ b/config/stacks/dataMigrate.ts @@ -1,25 +1,43 @@ import { AppStage, - vpcProps, - oncoanalyserBucket, - icav2PipelineCacheBucket, dataMoverRoleName, + fileManagerInventoryBucket, icav2ArchiveAnalysisBucket, icav2ArchiveFastqBucket, - fileManagerInventoryBucket, + icav2PipelineCacheBucket, logsApiGatewayConfig, + oncoanalyserBucket, + vpcProps, } from '../constants'; import { DataMigrateStackProps } from '../../lib/workload/stateless/stacks/data-migrate/deploy/stack'; export const getDataMigrateStackProps = (stage: AppStage): DataMigrateStackProps => { - // For dev/staging we can write to any bucket that is also readable. - let writeToBuckets = [oncoanalyserBucket[stage], icav2PipelineCacheBucket[stage]]; + let readFromBuckets = []; + let deleteFromBuckets = []; + let writeToBuckets = []; switch (stage) { case AppStage.BETA: + // For dev/staging we can write to and read from the same set of buckets. + readFromBuckets = [oncoanalyserBucket[stage], icav2PipelineCacheBucket[stage]]; + deleteFromBuckets = [oncoanalyserBucket[stage], icav2PipelineCacheBucket[stage]]; + // For dev additionally, write to the filemanager inventory bucket for testing. - writeToBuckets.push(fileManagerInventoryBucket[stage]); + writeToBuckets = [ + oncoanalyserBucket[stage], + icav2PipelineCacheBucket[stage], + fileManagerInventoryBucket[stage], + ]; + break; + case AppStage.GAMMA: + readFromBuckets = [oncoanalyserBucket[stage], icav2PipelineCacheBucket[stage]]; + deleteFromBuckets = [oncoanalyserBucket[stage], icav2PipelineCacheBucket[stage]]; + + writeToBuckets = [oncoanalyserBucket[stage], icav2PipelineCacheBucket[stage]]; break; case AppStage.PROD: + readFromBuckets = [oncoanalyserBucket[stage], icav2PipelineCacheBucket[stage]]; + deleteFromBuckets = [oncoanalyserBucket[stage], icav2PipelineCacheBucket[stage]]; + // For prod, we only allow writing to the archive buckets, nothing else. 
writeToBuckets = [icav2ArchiveAnalysisBucket[stage], icav2ArchiveFastqBucket[stage]]; break; @@ -28,8 +46,8 @@ export const getDataMigrateStackProps = (stage: AppStage): DataMigrateStackProps return { vpcProps, dataMoverRoleName, - deleteFromBuckets: [oncoanalyserBucket[stage], icav2PipelineCacheBucket[stage]], - readFromBuckets: [oncoanalyserBucket[stage], icav2PipelineCacheBucket[stage]], + deleteFromBuckets, + readFromBuckets, writeToBuckets, logRetention: logsApiGatewayConfig[stage].retention, };