From 89b6b418ea25b625408330981dddfc4ebe6c080f Mon Sep 17 00:00:00 2001 From: Alexis Lucattini Date: Fri, 1 Nov 2024 19:00:31 +1100 Subject: [PATCH 1/4] Added ora compression and ora decompression services --- .gitattributes | 1 + cdk.context.json | 240 ++++++++++++ config/config.ts | 9 + config/constants.ts | 50 +++ .../stacks/oraCompressionPipelineManager.ts | 57 +++ .../stacks/oraDecompressionPipelineManager.ts | 28 ++ .../icav2-copy-files-batch/index.ts | 4 +- .../components/icav2-copy-files/index.ts | 7 +- .../index.ts | 128 +++++++ ...press_ora_fastq_pair_sfn_template.asl.json | 84 +++++ .../tasks/ora_decompression/Dockerfile | 93 +++++ .../ora_decompression/binaries/orad.tar.gz | 3 + .../ora_decompression/docker-entrypoint.sh | 108 ++++++ .../scripts/download_icav2_file.py | 30 ++ .../scripts/get_aws_credentials_access.py | 49 +++ .../scripts/get_icav2_download_url.py | 29 ++ .../ora_decompression/scripts/get_s3_uri.py | 35 ++ .../index.ts | 5 +- .../Readme.md | 5 - .../index.ts | 23 -- .../uuid_py/requirements.txt | 1 - .../uuid_py/run_uuid.py | 14 - .../index.ts | 4 +- .../sfn-icav2-ready-event-handler/index.ts | 18 +- .../index.ts | 2 +- .../index.ts | 3 +- .../deploy/stack.ts | 54 +++ .../stateful/statefulStackCollectionClass.ts | 15 + .../index.ts | 2 +- .../stacks/ora-compression-manager/Readme.md | 140 +++++++ .../ora-compression-manager/deploy/index.ts | 284 ++++++++++++++ .../merge_file_sizes_for_fastq_list_rows.py | 349 ++++++++++++++++++ .../requirements.txt | 2 + .../set_outputs_json_py/requirements.txt | 1 + .../set_outputs_json_py/set_outputs_json.py | 129 +++++++ .../fastq_list_row_compression_event.asl.json | 63 ++++ .../set_compression_inputs.asl.json | 99 +++++ .../set_compression_outputs.asl.json | 80 ++++ .../ora-decompression-manager/README.md | 48 +++ .../ora-decompression-manager/deploy/index.ts | 96 +++++ ...ecompression_manager_sfn_template.asl.json | 50 +++ .../statelessStackCollectionClass.ts | 31 ++ 42 files changed, 2401 insertions(+), 72 deletions(-) create mode 100644 config/stacks/oraCompressionPipelineManager.ts create mode 100644 config/stacks/oraDecompressionPipelineManager.ts create mode 100644 lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts create mode 100644 lib/workload/components/ora-file-decompression-fq-pair-sfn/step_functions_templates/decompress_ora_fastq_pair_sfn_template.asl.json create mode 100644 lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/Dockerfile create mode 100644 lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/binaries/orad.tar.gz create mode 100644 lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/docker-entrypoint.sh create mode 100644 lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/download_icav2_file.py create mode 100644 lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/get_aws_credentials_access.py create mode 100644 lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/get_icav2_download_url.py create mode 100644 lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/get_s3_uri.py delete mode 100644 lib/workload/components/python-lambda-uuid-generator-function/Readme.md delete mode 100644 lib/workload/components/python-lambda-uuid-generator-function/index.ts delete mode 100644 
lib/workload/components/python-lambda-uuid-generator-function/uuid_py/requirements.txt delete mode 100644 lib/workload/components/python-lambda-uuid-generator-function/uuid_py/run_uuid.py create mode 100644 lib/workload/stateful/stacks/ora-decompression-dynamodb/deploy/stack.ts create mode 100644 lib/workload/stateless/stacks/ora-compression-manager/Readme.md create mode 100644 lib/workload/stateless/stacks/ora-compression-manager/deploy/index.ts create mode 100644 lib/workload/stateless/stacks/ora-compression-manager/lambdas/merge_file_sizes_for_fastq_list_rows_py/merge_file_sizes_for_fastq_list_rows.py create mode 100644 lib/workload/stateless/stacks/ora-compression-manager/lambdas/merge_file_sizes_for_fastq_list_rows_py/requirements.txt create mode 100644 lib/workload/stateless/stacks/ora-compression-manager/lambdas/set_outputs_json_py/requirements.txt create mode 100644 lib/workload/stateless/stacks/ora-compression-manager/lambdas/set_outputs_json_py/set_outputs_json.py create mode 100644 lib/workload/stateless/stacks/ora-compression-manager/step_functions_templates/fastq_list_row_compression_event.asl.json create mode 100644 lib/workload/stateless/stacks/ora-compression-manager/step_functions_templates/set_compression_inputs.asl.json create mode 100644 lib/workload/stateless/stacks/ora-compression-manager/step_functions_templates/set_compression_outputs.asl.json create mode 100644 lib/workload/stateless/stacks/ora-decompression-manager/README.md create mode 100644 lib/workload/stateless/stacks/ora-decompression-manager/deploy/index.ts create mode 100644 lib/workload/stateless/stacks/ora-decompression-manager/step_functions_templates/ora_decompression_manager_sfn_template.asl.json diff --git a/.gitattributes b/.gitattributes index baa1a1850..537ac35c1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ /.yarn/releases/** binary /.yarn/plugins/** binary *.json.gz filter=lfs diff=lfs merge=lfs -text +*.tar.gz filter=lfs diff=lfs merge=lfs -text diff --git a/cdk.context.json b/cdk.context.json index 3d8e98ba3..acb9a7bf5 100644 --- a/cdk.context.json +++ b/cdk.context.json @@ -79,6 +79,86 @@ } ] }, + "vpc-provider:account=843407916570:filter.tag:Name=main-vpc:region=ap-southeast-2:returnAsymmetricSubnets=true": { + "vpcId": "vpc-00eafc63c0dfca266", + "vpcCidrBlock": "10.2.0.0/16", + "ownerAccountId": "843407916570", + "availabilityZones": [], + "subnetGroups": [ + { + "name": "database", + "type": "Isolated", + "subnets": [ + { + "subnetId": "subnet-08edbca7f0eda37f6", + "cidr": "10.2.40.0/23", + "availabilityZone": "ap-southeast-2a", + "routeTableId": "rtb-04f3e62b4f5c4f071" + }, + { + "subnetId": "subnet-00e0de28430f6e3cc", + "cidr": "10.2.42.0/23", + "availabilityZone": "ap-southeast-2b", + "routeTableId": "rtb-04f3e62b4f5c4f071" + }, + { + "subnetId": "subnet-01009a3a383f4bff1", + "cidr": "10.2.44.0/23", + "availabilityZone": "ap-southeast-2c", + "routeTableId": "rtb-04f3e62b4f5c4f071" + } + ] + }, + { + "name": "public", + "type": "Public", + "subnets": [ + { + "subnetId": "subnet-0fab038b0341872f1", + "cidr": "10.2.0.0/23", + "availabilityZone": "ap-southeast-2a", + "routeTableId": "rtb-0645235a1d1a53601" + }, + { + "subnetId": "subnet-0e84dd3a07fb770f5", + "cidr": "10.2.2.0/23", + "availabilityZone": "ap-southeast-2b", + "routeTableId": "rtb-0645235a1d1a53601" + }, + { + "subnetId": "subnet-093aee876a555f218", + "cidr": "10.2.4.0/23", + "availabilityZone": "ap-southeast-2c", + "routeTableId": "rtb-0645235a1d1a53601" + } + ] + }, + { + "name": "private", + "type": 
"Private", + "subnets": [ + { + "subnetId": "subnet-050e6fb0f6028178b", + "cidr": "10.2.20.0/23", + "availabilityZone": "ap-southeast-2a", + "routeTableId": "rtb-06aa800ce565746f4" + }, + { + "subnetId": "subnet-07b944de33d98047e", + "cidr": "10.2.22.0/23", + "availabilityZone": "ap-southeast-2b", + "routeTableId": "rtb-06aa800ce565746f4" + }, + { + "subnetId": "subnet-0207da0a8acfcb732", + "cidr": "10.2.24.0/23", + "availabilityZone": "ap-southeast-2c", + "routeTableId": "rtb-06aa800ce565746f4" + } + ] + } + ] + }, "vpc-provider:account=455634345446:filter.tag:Name=main-vpc:filter.tag:Stack=networking:region=ap-southeast-2:returnAsymmetricSubnets=true": { "vpcId": "vpc-0e226f3aa0f9c5781", "vpcCidrBlock": "10.2.0.0/16", @@ -159,6 +239,86 @@ } ] }, + "vpc-provider:account=455634345446:filter.tag:Name=main-vpc:region=ap-southeast-2:returnAsymmetricSubnets=true": { + "vpcId": "vpc-0e226f3aa0f9c5781", + "vpcCidrBlock": "10.2.0.0/16", + "ownerAccountId": "455634345446", + "availabilityZones": [], + "subnetGroups": [ + { + "name": "database", + "type": "Isolated", + "subnets": [ + { + "subnetId": "subnet-016b98cc640954651", + "cidr": "10.2.40.0/23", + "availabilityZone": "ap-southeast-2a", + "routeTableId": "rtb-014f5bf2c2d4af5e7" + }, + { + "subnetId": "subnet-0c2c217219ed56bfc", + "cidr": "10.2.42.0/23", + "availabilityZone": "ap-southeast-2b", + "routeTableId": "rtb-014f5bf2c2d4af5e7" + }, + { + "subnetId": "subnet-03e6fba027a1e0427", + "cidr": "10.2.44.0/23", + "availabilityZone": "ap-southeast-2c", + "routeTableId": "rtb-014f5bf2c2d4af5e7" + } + ] + }, + { + "name": "private", + "type": "Private", + "subnets": [ + { + "subnetId": "subnet-01308be8bb704e5ef", + "cidr": "10.2.20.0/23", + "availabilityZone": "ap-southeast-2a", + "routeTableId": "rtb-0ad5a1fddb41ed1cf" + }, + { + "subnetId": "subnet-0ab125fef23f8feed", + "cidr": "10.2.22.0/23", + "availabilityZone": "ap-southeast-2b", + "routeTableId": "rtb-0ad5a1fddb41ed1cf" + }, + { + "subnetId": "subnet-02290481f7651e39b", + "cidr": "10.2.24.0/23", + "availabilityZone": "ap-southeast-2c", + "routeTableId": "rtb-0ad5a1fddb41ed1cf" + } + ] + }, + { + "name": "public", + "type": "Public", + "subnets": [ + { + "subnetId": "subnet-079ba5a17c7102446", + "cidr": "10.2.0.0/23", + "availabilityZone": "ap-southeast-2a", + "routeTableId": "rtb-08cf65d720bf1e5f3" + }, + { + "subnetId": "subnet-00df4e6910885d2f6", + "cidr": "10.2.2.0/23", + "availabilityZone": "ap-southeast-2b", + "routeTableId": "rtb-08cf65d720bf1e5f3" + }, + { + "subnetId": "subnet-027a37df34939ab32", + "cidr": "10.2.4.0/23", + "availabilityZone": "ap-southeast-2c", + "routeTableId": "rtb-08cf65d720bf1e5f3" + } + ] + } + ] + }, "vpc-provider:account=472057503814:filter.tag:Name=main-vpc:filter.tag:Stack=networking:region=ap-southeast-2:returnAsymmetricSubnets=true": { "vpcId": "vpc-0dc99f521ceaa3f2d", "vpcCidrBlock": "10.2.0.0/16", @@ -239,6 +399,86 @@ } ] }, + "vpc-provider:account=472057503814:filter.tag:Name=main-vpc:region=ap-southeast-2:returnAsymmetricSubnets=true": { + "vpcId": "vpc-0dc99f521ceaa3f2d", + "vpcCidrBlock": "10.2.0.0/16", + "ownerAccountId": "472057503814", + "availabilityZones": [], + "subnetGroups": [ + { + "name": "public", + "type": "Public", + "subnets": [ + { + "subnetId": "subnet-0a7fb9d501192f5ee", + "cidr": "10.2.0.0/23", + "availabilityZone": "ap-southeast-2a", + "routeTableId": "rtb-0c10b927ab0264377" + }, + { + "subnetId": "subnet-0d61be977ee60a5b5", + "cidr": "10.2.2.0/23", + "availabilityZone": "ap-southeast-2b", + "routeTableId": 
"rtb-0c10b927ab0264377" + }, + { + "subnetId": "subnet-0d8226a9b4af34507", + "cidr": "10.2.4.0/23", + "availabilityZone": "ap-southeast-2c", + "routeTableId": "rtb-0c10b927ab0264377" + } + ] + }, + { + "name": "database", + "type": "Isolated", + "subnets": [ + { + "subnetId": "subnet-03ac51c3ab4223a1a", + "cidr": "10.2.40.0/23", + "availabilityZone": "ap-southeast-2a", + "routeTableId": "rtb-01266550d60b56ddb" + }, + { + "subnetId": "subnet-06d14dcb934f22c5e", + "cidr": "10.2.42.0/23", + "availabilityZone": "ap-southeast-2b", + "routeTableId": "rtb-01266550d60b56ddb" + }, + { + "subnetId": "subnet-0f5a5386ddf295579", + "cidr": "10.2.44.0/23", + "availabilityZone": "ap-southeast-2c", + "routeTableId": "rtb-01266550d60b56ddb" + } + ] + }, + { + "name": "private", + "type": "Private", + "subnets": [ + { + "subnetId": "subnet-01be4c1109eca3446", + "cidr": "10.2.20.0/23", + "availabilityZone": "ap-southeast-2a", + "routeTableId": "rtb-067d123217c80f6bd" + }, + { + "subnetId": "subnet-070a9acba78168239", + "cidr": "10.2.22.0/23", + "availabilityZone": "ap-southeast-2b", + "routeTableId": "rtb-067d123217c80f6bd" + }, + { + "subnetId": "subnet-01ae2b4ad1eb584d7", + "cidr": "10.2.24.0/23", + "availabilityZone": "ap-southeast-2c", + "routeTableId": "rtb-067d123217c80f6bd" + } + ] + } + ] + }, "security-group:account=843407916570:region=ap-southeast-2:securityGroupName=OrcaBusSharedComputeSecurityGroup:vpcId=vpc-00eafc63c0dfca266": { "securityGroupId": "sg-03abb47eba799e044", "allowAllOutbound": false diff --git a/config/config.ts b/config/config.ts index 8b39c508e..691b0fcc2 100644 --- a/config/config.ts +++ b/config/config.ts @@ -58,6 +58,11 @@ import { getOncoanalyserPipelineManagerStackProps, getOncoanalyserPipelineTableStackProps, } from './stacks/oncoanalyser'; +import { + getOraCompressionIcav2PipelineManagerStackProps, + getOraCompressionIcav2PipelineTableStackProps, +} from './stacks/oraCompressionPipelineManager'; +import { getOraDecompressionManagerStackProps } from './stacks/oraDecompressionPipelineManager'; interface EnvironmentConfig { name: string; @@ -92,6 +97,7 @@ export const getEnvironmentConfig = (stage: AppStage): EnvironmentConfig | null wtsIcav2PipelineTableStackProps: getWtsIcav2PipelineTableStackProps(), umccriseIcav2PipelineTableStackProps: getUmccriseIcav2PipelineTableStackProps(), rnasumIcav2PipelineTableStackProps: getRnasumIcav2PipelineTableStackProps(), + oraCompressionIcav2PipelineTableStackProps: getOraCompressionIcav2PipelineTableStackProps(), BclConvertTableStackProps: getBclConvertManagerTableStackProps(stage), stackyStatefulTablesStackProps: getStatefulGlueStackProps(), pierianDxPipelineTableStackProps: getPierianDxPipelineTableStackProps(), @@ -115,6 +121,9 @@ export const getEnvironmentConfig = (stage: AppStage): EnvironmentConfig | null pieriandxPipelineManagerStackProps: getPierianDxPipelineManagerStackProps(stage), oncoanalyserPipelineManagerStackProps: getOncoanalyserPipelineManagerStackProps(stage), sashPipelineManagerStackProps: getSashPipelineManagerStackProps(stage), + oraCompressionIcav2PipelineManagerStackProps: + getOraCompressionIcav2PipelineManagerStackProps(stage), + oraDecompressionManagerStackProps: getOraDecompressionManagerStackProps(stage), eventSchemaStackProps: getEventSchemaStackProps(), dataSchemaStackProps: getDataSchemaStackProps(), bclConvertManagerStackProps: getBclConvertManagerStackProps(stage), diff --git a/config/constants.ts b/config/constants.ts index 535f7353f..7dc1c6233 100644 --- a/config/constants.ts +++ b/config/constants.ts 
@@ -813,3 +813,53 @@ export const stackyAnalysisLogsUriSsmParameterName = '/orcabus/stacky/analysis_l // stg: s3://pipeline-stg-cache-503977275616-ap-southeast-2/byob-icav2/staging/cache/__workflow_name__/__portal_run_id__/ // prod: s3://pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production/cache/__workflow_name__/__portal_run_id__/ export const stackyAnalysisCacheUriSsmParameterName = '/orcabus/stacky/analysis_cache_uri'; + +/* +Resources generated by the ORA Compression pipeline +*/ + +export const oraCompressionSSMRoot = '/orcabus/ora_compression/'; + +export const oraCompressionIcav2PipelineManagerDynamodbTableName = + 'oraCompressionICAv2AnalysesDynamoDBTable'; + +// Stateful +export const oraCompressionDynamoDbTableSSMName = path.join( + oraCompressionSSMRoot, + 'dynamodb_table_name' +); +export const oraCompressionDynamoDbTableSSMArn = path.join( + oraCompressionSSMRoot, + 'dynamodb_table_arn' +); + +// Stateless +export const oraCompressionIcav2PipelineWorkflowType = 'ora-compression'; +export const oraCompressionIcav2PipelineWorkflowTypeVersion = '4.2.4--v2'; +export const oraCompressionIcav2ServiceVersion = '2024.07.01'; + +export const oraCompressionIcav2ReadyEventSource = 'orcabus.workflowmanager'; +export const oraCompressionIcav2EventSource = 'orcabus.oracompression'; +export const oraCompressionIcav2EventDetailType = 'WorkflowRunStateChange'; +export const oraCompressionStateMachinePrefix = 'oraCompressionSfn'; + +/* +Resources used by the ora compression pipeline +*/ + +// Release can be found here: https://github.com/umccr/cwl-ica/releases/tag/dragen-instrument-run-fastq-to-ora-pipeline%2F4.2.4__20241030041958 +// Pipeline ID is: ba8f618a-842f-4a2f-9b2f-a074c0472218 // FIXME not in stg/prod +export const oraCompressionIcav2PipelineIdSSMParameterPath = + '/icav2/umccr-prod/ora_compression_pipeline_id'; + +// Default Reference Uri for compressing ORA files // FIXME not in stg/prod +export const oraCompressionDefaultReferenceUriSSmParameterPath = + '/icav2/umccr-prod/ora_compression_default_reference_version_uri'; + +/* +Resources generated by the ora decompression manager +*/ +export const oraDecompressionIcav2ReadyEventSource = 'orcabus.workflowmanager'; +export const oraDecompressionIcav2EventSource = 'orcabus.oradecompression'; +export const oraDecompressionIcav2EventDetailType = 'FastqListRowDecompressed'; +export const oraDecompressionStateMachinePrefix = 'oraDecompressionSfn'; diff --git a/config/stacks/oraCompressionPipelineManager.ts b/config/stacks/oraCompressionPipelineManager.ts new file mode 100644 index 000000000..888510b29 --- /dev/null +++ b/config/stacks/oraCompressionPipelineManager.ts @@ -0,0 +1,57 @@ +import { + AppStage, + eventBusName, + icaEventPipeStackName, + icav2AccessTokenSecretName, + oraCompressionIcav2PipelineIdSSMParameterPath, + oraCompressionIcav2PipelineManagerDynamodbTableName, + oraCompressionIcav2PipelineWorkflowType, + oraCompressionIcav2PipelineWorkflowTypeVersion, + oraCompressionIcav2ServiceVersion, + oraCompressionIcav2ReadyEventSource, + oraCompressionIcav2EventSource, + oraCompressionIcav2EventDetailType, + oraCompressionDynamoDbTableSSMArn, + oraCompressionDynamoDbTableSSMName, + oraCompressionDefaultReferenceUriSSmParameterPath, + oraCompressionStateMachinePrefix, +} from '../constants'; +import { OraCompressionIcav2PipelineTableConfig } from '../../lib/workload/stateful/stacks/ora-decompression-dynamodb/deploy/stack'; +import { OraCompressionIcav2PipelineManagerConfig } from 
'../../lib/workload/stateless/stacks/ora-compression-manager/deploy'; + +// Stateful +export const getOraCompressionIcav2PipelineTableStackProps = + (): OraCompressionIcav2PipelineTableConfig => { + return { + oraDecompressionIcav2DynamodbTableArnSsmParameterPath: oraCompressionDynamoDbTableSSMArn, + oraDecompressionIcav2DynamodbTableNameSsmParameterPath: oraCompressionDynamoDbTableSSMName, + dynamodbTableName: oraCompressionIcav2PipelineManagerDynamodbTableName, + }; + }; + +// Stateless +export const getOraCompressionIcav2PipelineManagerStackProps = ( + stage: AppStage +): OraCompressionIcav2PipelineManagerConfig => { + return { + /* ICAv2 Pipeline analysis essentials */ + icav2TokenSecretId: icav2AccessTokenSecretName[stage], // "/icav2/umccr-prod/service-production-jwt-token-secret-arn" + /* Table to store analyis metadata */ + dynamodbTableName: oraCompressionIcav2PipelineManagerDynamodbTableName, + /* Internal and external buses */ + eventBusName: eventBusName, + icaEventPipeName: `${icaEventPipeStackName}Pipe`, + /* Event handling */ + workflowName: oraCompressionIcav2PipelineWorkflowType, + workflowVersion: oraCompressionIcav2PipelineWorkflowTypeVersion, + serviceVersion: oraCompressionIcav2ServiceVersion, + triggerLaunchSource: oraCompressionIcav2ReadyEventSource, + internalEventSource: oraCompressionIcav2EventSource, + detailType: oraCompressionIcav2EventDetailType, + /* Names for statemachines */ + stateMachinePrefix: oraCompressionStateMachinePrefix, + /* SSM Workflow Parameters */ + referenceUriSsmPath: oraCompressionDefaultReferenceUriSSmParameterPath, + pipelineIdSsmPath: oraCompressionIcav2PipelineIdSSMParameterPath, // List of parameters the workflow session state machine will need access to + }; +}; diff --git a/config/stacks/oraDecompressionPipelineManager.ts b/config/stacks/oraDecompressionPipelineManager.ts new file mode 100644 index 000000000..c6369685c --- /dev/null +++ b/config/stacks/oraDecompressionPipelineManager.ts @@ -0,0 +1,28 @@ +import { + AppStage, + eventBusName, + icav2AccessTokenSecretName, + oraDecompressionIcav2ReadyEventSource, + oraDecompressionIcav2EventSource, + oraDecompressionIcav2EventDetailType, + oraDecompressionStateMachinePrefix, +} from '../constants'; +import { OraDecompressionPipelineManagerConfig } from '../../lib/workload/stateless/stacks/ora-decompression-manager/deploy'; + +// Stateless +export const getOraDecompressionManagerStackProps = ( + stage: AppStage +): OraDecompressionPipelineManagerConfig => { + return { + /* ICAv2 Pipeline analysis essentials */ + icav2TokenSecretId: icav2AccessTokenSecretName[stage], // "/icav2/umccr-prod/service-production-jwt-token-secret-arn" + /* Internal and external buses */ + eventBusName: eventBusName, + triggerEventSource: oraDecompressionIcav2ReadyEventSource, + outputEventSource: oraDecompressionIcav2EventSource, + /* Event handling */ + detailType: oraDecompressionIcav2EventDetailType, + /* Names for statemachines */ + stateMachinePrefix: oraDecompressionStateMachinePrefix, + }; +}; diff --git a/lib/workload/components/icav2-copy-files-batch/index.ts b/lib/workload/components/icav2-copy-files-batch/index.ts index 03fb9b966..c0d98b378 100644 --- a/lib/workload/components/icav2-copy-files-batch/index.ts +++ b/lib/workload/components/icav2-copy-files-batch/index.ts @@ -54,7 +54,7 @@ export class ICAv2CopyBatchUtilityConstruct extends Construct { }); // Add execution permissions to stateMachine role - manifestInverterLambda.currentVersion.grantInvoke(this.icav2CopyFilesBatchSfnObj.role); + 
manifestInverterLambda.currentVersion.grantInvoke(this.icav2CopyFilesBatchSfnObj); // Because we run a nested state machine, we need to add the permissions to the state machine role // See https://stackoverflow.com/questions/60612853/nested-step-function-in-a-step-function-unknown-error-not-authorized-to-cr @@ -68,6 +68,6 @@ export class ICAv2CopyBatchUtilityConstruct extends Construct { ); // Add state machine execution permissions to stateMachineBatch role - this.icav2CopyFilesSfnObj.grantStartExecution(this.icav2CopyFilesBatchSfnObj.role); + this.icav2CopyFilesSfnObj.grantStartExecution(this.icav2CopyFilesBatchSfnObj); } } diff --git a/lib/workload/components/icav2-copy-files/index.ts b/lib/workload/components/icav2-copy-files/index.ts index 719da7e7a..e43ac078b 100644 --- a/lib/workload/components/icav2-copy-files/index.ts +++ b/lib/workload/components/icav2-copy-files/index.ts @@ -1,7 +1,6 @@ import { Duration } from 'aws-cdk-lib'; import { Construct } from 'constructs'; import * as lambda from 'aws-cdk-lib/aws-lambda'; -import * as iam from 'aws-cdk-lib/aws-iam'; import * as sfn from 'aws-cdk-lib/aws-stepfunctions'; import * as secretsManager from 'aws-cdk-lib/aws-secretsmanager'; import { PythonFunction } from '@aws-cdk/aws-lambda-python-alpha'; @@ -35,9 +34,7 @@ export class ICAv2CopyFilesConstruct extends Construct { }); // Allow launch job lambda to read the secret - props.icav2JwtSecretParameterObj.grantRead( - check_or_launch_job_lambda.currentVersion.role - ); + props.icav2JwtSecretParameterObj.grantRead(check_or_launch_job_lambda.currentVersion); // Specify the single statemachine and replace the arn placeholders with the lambda arns defined above this.icav2CopyFilesSfnObj = new sfn.StateMachine(this, 'copy_single_state_machine', { @@ -54,6 +51,6 @@ export class ICAv2CopyFilesConstruct extends Construct { }); // Add execution permissions to stateMachine role - check_or_launch_job_lambda.currentVersion.grantInvoke(this.icav2CopyFilesSfnObj.role); + check_or_launch_job_lambda.currentVersion.grantInvoke(this.icav2CopyFilesSfnObj); } } diff --git a/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts b/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts new file mode 100644 index 000000000..b32d47c5a --- /dev/null +++ b/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 + +import { Construct } from 'constructs'; +import * as ecs from 'aws-cdk-lib/aws-ecs'; +import * as ec2 from 'aws-cdk-lib/aws-ec2'; +import * as sfn from 'aws-cdk-lib/aws-stepfunctions'; +import * as secretsManager from 'aws-cdk-lib/aws-secretsmanager'; +import * as ecrAssets from 'aws-cdk-lib/aws-ecr-assets'; +import { NagSuppressions } from 'cdk-nag'; +import path from 'path'; +import * as lambda from 'aws-cdk-lib/aws-lambda'; +import { RetentionDays } from 'aws-cdk-lib/aws-logs'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as cdk from 'aws-cdk-lib'; + +export interface OraDecompressionConstructProps { + sfnPrefix: string; + icav2AccessTokenSecretId: string; +} + +export class OraDecompressionConstruct extends Construct { + public readonly sfnObject: sfn.StateMachine; + + constructor(scope: Construct, id: string, props: OraDecompressionConstructProps) { + super(scope, id); + + // Set up task definition and cluster + // a fargate cluster we use for non-lambda Tasks + // we sometimes need to execute tasks in a VPC context so we need one of these + const vpc = ec2.Vpc.fromLookup(this, 'MainVpc', { + vpcName: 
'main-vpc', + }); + const cluster = new ecs.Cluster(this, 'FargateCluster', { + vpc: vpc, + enableFargateCapacityProviders: true, + containerInsights: true, + }); + const taskDefinition = new ecs.FargateTaskDefinition(this, 'FargateTaskDefinition', { + runtimePlatform: { + cpuArchitecture: ecs.CpuArchitecture.ARM64, + }, + cpu: 8192, // Maps to 8 CPUs + // For 8 CPU: + // Available memory values: + // Between 16384 (16 GB) and 61440 (60 GB) in increments of 4096 (4 GB) + memoryLimitMiB: 16384, + }); + // We also need a security group context to run the task in + const securityGroup = new ec2.SecurityGroup(this, 'SecurityGroup', { + vpc, + }); + + // Generate the docker image asset + const architecture = lambda.Architecture.ARM_64; + const oraDecompressionImage = new ecrAssets.DockerImageAsset(this, 'OraDecompression', { + directory: path.join(__dirname, 'tasks', 'ora_decompression'), + buildArgs: { + TARGETPLATFORM: architecture.dockerPlatform, + }, + }); + + // FIXME - cdk nag error on fargate task definition role + // { + // "Action": "ecr:GetAuthorizationToken", + // "Effect": "Allow", + // "Resource": "*" + // }, + NagSuppressions.addResourceSuppressions(taskDefinition, [ + { + id: 'AwsSolutions-IAM5', + reason: 'Fargate has GetAuthorizationToken permission on all resources by default', + }, + ]); + + // Add permission to task role + const icav2SecretObj = secretsManager.Secret.fromSecretNameV2( + this, + 'icav2SecretObject', + props.icav2AccessTokenSecretId + ); + icav2SecretObj.grantRead(taskDefinition.taskRole); + + // Add container to task role + const oraDecompressionContainer = taskDefinition.addContainer('oraDecompressionContainer', { + image: ecs.ContainerImage.fromDockerImageAsset(oraDecompressionImage), + containerName: `${props.sfnPrefix}-orad-container`, + logging: ecs.LogDriver.awsLogs({ + streamPrefix: 'orad', + logRetention: RetentionDays.ONE_WEEK, + }), + }); + + // Set up step function + // Build state machine object + this.sfnObject = new sfn.StateMachine(this, 'state_machine', { + stateMachineName: `${props.sfnPrefix}-ora-decompression-sfn`, + definitionBody: sfn.DefinitionBody.fromFile( + path.join( + __dirname, + 'step_functions_templates/decompress_ora_fastq_pair_sfn_template.asl.json' + ) + ), + definitionSubstitutions: { + /* ICAv2 Secret ID */ + __icav2_access_token_secret_id__: icav2SecretObj.secretName, + /* Task Definition and Cluster ARNs */ + __ora_decompression_cluster_arn__: cluster.clusterArn, + __ora_task_definition_arn__: taskDefinition.taskDefinitionArn, + __ora_container_name__: oraDecompressionContainer.containerName, + __subnets__: cluster.vpc.privateSubnets.map((subnet) => subnet.subnetId).join(','), + __sg_group__: securityGroup.securityGroupId, + }, + }); + + // Allow step function to run the ECS task + taskDefinition.grantRun(this.sfnObject); + + /* Grant the state machine access to monitor the tasks */ + this.sfnObject.addToRolePolicy( + new iam.PolicyStatement({ + resources: [ + `arn:aws:events:${cdk.Aws.REGION}:${cdk.Aws.ACCOUNT_ID}:rule/StepFunctionsGetEventsForECSTaskRule`, + ], + actions: ['events:PutTargets', 'events:PutRule', 'events:DescribeRule'], + }) + ); + } +} diff --git a/lib/workload/components/ora-file-decompression-fq-pair-sfn/step_functions_templates/decompress_ora_fastq_pair_sfn_template.asl.json b/lib/workload/components/ora-file-decompression-fq-pair-sfn/step_functions_templates/decompress_ora_fastq_pair_sfn_template.asl.json new file mode 100644 index 000000000..e1ad469c9 --- /dev/null +++ 
b/lib/workload/components/ora-file-decompression-fq-pair-sfn/step_functions_templates/decompress_ora_fastq_pair_sfn_template.asl.json @@ -0,0 +1,84 @@ +{ + "Comment": "A description of my state machine", + "StartAt": "Convert Fastq List Row ORA to Array", + "States": { + "Convert Fastq List Row ORA to Array": { + "Type": "Pass", + "Next": "Decompress ORA Files", + "Parameters": { + "ora_files_map": [ + { + "input_uri.$": "$.read1OraFileUri", + "output_uri.$": "$.read1GzOutputFileUri", + "estimated_gz_file_size.$": "States.Format('{}', $.read1EstimatedGzFileSize)" + }, + { + "input_uri.$": "$.read2OraFileUri", + "output_uri.$": "$.read2GzOutputFileUri", + "estimated_gz_file_size.$": "States.Format('{}', $.read2EstimatedGzFileSize)" + } + ] + }, + "ResultPath": "$.fastq_list_row_as_map" + }, + "Decompress ORA Files": { + "Type": "Map", + "ItemsPath": "$.fastq_list_row_as_map.ora_files_map", + "ItemProcessor": { + "ProcessorConfig": { + "Mode": "INLINE" + }, + "StartAt": "Decompress ORA File", + "States": { + "Decompress ORA File": { + "Type": "Task", + "Resource": "arn:aws:states:::ecs:runTask.sync", + "Parameters": { + "LaunchType": "FARGATE", + "Cluster": "${__ora_decompression_cluster_arn__}", + "TaskDefinition": "${__ora_task_definition_arn__}", + "NetworkConfiguration": { + "AwsvpcConfiguration": { + "Subnets.$": "States.StringSplit('${__subnets__}', ',')", + "SecurityGroups.$": "States.Array('${__sg_group__}')" + } + }, + "Overrides": { + "ContainerOverrides": [ + { + "Name": "${__ora_container_name__}", + "Environment": [ + { + "Name": "INPUT_URI", + "Value.$": "$.input_uri" + }, + { + "Name": "OUTPUT_URI", + "Value.$": "$.output_uri" + }, + { + "Name": "ESTIMATED_GZ_FILE_SIZE", + "Value.$": "$.estimated_gz_file_size" + }, + { + "Name": "ICAV2_ACCESS_TOKEN_SECRET_ID", + "Value": "${__icav2_access_token_secret_id__}" + } + ] + } + ] + } + }, + "TimeoutSeconds": 7200, + "End": true + } + } + }, + "ResultPath": "$.fastq_list_row_as_map", + "Next": "Success" + }, + "Success": { + "Type": "Succeed" + } + } +} diff --git a/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/Dockerfile b/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/Dockerfile new file mode 100644 index 000000000..bc31b827d --- /dev/null +++ b/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/Dockerfile @@ -0,0 +1,93 @@ +FROM --platform=${TARGETPLATFORM} ubuntu:noble + +ARG ORAD_REF_DOWNLOAD_URL="https://s3.amazonaws.com/webdata.illumina.com/downloads/software/dragen-decompression/orad.2.7.0.linux.tar.gz" +ARG ORA_MOUNT="/opt" +ARG TARGETPLATFORM + +# Install ubuntu basics +RUN \ + if [ "${TARGETPLATFORM#linux/}" = "arm64" ]; then \ + platform_url="aarch64"; \ + else \ + platform_url="x86_64"; \ + fi && \ + apt update -yq && \ + apt upgrade -yq && \ + apt install -yq \ + wget \ + jq \ + tar \ + gzip \ + unzip \ + python3.12 \ + python3-pip && \ + python3 -mpip install wrapica --break-system-packages && \ + wget \ + --quiet \ + --output-document "awscliv2.zip" \ + "https://awscli.amazonaws.com/awscli-exe-linux-${platform_url}.zip" && \ + unzip -q "awscliv2.zip" && \ + ./aws/install && \ + rm "awscliv2.zip" + +## Install qemu-virtualization +## Commented out as we now have an orad arm64 binary version +## Add amd64 sources +#RUN \ +# apt install -yq \ +# qemu-user-static \ +# binfmt-support && \ +# echo "Adding multi-arch package sources" 1>2 && \ +# rm "/etc/apt/sources.list.d/ubuntu.sources" && \ +# echo 'deb [arch=amd64] 
http://archive.ubuntu.com/ubuntu/ noble main restricted universe multiverse' >> '/etc/apt/sources.list.d/ubuntu.list' && \ +# echo 'deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-updates main restricted universe multiverse' >> '/etc/apt/sources.list.d/ubuntu.list' && \ +# echo 'deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-security main restricted universe multiverse' >> '/etc/apt/sources.list.d/ubuntu.list' && \ +# echo 'deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-backports main restricted universe multiverse' >> '/etc/apt/sources.list.d/ubuntu.list' && \ +# echo 'deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe restricted multiverse' >> '/etc/apt/sources.list.d/ubuntu.list' && \ +# echo 'deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe restricted multiverse' >> '/etc/apt/sources.list.d/ubuntu.list' && \ +# echo 'deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe restricted multiverse' >> '/etc/apt/sources.list.d/ubuntu.list' && \ +# echo 'deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe restricted multiverse' >> '/etc/apt/sources.list.d/ubuntu.list' && \ +# dpkg --add-architecture amd64 && \ +# apt update -yq && \ +# apt install -yq \ +# libc6:amd64 \ +# uuid-dev:amd64 + +# Install the ora reference data directly into the docker container +# Remove the orad x86-64 binary since we use the arm64 binary provided instead +RUN \ + mkdir --parents --mode 744 "${ORA_MOUNT}" && \ + wget --quiet \ + --output-document /dev/stdout \ + "${ORAD_REF_DOWNLOAD_URL}" | \ + tar \ + --directory "${ORA_MOUNT}" \ + --extract \ + --gunzip \ + --strip-components=1 \ + --file - && \ + chmod 644 "${ORA_MOUNT}/oradata/" && \ + chmod a+x "${ORA_MOUNT}/orad" && \ + rm "${ORA_MOUNT}/orad" + +# Set the ora reference to the path ORA_MOUNT/oradata +ENV ORADATA_PATH="${ORA_MOUNT}/oradata/" + +# Add the orad binary to the docker container +# And make it executable +ADD binaries/orad.tar.gz /usr/local/bin/ +RUN chmod +x /usr/local/bin/orad + +# Copy the scripts to the docker container +# Make the scripts executable +COPY scripts/ scripts/ +RUN chmod +x scripts/* + +# Copy the docker entrypoint to the docker container +COPY docker-entrypoint.sh docker-entrypoint.sh +# Make the docker entrypoint executable +RUN chmod +x "./docker-entrypoint.sh" + +# Set the entrypoint as the docker entrypoint script +CMD [ "./docker-entrypoint.sh" ] + diff --git a/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/binaries/orad.tar.gz b/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/binaries/orad.tar.gz new file mode 100644 index 000000000..cfbe406b2 --- /dev/null +++ b/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/binaries/orad.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e827eaeb11c7112fac25da1e0089bee56ad0cc529633f29b55070db3da8c22 +size 3143268 diff --git a/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/docker-entrypoint.sh b/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/docker-entrypoint.sh new file mode 100644 index 000000000..4da18eeef --- /dev/null +++ b/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/docker-entrypoint.sh @@ -0,0 +1,108 @@ +#!/usr/bin/env bash + +# Set to fail +set -euo pipefail + +# Set python3 version +hash -p 
/usr/bin/python3.12 python3 + +# ENVIRONMENT VARIABLES +# Inputs +if [[ ! -v INPUT_URI ]]; then + echo "$(date -Iseconds): Error! Expected env var 'INPUT_URI' but was not found" 1>&2 + exit 1 +fi +if [[ ! -v OUTPUT_URI ]]; then + echo "$(date -Iseconds): Error! Expected env var 'OUTPUT_URI' but was not found" 1>&2 + exit 1 +fi + +# Required for aws s3 cp to upload a file from stdin +if [[ ! -v ESTIMATED_GZ_FILE_SIZE ]]; then + echo "$(date -Iseconds): Error! Expected env var 'ESTIMATED_GZ_FILE_SIZE' but was not found" 1>&2 + exit 1 +fi + +# ICAV2 ENV VARS +export ICAV2_BASE_URL="https://ica.illumina.com/ica/rest" + +# SECRET KEY FOR ICAV2 +if [[ ! -v ICAV2_ACCESS_TOKEN_SECRET_ID ]]; then + echo "$(date -Iseconds): Error! Expected env var 'ICAV2_ACCESS_TOKEN_SECRET_ID' but was not found" 1>&2 + exit 1 +fi + +echo "$(date -Iseconds): Collecting the ICAV2 access token" 1>&2 +# Get the ICAV2 access token +ICAV2_ACCESS_TOKEN="$( \ + aws secretsmanager get-secret-value \ + --secret-id "${ICAV2_ACCESS_TOKEN_SECRET_ID}" \ + --output text \ + --query SecretString +)" +export ICAV2_ACCESS_TOKEN + +# Download reference +# Section commented out as reference is built-in to the fargate task +# This may change in future if different references are used +#echo "$(date -Iseconds): Downloading the icav2 reference file" 1>&2 +#python3 scripts/download_icav2_file.py \ +# "${REFERENCE_URI}" \ +# "oradata-v2.tar.gz" +#tar -xf "oradata-v2.tar.gz" + +# Set AWS credentials access for aws s3 cp +echo "$(date -Iseconds): Collecting the AWS S3 Access credentials" 1>&2 +aws_s3_access_creds_json_str="$( \ + python3 scripts/get_aws_credentials_access.py \ + "$(dirname "${OUTPUT_URI}")/" +)" + +# Use a file descriptor to emulate the ora file +# Write the gzipped ora file to stdout +echo "$(date -Iseconds): Starting stream and decompression of the ora input file" 1>&2 +# Prefix with qemu-x86_64-static +# when using the orad x86_64 binary +# but we have the arm binary +# qemu-x86_64-static \ # Uncomment this line! 
+ +/usr/local/bin/orad \ + --gz \ + --gz-level 1 \ + --stdout \ + --ora-reference "${ORADATA_PATH}" \ + <( \ + wget \ + --quiet \ + --output-document /dev/stdout \ + "$( \ + python3 scripts/get_icav2_download_url.py \ + "${INPUT_URI}" + )" \ + ) \ + | \ +( + AWS_ACCESS_KEY_ID="$( \ + jq -r '.AWS_ACCESS_KEY_ID' <<< "${aws_s3_access_creds_json_str}" + )" \ + AWS_SECRET_ACCESS_KEY="$( \ + jq -r '.AWS_SECRET_ACCESS_KEY' <<< "${aws_s3_access_creds_json_str}" + )" \ + AWS_SESSION_TOKEN="$( \ + jq -r '.AWS_SESSION_TOKEN' <<< "${aws_s3_access_creds_json_str}" + )" \ + AWS_REGION="$( \ + jq -r '.AWS_REGION' <<< "${aws_s3_access_creds_json_str}" + )" \ + aws s3 cp \ + --expected-size="${ESTIMATED_GZ_FILE_SIZE}" \ + --sse=AES256 \ + - \ + "$( \ + python3 scripts/get_s3_uri.py \ + "$(dirname "${OUTPUT_URI}")/" \ + )$( \ + basename "${OUTPUT_URI}" \ + )" +) +echo "$(date -Iseconds): Stream and upload of decompression complete" 1>&2 diff --git a/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/download_icav2_file.py b/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/download_icav2_file.py new file mode 100644 index 000000000..bb3c4244b --- /dev/null +++ b/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/download_icav2_file.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 + +# Standard library imports +import sys +from pathlib import Path +from wrapica.project_data import ( + ProjectData, + convert_uri_to_project_data_obj, + write_icav2_file_contents +) + + +def main(): + # Get input + input_uri = sys.argv[1] + output_file = Path(sys.argv[2]) + + # Step 1 - Convert URI to projectdata object + project_data_obj: ProjectData = convert_uri_to_project_data_obj(input_uri) + + # Write project data object to file + write_icav2_file_contents( + project_data_obj.project_id, + project_data_obj.data.details.path, + output_file + ) + + +if __name__ == "__main__": + main() diff --git a/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/get_aws_credentials_access.py b/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/get_aws_credentials_access.py new file mode 100644 index 000000000..35cb32033 --- /dev/null +++ b/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/get_aws_credentials_access.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 + +# Standard library imports +import sys +from pathlib import Path +import json + +# Wrapica imports +from wrapica.project_data import ( + ProjectData, + convert_uri_to_project_data_obj, + get_project_data_folder_id_from_project_id_and_path, + get_aws_credentials_access_for_project_folder +) +from wrapica.libica_models import AwsTempCredentials + + +def main(): + # Get input + output_uri_dir = sys.argv[1] + + # Step 1 - Convert URI to project data object + project_data_folder_obj: ProjectData = convert_uri_to_project_data_obj( + output_uri_dir, + create_data_if_not_found=True + ) + + # Step 3 - Get AWS credentials access + aws_temp_credentials_access: AwsTempCredentials = get_aws_credentials_access_for_project_folder( + project_data_folder_obj.project_id, + project_data_folder_obj.data.id + ) + + # Write project data object to file + print( + json.dumps( + { + "AWS_ACCESS_KEY_ID": aws_temp_credentials_access.access_key, + "AWS_SECRET_ACCESS_KEY": aws_temp_credentials_access.secret_key, + "AWS_SESSION_TOKEN": aws_temp_credentials_access.session_token, + 
"AWS_REGION": aws_temp_credentials_access.region + }, + indent=4 + ) + ) + + +if __name__ == "__main__": + main() diff --git a/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/get_icav2_download_url.py b/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/get_icav2_download_url.py new file mode 100644 index 000000000..71c89496e --- /dev/null +++ b/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/get_icav2_download_url.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 + +# Standard library imports +import sys +from wrapica.project_data import ( + ProjectData, + convert_uri_to_project_data_obj, + create_download_url +) + + +def main(): + # Get input + input_uri = sys.argv[1] + + # Step 1 - Convert URI to projectdata object + project_data_obj: ProjectData = convert_uri_to_project_data_obj(input_uri) + + # Write project data object to file + print( + create_download_url( + project_data_obj.project_id, + project_data_obj.data.id + ) + ) + + +if __name__ == "__main__": + main() diff --git a/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/get_s3_uri.py b/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/get_s3_uri.py new file mode 100644 index 000000000..67750079f --- /dev/null +++ b/lib/workload/components/ora-file-decompression-fq-pair-sfn/tasks/ora_decompression/scripts/get_s3_uri.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 + +# Standard library imports +import sys +from pathlib import Path + +from wrapica.enums import UriType +from wrapica.project_data import ( + ProjectData, + convert_uri_to_project_data_obj, + convert_project_data_obj_to_uri +) + + +def main(): + # Get input + input_uri = sys.argv[1] + + # Step 1 - Convert URI to projectdata object + project_data_obj: ProjectData = convert_uri_to_project_data_obj( + input_uri, + create_data_if_not_found=True + ) + + # Write out as s3 uri + print( + convert_project_data_obj_to_uri( + project_data_obj, + uri_type=UriType.S3 + ) + ) + + +if __name__ == "__main__": + main() diff --git a/lib/workload/components/python-lambda-get-metadata-objects-from-samplesheet/index.ts b/lib/workload/components/python-lambda-get-metadata-objects-from-samplesheet/index.ts index 76dfdf63c..0fcfbc5cb 100644 --- a/lib/workload/components/python-lambda-get-metadata-objects-from-samplesheet/index.ts +++ b/lib/workload/components/python-lambda-get-metadata-objects-from-samplesheet/index.ts @@ -2,7 +2,6 @@ import { Construct } from 'constructs'; import * as lambda from 'aws-cdk-lib/aws-lambda'; import { PythonFunction } from '@aws-cdk/aws-lambda-python-alpha'; import path from 'path'; -import * as iam from 'aws-cdk-lib/aws-iam'; import { MetadataToolsPythonLambdaLayer } from '../python-metadata-tools-layer'; import { Duration } from 'aws-cdk-lib'; import * as ssm from 'aws-cdk-lib/aws-ssm'; @@ -60,9 +59,9 @@ export class GetLibraryObjectsFromSamplesheetConstruct extends Construct { }); // Allow the lambda to read the secret - orcabusTokenSecretObj.grantRead(this.lambdaObj.currentVersion.role); + orcabusTokenSecretObj.grantRead(this.lambdaObj.currentVersion); // Allow the lambda to read the ssm parameter - hostnameSsmParameterObj.grantRead(this.lambdaObj.currentVersion.role); + hostnameSsmParameterObj.grantRead(this.lambdaObj.currentVersion); } } diff --git a/lib/workload/components/python-lambda-uuid-generator-function/Readme.md 
b/lib/workload/components/python-lambda-uuid-generator-function/Readme.md deleted file mode 100644 index efca39d8b..000000000 --- a/lib/workload/components/python-lambda-uuid-generator-function/Readme.md +++ /dev/null @@ -1,5 +0,0 @@ -# Lambda UUID Generator - -Simple lambda function to generate a timestamp-sortable UUID. - -No inputs required \ No newline at end of file diff --git a/lib/workload/components/python-lambda-uuid-generator-function/index.ts b/lib/workload/components/python-lambda-uuid-generator-function/index.ts deleted file mode 100644 index 999f20303..000000000 --- a/lib/workload/components/python-lambda-uuid-generator-function/index.ts +++ /dev/null @@ -1,23 +0,0 @@ -import { Construct } from 'constructs'; -import * as lambda_python from '@aws-cdk/aws-lambda-python-alpha'; -import * as lambda from 'aws-cdk-lib/aws-lambda'; -import path from 'path'; -import { Duration } from 'aws-cdk-lib'; - -export class PythonLambdaUuidConstruct extends Construct { - public readonly lambdaObj: lambda_python.PythonFunction; - - constructor(scope: Construct, id: string) { - super(scope, id); - - this.lambdaObj = new lambda_python.PythonFunction(this, 'uuid_python_function', { - runtime: lambda.Runtime.PYTHON_3_12, - architecture: lambda.Architecture.ARM_64, - entry: path.join(__dirname, 'uuid_py'), - index: 'run_uuid.py', - handler: 'handler', - memorySize: 1024, - timeout: Duration.seconds(3), - }); - } -} diff --git a/lib/workload/components/python-lambda-uuid-generator-function/uuid_py/requirements.txt b/lib/workload/components/python-lambda-uuid-generator-function/uuid_py/requirements.txt deleted file mode 100644 index c70dbb5de..000000000 --- a/lib/workload/components/python-lambda-uuid-generator-function/uuid_py/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -uuid6>=2024.1.12 \ No newline at end of file diff --git a/lib/workload/components/python-lambda-uuid-generator-function/uuid_py/run_uuid.py b/lib/workload/components/python-lambda-uuid-generator-function/uuid_py/run_uuid.py deleted file mode 100644 index a937397ec..000000000 --- a/lib/workload/components/python-lambda-uuid-generator-function/uuid_py/run_uuid.py +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env python3 - -""" -Generate a random uuid via the uuid7 module -'018e83b0-ca5d-7be7-aeb6-28fc75038316' -These are time-based UUIDs, with the timestamp encoded in the first 48 bits. 
-""" - -from uuid6 import uuid7 - -def handler(event, context): - return { - "db_uuid": str(uuid7()) - } diff --git a/lib/workload/components/sfn-generate-workflowrunstatechange-ready-event/index.ts b/lib/workload/components/sfn-generate-workflowrunstatechange-ready-event/index.ts index 84f47b8d7..333df31db 100644 --- a/lib/workload/components/sfn-generate-workflowrunstatechange-ready-event/index.ts +++ b/lib/workload/components/sfn-generate-workflowrunstatechange-ready-event/index.ts @@ -118,13 +118,13 @@ export class GenerateWorkflowRunStateChangeReadyConstruct extends Construct { // Skip return; } - ssmParameterObj.grantRead(engineParameterGeneratorStateMachineSfn.role); + ssmParameterObj.grantRead(engineParameterGeneratorStateMachineSfn); }); // Add permissions for the statemachine to fill in the placeholders [flattenObjectListLambdaObj, fillPlaceholdersInEventPayloadDataLambdaObj].forEach( (lambdaObj) => { - lambdaObj.currentVersion.grantInvoke(engineParameterGeneratorStateMachineSfn.role); + lambdaObj.currentVersion.grantInvoke(engineParameterGeneratorStateMachineSfn); } ); diff --git a/lib/workload/components/sfn-icav2-ready-event-handler/index.ts b/lib/workload/components/sfn-icav2-ready-event-handler/index.ts index 5a7153998..883915eb7 100644 --- a/lib/workload/components/sfn-icav2-ready-event-handler/index.ts +++ b/lib/workload/components/sfn-icav2-ready-event-handler/index.ts @@ -7,7 +7,6 @@ import * as events_targets from 'aws-cdk-lib/aws-events-targets'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as cdk from 'aws-cdk-lib'; import * as ssm from 'aws-cdk-lib/aws-ssm'; -import { PythonLambdaUuidConstruct } from '../python-lambda-uuid-generator-function'; import * as lambda_python from '@aws-cdk/aws-lambda-python-alpha'; import * as lambda from 'aws-cdk-lib/aws-lambda'; import { Duration } from 'aws-cdk-lib'; @@ -65,9 +64,6 @@ export class WfmWorkflowStateChangeIcav2ReadyEventHandlerConstruct extends Const props.eventBusName ); - // Build the lambda python function to generate a uuid - const uuid_lambda_obj = new PythonLambdaUuidConstruct(this, 'uuid_python').lambdaObj; - // Build the launch lambda object const launch_lambda_obj = new lambda_python.PythonFunction( this, @@ -87,7 +83,7 @@ export class WfmWorkflowStateChangeIcav2ReadyEventHandlerConstruct extends Const ); // Give the lambda the ability to read the icav2 secret - props.icav2AccessTokenSecretObj.grantRead(launch_lambda_obj.role); + props.icav2AccessTokenSecretObj.grantRead(launch_lambda_obj); // Collect the pipeline id from the ssm parameter store const pipeline_id_ssm_param_obj = ssm.StringParameter.fromStringParameterName( @@ -119,7 +115,6 @@ export class WfmWorkflowStateChangeIcav2ReadyEventHandlerConstruct extends Const __workflow_version__: props.workflowVersion, __service_version__: props.serviceVersion, /* Lambdas */ - __generate_db_uuid_lambda_function_arn__: uuid_lambda_obj.currentVersion.functionArn, __launch_icav2_pipeline_lambda_function_name__: launch_lambda_obj.currentVersion.functionArn, /* SSM Parameter paths */ @@ -129,14 +124,11 @@ export class WfmWorkflowStateChangeIcav2ReadyEventHandlerConstruct extends Const }, }); - /* Grant the state machine access to invoke the dbuuid generator lambda function */ - uuid_lambda_obj.currentVersion.grantInvoke(this.stateMachineObj.role); - /* Grant the state machine access to invoke the launch lambda function */ - launch_lambda_obj.currentVersion.grantInvoke(this.stateMachineObj.role); + launch_lambda_obj.currentVersion.grantInvoke(this.stateMachineObj); 
/* Grant the state machine access to the ssm parameter path */ - pipeline_id_ssm_param_obj.grantRead(this.stateMachineObj.role); + pipeline_id_ssm_param_obj.grantRead(this.stateMachineObj); /* Grant the state machine access to invoke the internal launch sfn machine */ // Because we run a nested state machine, we need to add the permissions to the state machine role @@ -151,7 +143,7 @@ export class WfmWorkflowStateChangeIcav2ReadyEventHandlerConstruct extends Const ); // Grant the state machine the ability to start the internal generate inputs sfn - props.generateInputsJsonSfn.grantStartExecution(this.stateMachineObj.role); + props.generateInputsJsonSfn.grantStartExecution(this.stateMachineObj); /* Grant the state machine read and write access to the table */ table_obj.grantReadWriteData(this.stateMachineObj); @@ -178,6 +170,6 @@ export class WfmWorkflowStateChangeIcav2ReadyEventHandlerConstruct extends Const ); /* Grant the state machine the ability to submit events to the event bus */ - eventbus_obj.grantPutEventsTo(this.stateMachineObj.role); + eventbus_obj.grantPutEventsTo(this.stateMachineObj); } } diff --git a/lib/workload/components/sfn-icav2-state-change-event-handler/index.ts b/lib/workload/components/sfn-icav2-state-change-event-handler/index.ts index 5efb19d8b..4cd2fdca0 100644 --- a/lib/workload/components/sfn-icav2-state-change-event-handler/index.ts +++ b/lib/workload/components/sfn-icav2-state-change-event-handler/index.ts @@ -119,7 +119,7 @@ export class Icav2AnalysisEventHandlerConstruct extends Construct { ); /* Grant the state machine the ability to submit events to the event bus */ - eventbus_obj.grantPutEventsTo(this.stateMachineObj.role); + eventbus_obj.grantPutEventsTo(this.stateMachineObj); } private coerce_names(name: string) { diff --git a/lib/workload/components/sfn-workflowdraftrunstatechange-common-preamble/index.ts b/lib/workload/components/sfn-workflowdraftrunstatechange-common-preamble/index.ts index 4cfd259b8..7feb547a6 100644 --- a/lib/workload/components/sfn-workflowdraftrunstatechange-common-preamble/index.ts +++ b/lib/workload/components/sfn-workflowdraftrunstatechange-common-preamble/index.ts @@ -9,7 +9,6 @@ import { Construct } from 'constructs'; import * as lambda from 'aws-cdk-lib/aws-lambda'; import * as sfn from 'aws-cdk-lib/aws-stepfunctions'; import { PythonFunction } from '@aws-cdk/aws-lambda-python-alpha'; -import * as iam from 'aws-cdk-lib/aws-iam'; import path from 'path'; import { Duration } from 'aws-cdk-lib'; @@ -86,7 +85,7 @@ export class WorkflowDraftRunStateChangeCommonPreambleConstruct extends Construc */ /* Allow step functions to invoke the lambda */ [workflowRunNameLambda, portalRunIdLambda].forEach((lambdaObj) => { - lambdaObj.currentVersion.grantInvoke(this.stepFunctionObj.role); + lambdaObj.currentVersion.grantInvoke(this.stepFunctionObj); }); } } diff --git a/lib/workload/stateful/stacks/ora-decompression-dynamodb/deploy/stack.ts b/lib/workload/stateful/stacks/ora-decompression-dynamodb/deploy/stack.ts new file mode 100644 index 000000000..c153795c3 --- /dev/null +++ b/lib/workload/stateful/stacks/ora-decompression-dynamodb/deploy/stack.ts @@ -0,0 +1,54 @@ +import * as cdk from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import * as ssm from 'aws-cdk-lib/aws-ssm'; +import { DynamodbPartitionedPipelineConstruct } from '../../../../components/dynamodb-partitioned-table'; + +export interface OraCompressionIcav2PipelineTableConfig { + dynamodbTableName: string; + oraDecompressionIcav2DynamodbTableArnSsmParameterPath: 
string; + oraDecompressionIcav2DynamodbTableNameSsmParameterPath: string; +} + +export type OraCompressionIcav2PipelineTableStackProps = OraCompressionIcav2PipelineTableConfig & + cdk.StackProps; + +export class OraCompressionIcav2PipelineTable extends cdk.Stack { + public readonly oraCompressionIcav2DynamodbTableArnSsmParameterPath: string; + public readonly oraCompressionIcav2DynamodbTableNameSsmParameterPath: string; + + constructor(scope: Construct, id: string, props: OraCompressionIcav2PipelineTableStackProps) { + super(scope, id, props); + + /* + Initialise dynamodb table, where portal_run_id is the primary sort key + */ + const dynamodb_table = new DynamodbPartitionedPipelineConstruct( + this, + 'ora_decompression_icav2_pipeline_table', + { + tableName: props.dynamodbTableName, + } + ); + + /* + Generate a ssm parameter to store the table arn so it can be referred to be other stacks + */ + this.oraCompressionIcav2DynamodbTableArnSsmParameterPath = new ssm.StringParameter( + this, + 'ora_decompression_icav2_pipeline_table_arn_ssm_path', + { + parameterName: props.oraDecompressionIcav2DynamodbTableArnSsmParameterPath, + stringValue: dynamodb_table.tableNameArn, + } + ).parameterName; + + this.oraCompressionIcav2DynamodbTableNameSsmParameterPath = new ssm.StringParameter( + this, + 'ora_decompression_icav2_pipeline_table_name_ssm_path', + { + parameterName: props.oraDecompressionIcav2DynamodbTableNameSsmParameterPath, + stringValue: props.dynamodbTableName, + } + ).parameterName; + } +} diff --git a/lib/workload/stateful/statefulStackCollectionClass.ts b/lib/workload/stateful/statefulStackCollectionClass.ts index 74c9f7383..a47b3dcf6 100644 --- a/lib/workload/stateful/statefulStackCollectionClass.ts +++ b/lib/workload/stateful/statefulStackCollectionClass.ts @@ -61,6 +61,10 @@ import { SashNfPipelineTable, SashNfPipelineTableStackProps, } from './stacks/sash-dynamodb/deploy/stack'; +import { + OraCompressionIcav2PipelineTable, + OraCompressionIcav2PipelineTableStackProps, +} from './stacks/ora-decompression-dynamodb/deploy/stack'; export interface StatefulStackCollectionProps { dataBucketStackProps: DataBucketStackProps; @@ -76,6 +80,7 @@ export interface StatefulStackCollectionProps { wtsIcav2PipelineTableStackProps: WtsIcav2PipelineTableStackProps; umccriseIcav2PipelineTableStackProps: UmccriseIcav2PipelineTableStackProps; rnasumIcav2PipelineTableStackProps: RnasumIcav2PipelineTableStackProps; + oraCompressionIcav2PipelineTableStackProps: OraCompressionIcav2PipelineTableStackProps; BclConvertTableStackProps: BclConvertTableStackProps; stackyStatefulTablesStackProps: StackyStatefulTablesStackProps; pierianDxPipelineTableStackProps: PierianDxPipelineTableStackProps; @@ -99,6 +104,7 @@ export class StatefulStackCollection { readonly wtsIcav2PipelineTableStack: Stack; readonly umccriseIcav2PipelineTableStack: Stack; readonly rnasumIcav2PipelineTableStack: Stack; + readonly oraCompressionIcav2PipelineTableStack: Stack; readonly BclConvertTableStack: Stack; readonly stackyStatefulTablesStack: Stack; readonly pierianDxPipelineTableStack: Stack; @@ -207,6 +213,15 @@ export class StatefulStackCollection { } ); + this.oraCompressionIcav2PipelineTableStack = new OraCompressionIcav2PipelineTable( + scope, + 'OraCompressionIcav2PipelineTableStack', + { + ...this.createTemplateProps(env, 'OraCompressionIcav2PipelineTableStack'), + ...statefulConfiguration.oraCompressionIcav2PipelineTableStackProps, + } + ); + this.BclConvertTableStack = new BclConvertTable(scope, 'BclConvertTableStack', { 
...this.createTemplateProps(env, 'BclConvertTableStack'), ...statefulConfiguration.BclConvertTableStackProps, diff --git a/lib/workload/stateless/stacks/bclconvert-interop-qc-pipeline-manager/deploy/constructs/bclconvert-interop-qc-step-function/index.ts b/lib/workload/stateless/stacks/bclconvert-interop-qc-pipeline-manager/deploy/constructs/bclconvert-interop-qc-step-function/index.ts index 2706543e0..1d51b5606 100644 --- a/lib/workload/stateless/stacks/bclconvert-interop-qc-pipeline-manager/deploy/constructs/bclconvert-interop-qc-step-function/index.ts +++ b/lib/workload/stateless/stacks/bclconvert-interop-qc-pipeline-manager/deploy/constructs/bclconvert-interop-qc-step-function/index.ts @@ -83,7 +83,7 @@ export class BclConvertInteropQcIcav2PipelineConstruct extends Construct { ); // Give the lambda function access to the secret - props.icav2AccessTokenSecretObj.grantRead(set_outputs_json_lambda_function.role); + props.icav2AccessTokenSecretObj.grantRead(set_outputs_json_lambda_function.currentVersion); // Generate outputs const configure_outputs_sfn = new sfn.StateMachine(this, 'configure_outputs_sfn', { diff --git a/lib/workload/stateless/stacks/ora-compression-manager/Readme.md b/lib/workload/stateless/stacks/ora-compression-manager/Readme.md new file mode 100644 index 000000000..da0c4e6d4 --- /dev/null +++ b/lib/workload/stateless/stacks/ora-compression-manager/Readme.md @@ -0,0 +1,140 @@ +# Kick off the process + +```json5 +{ + "detail-type": "WorkflowRunStateChange", + "source": "orcabus.oracompressiontest", + "detail": { + "status": "READY", + "portalRunId": "202410306290aa4d", + "workflowName": "ora-compression", + "workflowVersion": "4.2.4--v2", + "workflowRunName": "umccr--automated--ora-compression--2024-10-30--202410306290aa4d", + "payload": { + "version": "2024.07.01", + "data": { + "inputs": { + "instrumentRunDirectoryUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/241024_A00130_0336_BHW7MVDSXC/20241030c613872c/", + "instrumentRunId": "241024_A00130_0336_BHW7MVDSXC" + }, + "engineParameters": { + "outputUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/ora-compression/202410306290aa4d" + } + }, + "tags": { + "instrumentRunId": "241024_A00130_0336_BHW7MVDSXC" + } + }, + "linkedLibraries": [ + { + "libraryId": "L2401499", + "orcabusId": "lib.01J9T9CM4RZMBSACRK7R4DPJZD" + }, + { + "libraryId": "L2401526", + "orcabusId": "lib.01JBB5Y205V55QZPFBHCGGMJH6" + }, + { + "libraryId": "L2401527", + "orcabusId": "lib.01JBB5Y22Q2CPCZR69JS6EK8QP" + }, + { + "libraryId": "L2401528", + "orcabusId": "lib.01JBB5Y26QQ4GBSVK48W1XWD6T" + }, + { + "libraryId": "L2401529", + "orcabusId": "lib.01JBB5Y2APRAEVKTXHC14BV5QV" + }, + { + "libraryId": "L2401530", + "orcabusId": "lib.01JBB5Y2EHZZ850EN28EWKH55H" + }, + { + "libraryId": "L2401531", + "orcabusId": "lib.01JBB5Y2H072091A41CKQTNC5C" + }, + { + "libraryId": "L2401532", + "orcabusId": "lib.01JBB5Y2KCNZD76XSP2SZZCRPJ" + }, + { + "libraryId": "L2401533", + "orcabusId": "lib.01JBB5Y2QJA4ECHFMNRY1DESHP" + }, + { + "libraryId": "L2401534", + "orcabusId": "lib.01JBB5Y2WRMS894XNSDJEWNSQD" + }, + { + "libraryId": "L2401535", + "orcabusId": "lib.01JBB5Y31GBQHP4X90PZ28X14R" + }, + { + "libraryId": "L2401536", + "orcabusId": "lib.01JBB5Y33XXNSGDDHX48413FMK" + }, + { + "libraryId": "L2401537", + "orcabusId": "lib.01JBB5Y36HJ7TJZJ3Q4BXD7J07" + }, + { + "libraryId": "L2401538", + "orcabusId": "lib.01JBB5Y3901PA0X3FBMWBKYNMB" + }, + { + "libraryId": "L2401539", + "orcabusId": 
"lib.01JBB5Y3BE906M3CXFBQGV2FKE" + }, + { + "libraryId": "L2401540", + "orcabusId": "lib.01JBB5Y3DZ55KF4D5KVMJP7DSN" + }, + { + "libraryId": "L2401541", + "orcabusId": "lib.01JBB5Y3GAN479FC5MJG19HPJM" + }, + { + "libraryId": "L2401542", + "orcabusId": "lib.01JBB5Y3JN6A228K20Y5XC549F" + }, + { + "libraryId": "L2401543", + "orcabusId": "lib.01JBB5Y3N1VHKBRSKEV4B3KZXN" + }, + { + "libraryId": "L2401544", + "orcabusId": "lib.01JBB5Y3QGZSGF74W6CTV0JJ16" + }, + { + "libraryId": "L2401545", + "orcabusId": "lib.01JBB5Y3TF8CPCSGC6KGCW3RBM" + }, + { + "libraryId": "L2401546", + "orcabusId": "lib.01JBB5Y3YMTRWYNT4F6DAS3YP7" + }, + { + "libraryId": "L2401547", + "orcabusId": "lib.01JBB5Y411MZ9J2JSGX332MD3T" + }, + { + "libraryId": "L2401548", + "orcabusId": "lib.01JBB5Y44ZSWKBXJJFHRHJ94CK" + }, + { + "libraryId": "L2401549", + "orcabusId": "lib.01JBB5Y47CCZW8RTC4YF8NDSAP" + }, + { + "libraryId": "L2401552", + "orcabusId": "lib.01JBB5Y4HG0MFWTHSXQQSHR90F" + }, + { + "libraryId": "L2401553", + "orcabusId": "lib.01JBB5Y4NNR06DNJQ7R9JGPFY9" + } + ] + } +} +``` \ No newline at end of file diff --git a/lib/workload/stateless/stacks/ora-compression-manager/deploy/index.ts b/lib/workload/stateless/stacks/ora-compression-manager/deploy/index.ts new file mode 100644 index 000000000..90a41e918 --- /dev/null +++ b/lib/workload/stateless/stacks/ora-compression-manager/deploy/index.ts @@ -0,0 +1,284 @@ +/* +Standard ICAv2 compression manager + +Uses the ICAv2 framework with a custom sfn inputs and outputs. + +Then also listens to the workflowManager success events with an 'add-on' step function that performs the following: + +1. Reads in the fastq_gzipped.filesizes.tsv file. +2. Reads in the fastq_gzipped.md5.txt file. +3. Reads in the fastq_ora.md5.txt file +4. Reads in the fastq_ora.filesizes.tsv file. +5. Reads in the fastq_list_ora.csv file. + +Generates an event for each fastq file pair with the following +1. The md5sum for both the compressed and uncompressed files +2. The file sizes for both the gzipped compressed and ora compressed files +3. The ratio of the gzipped compressed file size to the ora compressed file size + +From here we can start to determine if the compression ratio is worth the FPGA costs or not. 
+ +*/ + +import * as cdk from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import * as ssm from 'aws-cdk-lib/aws-ssm'; +import * as secretsManager from 'aws-cdk-lib/aws-secretsmanager'; +import * as dynamodb from 'aws-cdk-lib/aws-dynamodb'; +import * as sfn from 'aws-cdk-lib/aws-stepfunctions'; +import * as events from 'aws-cdk-lib/aws-events'; +import path from 'path'; +import { Duration } from 'aws-cdk-lib'; +import { PythonFunction } from '@aws-cdk/aws-lambda-python-alpha'; +import { Architecture, Runtime } from 'aws-cdk-lib/aws-lambda'; +import * as eventsTargets from 'aws-cdk-lib/aws-events-targets'; +import { WfmWorkflowStateChangeIcav2ReadyEventHandlerConstruct } from '../../../../components/sfn-icav2-ready-event-handler'; +import { Icav2AnalysisEventHandlerConstruct } from '../../../../components/sfn-icav2-state-change-event-handler'; + +export interface OraCompressionIcav2PipelineManagerConfig { + /* + Tables + */ + dynamodbTableName: string; + + /* + Event handling + */ + eventBusName: string; + icaEventPipeName: string; + workflowName: string; + workflowVersion: string; + serviceVersion: string; + triggerLaunchSource: string; + internalEventSource: string; + detailType: string; + + /* + Names for statemachines + */ + stateMachinePrefix: string; + + /* + SSM Parameters + */ + referenceUriSsmPath: string; + pipelineIdSsmPath: string; // List of parameters the workflow session state machine will need access to + + /* + Secrets + */ + icav2TokenSecretId: string; // "/icav2/umccr-prod/service-production-jwt-token-secret-arn" +} + +export type OraCompressionIcav2PipelineManagerStackProps = + OraCompressionIcav2PipelineManagerConfig & cdk.StackProps; + +export class OraCompressionIcav2PipelineManagerStack extends cdk.Stack { + public readonly OraCompressionLaunchStateMachineObj: string; + private globals = { + workflowManagerSource: 'orcabus.workflowmanager', + outputCompressionDetailType: 'FastqListRowCompressed', + }; + + constructor(scope: Construct, id: string, props: OraCompressionIcav2PipelineManagerStackProps) { + super(scope, id, props); + + // Get dynamodb table for construct + const dynamodbTableObj = dynamodb.TableV2.fromTableName( + this, + 'bclconvertInteropQcICAv2AnalysesDynamoDBTable', + props.dynamodbTableName + ); + + // Get ICAv2 Access token secret object for construct + const icav2AccessTokenSecretObj = secretsManager.Secret.fromSecretNameV2( + this, + 'Icav2SecretsObject', + props.icav2TokenSecretId + ); + + // Get pipelineId + const pipelineIdSsmObj = ssm.StringParameter.fromStringParameterName( + this, + 'PipelineIdSsmParameter', + props.pipelineIdSsmPath + ); + const referenceUriSsmObj = ssm.StringParameter.fromStringParameterName( + this, + 'ReferenceUriSsmParameter', + props.referenceUriSsmPath + ); + + // Get the event bus object + const eventBusObj = events.EventBus.fromEventBusName(this, 'event_bus', props.eventBusName); + + /* + Generate the inputs sfn + */ + const configureInputsSfn = new sfn.StateMachine(this, 'configure_inputs_sfn', { + stateMachineName: `${props.stateMachinePrefix}-configure-inputs-json`, + definitionBody: sfn.DefinitionBody.fromFile( + path.join(__dirname, '../step_functions_templates/set_compression_inputs.asl.json') + ), + definitionSubstitutions: { + __table_name__: dynamodbTableObj.tableName, + __reference_uri_ssm_parameter_path__: referenceUriSsmObj.parameterName, + }, + }); + + // Configure inputs step function needs to read-write to the dynamodb table + dynamodbTableObj.grantReadWriteData(configureInputsSfn); + + // 
Configure step function allow read access to the ssm parameter path + referenceUriSsmObj.grantRead(configureInputsSfn); + + /* + Generate the outputs sfn + */ + + // Generate the lambda function to build the outputs json + const setOutputsJsonLambdaFunctionObj = new PythonFunction( + this, + 'set_outputs_json_lambda_function', + { + runtime: Runtime.PYTHON_3_12, + entry: path.join(__dirname, '../lambdas/set_outputs_json_py'), + architecture: Architecture.ARM_64, + handler: 'handler', + index: 'set_outputs_json.py', + environment: { + ICAV2_ACCESS_TOKEN_SECRET_ID: icav2AccessTokenSecretObj.secretName, + }, + timeout: Duration.seconds(60), + } + ); + + // Give the lambda function access to the secret + icav2AccessTokenSecretObj.grantRead(setOutputsJsonLambdaFunctionObj.currentVersion); + + // Generate outputs + const configureOutputsSfn = new sfn.StateMachine(this, 'configure_outputs_sfn', { + stateMachineName: `${props.stateMachinePrefix}-configure-outputs-json`, + definitionBody: sfn.DefinitionBody.fromFile( + path.join(__dirname, '../step_functions_templates/set_compression_outputs.asl.json') + ), + definitionSubstitutions: { + __table_name__: dynamodbTableObj.tableName, + __set_outputs_json_lambda_function_arn__: + setOutputsJsonLambdaFunctionObj.currentVersion.functionArn, + }, + }); + + // Configure step function write access to the dynamodb table + dynamodbTableObj.grantReadWriteData(configureOutputsSfn); + + // Configure step function invoke access to the lambda function + setOutputsJsonLambdaFunctionObj.currentVersion.grantInvoke(configureOutputsSfn); + + // Generate state machine for handling the 'READY' event + const handleWfmReadyEventSfn = new WfmWorkflowStateChangeIcav2ReadyEventHandlerConstruct( + this, + 'handle_wfm_ready_event', + { + tableName: dynamodbTableObj.tableName, + stateMachineName: `${props.stateMachinePrefix}-wfm-ready-event-handler`, + icav2AccessTokenSecretObj: icav2AccessTokenSecretObj, + workflowPlatformType: 'cwl', // Hardcoded this pipeline is a CWL pipeline. 
+ detailType: props.detailType, + eventBusName: props.eventBusName, + triggerLaunchSource: props.triggerLaunchSource, + internalEventSource: props.internalEventSource, + generateInputsJsonSfn: configureInputsSfn, + pipelineIdSsmPath: pipelineIdSsmObj.parameterName, + workflowName: props.workflowName, + workflowVersion: props.workflowVersion, + serviceVersion: props.serviceVersion, + } + ).stateMachineObj; + + // Generate state machine for handling the external ICAv2 event + const handleExternalIcav2EventSfn = new Icav2AnalysisEventHandlerConstruct( + this, + 'handle_interop_qc_ready_event', + { + tableName: dynamodbTableObj.tableName, + stateMachineName: `${props.stateMachinePrefix}-icav2-external-handler`, + detailType: props.detailType, + eventBusName: props.eventBusName, + icaEventPipeName: props.icaEventPipeName, + internalEventSource: props.internalEventSource, + generateOutputsJsonSfn: configureOutputsSfn, + workflowName: props.workflowName, + workflowVersion: props.workflowVersion, + serviceVersion: props.serviceVersion, + } + ).stateMachineObj; + + // Generate the state machine for generating the fastq list row compression events + // First need the lambda + const setMergeSizesLambdaObj = new PythonFunction(this, 'set_merge_sizes_lambda_obj', { + runtime: Runtime.PYTHON_3_12, + entry: path.join(__dirname, '../lambdas/merge_file_sizes_for_fastq_list_rows_py'), + architecture: Architecture.ARM_64, + handler: 'handler', + index: 'merge_file_sizes_for_fastq_list_rows.py', + environment: { + ICAV2_ACCESS_TOKEN_SECRET_ID: icav2AccessTokenSecretObj.secretName, + }, + timeout: Duration.seconds(60), + }); + // Give the lambda function access to the secret + icav2AccessTokenSecretObj.grantRead(setMergeSizesLambdaObj.currentVersion); + + /* + Part 3 - add on service to collect outputs from the succeeded v2 workflow and generate the fastq list row compression events + */ + const generateFastqListRowCompressionEventsSfn = new sfn.StateMachine( + this, + 'generate_fastq_list_row_compression_events_sfn', + { + stateMachineName: `${props.stateMachinePrefix}-generate-fqlr-ora-events`, + definitionBody: sfn.DefinitionBody.fromFile( + path.join( + __dirname, + '../step_functions_templates/fastq_list_row_compression_event.asl.json' + ) + ), + definitionSubstitutions: { + __merge_sizes_lambda_function_arn__: setMergeSizesLambdaObj.currentVersion.functionArn, + }, + } + ); + + // Configure step function invoke access to the lambda function + setMergeSizesLambdaObj.currentVersion.grantInvoke(generateFastqListRowCompressionEventsSfn); + + // Allow the step functions to submit events to the event bus + eventBusObj.grantPutEventsTo(generateFastqListRowCompressionEventsSfn); + + // Generate rule to trigger the fastq list row compression events + const generateFastqListRowCompressionEventsRule = new events.Rule( + this, + 'generate_fastq_list_row_compression_events_rule', + { + eventBus: eventBusObj, + ruleName: `${props.stateMachinePrefix}-generate-fqlr-ora-events-rule`, + eventPattern: { + source: [this.globals.workflowManagerSource], + detailType: [props.detailType], + detail: { + workflowName: [{ 'equals-ignore-case': props.workflowName }], + state: [{ 'equals-ignore-case': 'SUCCEEDED' }], + }, + }, + } + ); + + // Add the target to the rule + generateFastqListRowCompressionEventsRule.addTarget( + new eventsTargets.SfnStateMachine(generateFastqListRowCompressionEventsSfn, { + input: events.RuleTargetInput.fromEventPath('$.detail'), + }) + ); + } +} diff --git 
a/lib/workload/stateless/stacks/ora-compression-manager/lambdas/merge_file_sizes_for_fastq_list_rows_py/merge_file_sizes_for_fastq_list_rows.py b/lib/workload/stateless/stacks/ora-compression-manager/lambdas/merge_file_sizes_for_fastq_list_rows_py/merge_file_sizes_for_fastq_list_rows.py new file mode 100644 index 000000000..a8f57f850 --- /dev/null +++ b/lib/workload/stateless/stacks/ora-compression-manager/lambdas/merge_file_sizes_for_fastq_list_rows_py/merge_file_sizes_for_fastq_list_rows.py @@ -0,0 +1,349 @@ +#!/usr/bin/env python3 + +""" +Merge file sizes for fastq list rows + +Given the output directory, +* collect the file sizes for the original gzipped tsv, +* the md5sum txts for both the fastq gzipped and ora files +* collect the file sizes for the new ora compressed tsv + + +""" +from io import StringIO + +import pandas as pd +import typing +import boto3 +from os import environ +from urllib.parse import urlparse, urlunparse +from pathlib import Path + +# Wrapica imports +from wrapica.enums import DataType +from wrapica.project_data import ( + ProjectData, + convert_uri_to_project_data_obj, + list_project_data_non_recursively, + read_icav2_file_contents +) + + +# Type checking +if typing.TYPE_CHECKING: + from mypy_boto3_ssm import SSMClient + from mypy_boto3_secretsmanager import SecretsManagerClient + +# Globals +ICAV2_BASE_URL = "https://ica.illumina.com/ica/rest" + +# AWS things +def get_ssm_client() -> 'SSMClient': + """ + Return SSM client + """ + return boto3.client("ssm") + + +def get_secrets_manager_client() -> 'SecretsManagerClient': + """ + Return Secrets Manager client + """ + return boto3.client("secretsmanager") + + +def get_ssm_parameter_value(parameter_path) -> str: + """ + Get the ssm parameter value from the parameter path + :param parameter_path: + :return: + """ + return get_ssm_client().get_parameter(Name=parameter_path)["Parameter"]["Value"] + + +def get_secret(secret_arn: str) -> str: + """ + Return secret value + """ + return get_secrets_manager_client().get_secret_value(SecretId=secret_arn)["SecretString"] + + +# Set the icav2 environment variables +def set_icav2_env_vars(): + """ + Set the icav2 environment variables + :return: + """ + environ["ICAV2_BASE_URL"] = ICAV2_BASE_URL + environ["ICAV2_ACCESS_TOKEN"] = get_secret( + environ["ICAV2_ACCESS_TOKEN_SECRET_ID"] + ) + + +def read_md5sum(project_data: ProjectData) -> pd.DataFrame: + """ + Read the csv from the icav2 + :param project_data: + :return: + """ + with read_icav2_file_contents(project_data.project_id, project_data.data.id) as file_contents: + return pd.read_csv( + StringIO(file_contents), + header=None, + names=["md5sum", "file_name"], + sep=" ", + engine="python" + ) + + +def read_file_sizes(project_data: ProjectData) -> pd.DataFrame: + with read_icav2_file_contents(project_data.project_id, project_data.data.id) as file_contents: + return pd.read_csv( + StringIO(file_contents), + sep="\t" + ) + + +def read_fastq_list_csv(project_data: ProjectData) -> pd.DataFrame: + """ + Read the csv from the icav2 + :param project_data: + :return: + """ + with read_icav2_file_contents(project_data.project_id, project_data.data.id) as file_contents: + return pd.read_csv(StringIO(file_contents)) + + +def get_icav2_file_from_folder(project_data_list: typing.List[ProjectData], file_name: str) -> ProjectData: + """ + Get the file from the list + :param project_data_list: + :param file_name: + :return: + """ + try: + return next( + filter( + lambda project_data_iter: ( + project_data_iter.data.details.name == 
file_name and + DataType[project_data_iter.data.details.data_type] == DataType.FILE + ), + project_data_list + ) + ) + except StopIteration: + raise ValueError(f"{file_name} not found in the project data list") + + +def merge_fastq_list_rows_with_md5sums_and_filesizes( + fastq_list_ora_df: pd.DataFrame, + fastq_gzipped_md5_df: pd.DataFrame, + fastq_ora_md5_df: pd.DataFrame, + fastq_gzipped_filesizes_df: pd.DataFrame, + fastq_ora_filesizes_df: pd.DataFrame +) -> pd.DataFrame: + """ + Given the fastq list rows, the md5sums and the file sizes, merge them together + :param fastq_list_ora_df: + :param fastq_gzipped_md5_df: + :param fastq_ora_md5_df: + :param fastq_gzipped_filesizes_df: + :param fastq_ora_filesizes_df: + :return: + """ + # Extend the gzipped md5s to the fastq list rows + fastq_list_ora_df = fastq_list_ora_df.merge( + fastq_gzipped_md5_df, + how="left", + left_on="read1File", + right_on="fileName" + ).drop( + columns='fileName' + ).rename( + columns={"md5sum": "read1FileGzippedMd5sum"} + ).merge( + fastq_gzipped_md5_df, + how="left", + left_on="read2File", + right_on="fileName" + ).drop( + columns='fileName' + ).rename( + columns={"md5sum": "read2FileGzippedMd5sum"} + ) + + # Extend the ora md5s to the fastq list rows + fastq_list_ora_df = fastq_list_ora_df.merge( + fastq_ora_md5_df, + how="left", + left_on="read1File", + right_on="fileName" + ).drop( + columns='fileName' + ).rename( + columns={"md5sum": "read1FileOraMd5sum"} + ).merge( + fastq_ora_md5_df, + how="left", + left_on="read2File", + right_on="fileName" + ).drop( + columns='fileName' + ).rename( + columns={"md5sum": "read2FileOraMd5sum"} + ) + + # Extend the gzipped file sizes to the fastq list rows + fastq_list_ora_df = fastq_list_ora_df.merge( + fastq_gzipped_filesizes_df, + how="left", + left_on="read1File", + right_on="fileName" + ).drop( + columns='fileName' + ).rename( + columns={"fileSize": "read1GzippedFileSize"} + ).merge( + fastq_gzipped_filesizes_df, + how="left", + left_on="read2File", + right_on="fileName" + ).drop( + columns='fileName' + ).rename( + columns={"fileSize": "read2GzippedFileSize"} + ) + + fastq_list_ora_df = fastq_list_ora_df.merge( + fastq_ora_filesizes_df, + how="left", + left_on="read1File", + right_on="fileName" + ).drop( + columns='fileName' + ).rename( + columns={"fileSize": "read1OraFileSize"} + ).merge( + fastq_ora_filesizes_df, + how="left", + left_on="read2File", + right_on="fileName" + ).drop( + columns='fileName' + ).rename( + columns={"fileSize": "read2OraFileSize"} + ) + + # Return the fastq files + return fastq_list_ora_df + + +def join_url(base_url: str, path: str) -> str: + """ + Join the base url and the path + :param base_url: + :param path: + :return: + """ + url_obj = urlparse(base_url) + + return str(urlunparse( + ( + url_obj.scheme, + url_obj.netloc, + str(Path(url_obj.path).joinpath(path)), + None, None, None + ) + )) + + +def handler(event, context): + """ + Given the output directory + + * collect the new fastq list rows csv file + * collect the file sizes for the original gzipped tsv, + * the md5sum txts for both the fastq gzipped and ora files + * collect the file sizes for the new ora compressed tsv + :param event: + :param context: + :return: + """ + + # Set the icav2 env vars before anything else + set_icav2_env_vars() + + # Get the output directory + output_dir_uri = event["output_dir_uri"] + + # Get the output directory as an icav2 object + output_dir_project_obj: ProjectData = convert_uri_to_project_data_obj(output_dir_uri) + + # Analysis list + 
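+ # Note: the listing below is non-recursive, so only the top level of the output
+ # directory is scanned; each expected file is then looked up by its exact file name.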
output_dir_project_data_list = list_project_data_non_recursively( + project_id=output_dir_project_obj.project_id, + parent_folder_id=output_dir_project_obj.data.id, + ) + + # Get the new fastq list rows csv file + fastq_list_ora_df = read_fastq_list_csv( + get_icav2_file_from_folder( + output_dir_project_data_list, + "fastq_list_ora.csv") + ) + + # Get the md5sum txts for both the fastq gzipped and ora files + fastq_gzipped_md5_file_obj = read_md5sum( + get_icav2_file_from_folder( + output_dir_project_data_list, + "fastq_gzipped_md5.txt") + ) + + # Get the file sizes for the new ora compressed tsv + fastq_ora_md5_file_obj = read_md5sum( + get_icav2_file_from_folder( + output_dir_project_data_list, + "fastq_ora_md5.txt") + ) + + # Get the original gzipped tsv file size + fastq_gzipped_filesizes_file_obj = read_file_sizes( + get_icav2_file_from_folder( + output_dir_project_data_list, + "fastq_gzipped_filesizes.tsv" + ) + ) + + # Get the md5sum txts for both the fastq gzipped and ora files + fastq_ora_filesizes_file_obj = read_file_sizes( + get_icav2_file_from_folder( + output_dir_project_data_list, + "fastq_ora_filesizes.tsv" + ) + ) + + # Merge the fastq list rows with the md5sums and file sizes + fastq_list_ora_df = merge_fastq_list_rows_with_md5sums_and_filesizes( + fastq_list_ora_df, + fastq_gzipped_md5_file_obj, + fastq_ora_md5_file_obj, + fastq_gzipped_filesizes_file_obj, + fastq_ora_filesizes_file_obj + ) + + # Convert read1File and read2File to URIs + fastq_list_ora_df["read1FileUri"] = fastq_list_ora_df["read1File"].apply( + lambda file_name: join_url(output_dir_uri, file_name) + ) + fastq_list_ora_df["read2FileUri"] = fastq_list_ora_df["read2File"].apply( + lambda file_name: join_url(output_dir_uri, file_name) + ) + + # Delete read1File and read2File columns + fastq_list_ora_df.drop( + columns=["read1File", "read2File"], + inplace=True + ) + + # Return the merged file as a json list + return fastq_list_ora_df.to_dict(orient="records") diff --git a/lib/workload/stateless/stacks/ora-compression-manager/lambdas/merge_file_sizes_for_fastq_list_rows_py/requirements.txt b/lib/workload/stateless/stacks/ora-compression-manager/lambdas/merge_file_sizes_for_fastq_list_rows_py/requirements.txt new file mode 100644 index 000000000..fecd999d7 --- /dev/null +++ b/lib/workload/stateless/stacks/ora-compression-manager/lambdas/merge_file_sizes_for_fastq_list_rows_py/requirements.txt @@ -0,0 +1,2 @@ +pandas==2.2.3 +wrapica==2.27.1.post20240830140737 \ No newline at end of file diff --git a/lib/workload/stateless/stacks/ora-compression-manager/lambdas/set_outputs_json_py/requirements.txt b/lib/workload/stateless/stacks/ora-compression-manager/lambdas/set_outputs_json_py/requirements.txt new file mode 100644 index 000000000..25d5e5f58 --- /dev/null +++ b/lib/workload/stateless/stacks/ora-compression-manager/lambdas/set_outputs_json_py/requirements.txt @@ -0,0 +1 @@ +wrapica>=2.27.1.post20240830140737 diff --git a/lib/workload/stateless/stacks/ora-compression-manager/lambdas/set_outputs_json_py/set_outputs_json.py b/lib/workload/stateless/stacks/ora-compression-manager/lambdas/set_outputs_json_py/set_outputs_json.py new file mode 100644 index 000000000..de5fae755 --- /dev/null +++ b/lib/workload/stateless/stacks/ora-compression-manager/lambdas/set_outputs_json_py/set_outputs_json.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python + +""" +Given an analysis output uri, + +This script will generate the expected output json for the analysis. 
+ +{ + "instrument_run_id": "240229_A00130_0288_BH5HM2DSXC", + "analysis_output_uri": "icav2://development/analysis/ora-compression/20240806ce5755fd/" +} + +Yields + +{ + "instrument_run_ora_output_uri": "" +} + +We don't use the outputs json endpoint since we cannot rely on its consistency +https://github.com/umccr-illumina/ica_v2/issues/185 + +Instead we just take the output uri and find the instrument run directory as expected +"""
+ +# Standard imports +from os import environ +import typing +import boto3 + +# IDE imports only +if typing.TYPE_CHECKING: + from mypy_boto3_secretsmanager.client import SecretsManagerClient + +# ICA imports +from wrapica.enums import DataType, UriType +from wrapica.libica_models import ProjectData +from wrapica.project_data import ( + convert_uri_to_project_data_obj, convert_project_data_obj_to_uri, + list_project_data_non_recursively +) + + +# Globals +ICAV2_BASE_URL = "https://ica.illumina.com/ica/rest" + +
+def get_secrets_manager_client() -> 'SecretsManagerClient': + """ + Return Secrets Manager client + """ + return boto3.client("secretsmanager") + + +def get_secret(secret_id: str) -> str: + """ + Return secret value + """ + return get_secrets_manager_client().get_secret_value(SecretId=secret_id)["SecretString"] + +
+def set_icav2_env_vars(): + """ + Set the icav2 environment variables + :return: + """ + environ["ICAV2_BASE_URL"] = ICAV2_BASE_URL + environ["ICAV2_ACCESS_TOKEN"] = get_secret( + environ["ICAV2_ACCESS_TOKEN_SECRET_ID"] + ) + +
+def handler(events, context): + # Set icav2 env vars + set_icav2_env_vars() + + # Get analysis uri + analysis_uri = events.get("analysis_output_uri") + instrument_run_id = events.get("instrument_run_id") + + # Convert analysis uri to project folder object + analysis_project_data_obj = convert_uri_to_project_data_obj(analysis_uri) + + # Analysis list + analysis_top_level_data_list = list_project_data_non_recursively( + project_id=analysis_project_data_obj.project_id, + parent_folder_id=analysis_project_data_obj.data.id, + ) + + # Get the instrument run id directory + try: + instrument_run_id_project_data_obj: ProjectData = next( + filter( + lambda project_data_iter: ( + project_data_iter.data.details.name == instrument_run_id and + DataType[project_data_iter.data.details.data_type] == DataType.FOLDER + ), + analysis_top_level_data_list + ) + ) + except StopIteration: + raise ValueError(f"instrument run id directory '{instrument_run_id}' not found in '{analysis_uri}'") + + return { + "instrument_run_ora_output_uri": convert_project_data_obj_to_uri(instrument_run_id_project_data_obj, UriType.S3), + } + +
+# if __name__ == "__main__": +# import json +# import os +# os.environ['ICAV2_ACCESS_TOKEN_SECRET_ID'] = "ICAv2JWTKey-umccr-prod-service-dev" +# print( +# json.dumps( +# handler( +# { +# "instrument_run_id": "240229_A00130_0288_BH5HM2DSXC", +# "analysis_output_uri": "icav2://development/analysis/ora-compression/20240806ce5755fd/" +# }, +# None +# ), +# indent=2 +# ) +# ) +# # { +# # "instrument_run_ora_output_uri": "..." +# # } diff --git 
a/lib/workload/stateless/stacks/ora-compression-manager/step_functions_templates/fastq_list_row_compression_event.asl.json b/lib/workload/stateless/stacks/ora-compression-manager/step_functions_templates/fastq_list_row_compression_event.asl.json new file mode 100644 index 000000000..6450c5a49 --- /dev/null +++ b/lib/workload/stateless/stacks/ora-compression-manager/step_functions_templates/fastq_list_row_compression_event.asl.json @@ -0,0 +1,63 @@ +{ + "Comment": "A description of my state machine", + "StartAt": "Get fastq files from fastq list csv", + "States": { + "Get fastq files from fastq list csv": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "OutputPath": "$.Payload", + "Parameters": { + "Payload.$": "$", + "FunctionName": "${__merge_sizes_lambda_function_arn__}" + }, + "Retry": [ + { + "ErrorEquals": [ + "Lambda.ServiceException", + "Lambda.AWSLambdaException", + "Lambda.SdkClientException", + "Lambda.TooManyRequestsException" + ], + "IntervalSeconds": 1, + "MaxAttempts": 3, + "BackoffRate": 2 + } + ], + "ResultSelector": { + "fastq_list_events_map.$": "$.Payload" + }, + "Next": "Fan out fastq list row compression event creation" + }, + "Fan out fastq list row compression event creation": { + "Type": "Map", + "ItemsPath": "$.fastq_list_events_map", + "ItemSelector": { + "output_event_detail.$": "$$.Map.Item.Value" + }, + "ItemProcessor": { + "ProcessorConfig": { + "Mode": "INLINE" + }, + "StartAt": "Put Fastq List Row Compression Event", + "States": { + "Put Fastq List Row Compression Event": { + "Type": "Task", + "Resource": "arn:aws:states:::events:putEvents", + "Parameters": { + "Entries": [ + { + "EventBusName": "${__event_bus_name__}", + "Source": "orcabus.oracompressionmanager", + "DetailType": "${__detail_type__}", + "Detail.$": "$.output_event_detail" + } + ] + }, + "End": true + } + } + }, + "End": true + } + } +} diff --git a/lib/workload/stateless/stacks/ora-compression-manager/step_functions_templates/set_compression_inputs.asl.json b/lib/workload/stateless/stacks/ora-compression-manager/step_functions_templates/set_compression_inputs.asl.json new file mode 100644 index 000000000..ba0d5ab91 --- /dev/null +++ b/lib/workload/stateless/stacks/ora-compression-manager/step_functions_templates/set_compression_inputs.asl.json @@ -0,0 +1,99 @@ +{ + "Comment": "Launch Icav2 Pipeline and log in db", + "StartAt": "Get DataBase Entry", + "States": { + "Get DataBase Entry": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:getItem", + "Parameters": { + "TableName": "${__table_name__}", + "Key": { + "id.$": "$.portal_run_id", + "id_type": "portal_run_id" + } + }, + "ResultSelector": { + "inputs.$": "States.StringToJson($.Item.ready_event_data_inputs.S)", + "engine_parameters.$": "States.StringToJson($.Item.engine_parameters.S)" + }, + "ResultPath": "$.get_input_parameters_from_event", + "Next": "Has ORA Reference URI set" + }, + "Has ORA Reference URI set": { + "Type": "Choice", + "Choices": [ + { + "Variable": "$.get_input_parameters_from_event.inputs.oraReferenceUri", + "IsPresent": false, + "Next": "Get ORA Reference URI", + "Comment": "Reference parameter not set" + } + ], + "Default": "Set ORA Reference URI" + }, + "Get ORA Reference URI": { + "Type": "Task", + "Parameters": { + "Name": "${__reference_uri_ssm_parameter_path__}" + }, + "Resource": "arn:aws:states:::aws-sdk:ssm:getParameter", + "ResultPath": "$.get_ora_reference_uri_step", + "ResultSelector": { + "ora_reference_uri.$": "$.Parameter.Value" + }, + "Next": "Set Input Json from 
Event Payload" + }, + "Set ORA Reference URI": { + "Type": "Pass", + "Parameters": { + "ora_reference_uri.$": "$.get_input_parameters_from_event.inputs.oraReferenceUri" + }, + "ResultPath": "$.get_ora_reference_uri_step", + "Next": "Set Input Json from Event Payload" + }, + "Set Input Json from Event Payload": { + "Type": "Pass", + "Parameters": { + "input_json": { + "instrument_run_directory": { + "class": "Directory", + "basename.$": "$.get_input_parameters_from_event.inputs.instrumentRunId", + "location.$": "$.get_input_parameters_from_event.inputs.instrumentRunDirectoryUri" + }, + "ora_reference": { + "class": "File", + "location.$": "$.get_ora_reference_uri_step.ora_reference_uri" + }, + "ora_print_file_info": true + } + }, + "ResultPath": "$.set_input_json", + "Next": "Update Database Entry" + }, + "Update Database Entry": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:updateItem", + "Parameters": { + "TableName": "${__table_name__}", + "Key": { + "id.$": "$.portal_run_id", + "id_type": "portal_run_id" + }, + "UpdateExpression": "SET input_json = :input_json", + "ExpressionAttributeValues": { + ":input_json": { + "S.$": "States.JsonToString($.set_input_json.input_json)" + } + } + }, + "ResultPath": "$.update_entry_post_launch_step", + "Next": "Wait 1 Second (Pre-execution)" + }, + "Wait 1 Second (Pre-execution)": { + "Type": "Wait", + "Seconds": 1, + "Comment": "Wait for databases to sync before continuing", + "End": true + } + } +} diff --git a/lib/workload/stateless/stacks/ora-compression-manager/step_functions_templates/set_compression_outputs.asl.json b/lib/workload/stateless/stacks/ora-compression-manager/step_functions_templates/set_compression_outputs.asl.json new file mode 100644 index 000000000..492c8fb85 --- /dev/null +++ b/lib/workload/stateless/stacks/ora-compression-manager/step_functions_templates/set_compression_outputs.asl.json @@ -0,0 +1,80 @@ +{ + "Comment": "Get CWL Outputs from the ORA Compression pipeline", + "StartAt": "Get DB Attributes", + "States": { + "Get DB Attributes": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:getItem", + "Parameters": { + "TableName": "${__table_name__}", + "Key": { + "id.$": "$.portal_run_id", + "id_type": "portal_run_id" + } + }, + "ResultSelector": { + "ready_event_data_inputs.$": "States.StringToJson($.Item.ready_event_data_inputs.S)", + "analysis_output_uri.$": "$.Item.analysis_output_uri.S" + }, + "ResultPath": "$.get_db_attributes_step", + "Next": "Set outputs from analysis uri" + }, + "Set outputs from analysis uri": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Parameters": { + "FunctionName": "${__set_outputs_json_lambda_function_arn__}", + "Payload": { + "instrument_run_id.$": "$.get_db_attributes_step.ready_event_data_inputs.instrumentRunId", + "analysis_output_uri.$": "$.get_db_attributes_step.analysis_output_uri" + } + }, + "Retry": [ + { + "ErrorEquals": [ + "Lambda.ServiceException", + "Lambda.AWSLambdaException", + "Lambda.SdkClientException", + "Lambda.TooManyRequestsException" + ], + "IntervalSeconds": 1, + "MaxAttempts": 3, + "BackoffRate": 2 + } + ], + "TimeoutSeconds": 60, + "ResultSelector": { + "output_json": { + "instrumentRunOraOutputUri.$": "$.Payload.instrument_run_ora_output_uri" + } + }, + "ResultPath": "$.analysis_outputs_step", + "Next": "Update analysisOutput column in DB" + }, + "Update analysisOutput column in DB": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:updateItem", + "Parameters": { + "TableName": "${__table_name__}", + "Key": { + "id.$": "$.portal_run_id", + "id_type": "portal_run_id" + }, + "UpdateExpression": "SET analysis_output = :output_json", + "ExpressionAttributeValues": { + ":output_json": { + "S.$": "States.JsonToString($.analysis_outputs_step.output_json)" + } + } + }, + "ResultPath": "$.update_entry_post_launch_step", + "Next": "Wait 1 Second (Post-update)" + }, + "Wait 1 Second (Post-update)": { + "Type": "Wait", + "Seconds": 1, + "Comment": "Wait for databases to sync before continuing", + "End": true + } + } +} diff --git a/lib/workload/stateless/stacks/ora-decompression-manager/README.md b/lib/workload/stateless/stacks/ora-decompression-manager/README.md new file mode 100644 index 000000000..7cb506a77 --- /dev/null +++ b/lib/workload/stateless/stacks/ora-decompression-manager/README.md @@ -0,0 +1,48 @@ +# ORA Decompression Manager + +A standalone decompression manager, useful for ad-hoc decompression of ORA files +without needing to integrate the ora-decompression component. + +You will need to know the estimated file size of each gzipped output file. + +Takes in a decompression request event and writes the decompressed data to the expected output URIs: + +```json5 +{ + "detailType": "OraFastqListRowDecompression", + "detail": { + "status": "READY", + "payload": { + "data": { + "read1OraFileUri": "s3://path/to/read1OraFileUri.ora", + "read1GzOutputFileUri": "s3://path/to/read1GzFileUri.gz", + "read2OraFileUri": "s3://path/to/read2OraFileUri.ora", + "read2GzOutputFileUri": "s3://path/to/read2GzFileUri.gz", + "read1EstimatedGzFileSize": "123456789", // In Bytes + "read2EstimatedGzFileSize": "123456789", // In Bytes + } + } + } +} +``` + +Once decompression is complete, the following event is relayed: + +```json5 +{ + "detailType": "OraFastqListRowDecompression", + "detail": { + "status": "SUCCEEDED", + "payload": { + "data": { + "read1OraFileUri": "s3://path/to/read1OraFileUri.ora", + "read1GzOutputFileUri": "s3://path/to/read1GzFileUri.gz", + "read2OraFileUri": "s3://path/to/read2OraFileUri.ora", + "read2GzOutputFileUri": "s3://path/to/read2GzFileUri.gz", + "read1EstimatedGzFileSize": "123456789", // In Bytes + "read2EstimatedGzFileSize": "123456789", // In Bytes + } + } + } +} +``` diff --git a/lib/workload/stateless/stacks/ora-decompression-manager/deploy/index.ts b/lib/workload/stateless/stacks/ora-decompression-manager/deploy/index.ts new file mode 100644 index 000000000..a208b693b --- /dev/null +++ b/lib/workload/stateless/stacks/ora-decompression-manager/deploy/index.ts @@ -0,0 +1,96 @@ +import * as cdk from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import * as ssm from 'aws-cdk-lib/aws-ssm'; +import * as events from 'aws-cdk-lib/aws-events'; +import * as secretsManager from 'aws-cdk-lib/aws-secretsmanager'; +import { OraDecompressionConstruct } from '../../../../components/ora-file-decompression-fq-pair-sfn'; +import * as sfn from 'aws-cdk-lib/aws-stepfunctions'; +import path from 'path'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as eventsTargets from 'aws-cdk-lib/aws-events-targets'; + +export interface OraDecompressionPipelineManagerConfig { + /* Stack essentials */ + eventBusName: string; + detailType: string; + triggerEventSource: string; + outputEventSource: string; + stateMachinePrefix: string; + /* ICAv2 Pipeline analysis essentials */ + icav2TokenSecretId: string; // "/icav2/umccr-prod/service-production-jwt-token-secret-arn" +} + +export type OraDecompressionManagerStackProps = OraDecompressionPipelineManagerConfig & + cdk.StackProps; + +export class OraDecompressionManagerStack extends 
cdk.Stack { + constructor(scope: Construct, id: string, props: OraDecompressionManagerStackProps) { + super(scope, id, props); + + // Get ICAv2 Access token secret object for construct + const icav2AccessTokenSecretObj = secretsManager.Secret.fromSecretNameV2( + this, + 'icav2_secrets_object', + props.icav2TokenSecretId + ); + + // Get the copy batch state machine name + const oraDecompressionSfnConstruct = new OraDecompressionConstruct( + this, + 'ora_decompression_sfn_construct', + { + sfnPrefix: props.stateMachinePrefix, + icav2AccessTokenSecretId: icav2AccessTokenSecretObj.secretName, + } + ); + + // Generate the parent step function + const oraManagerSfn = new sfn.StateMachine(this, 'state_machine', { + stateMachineName: `${props.stateMachinePrefix}-event-handler-sfn`, + definitionBody: sfn.DefinitionBody.fromFile( + path.join( + __dirname, + '../step_functions_templates/ora_decompression_manager_sfn_template.asl.json' + ) + ), + definitionSubstitutions: { + /* Event */ + __event_bus_name__: props.eventBusName, + __detail_type__: props.detailType, + __source__: props.outputEventSource, + /* Decompression SFN */ + __ora_decompression_sfn_arn__: oraDecompressionSfnConstruct.sfnObject.stateMachineArn, + }, + }); + + // Because we run a nested state machine, we need to add the permissions to the state machine role + // See https://stackoverflow.com/questions/60612853/nested-step-function-in-a-step-function-unknown-error-not-authorized-to-cr + oraManagerSfn.addToRolePolicy( + new iam.PolicyStatement({ + resources: [ + `arn:aws:events:${cdk.Aws.REGION}:${cdk.Aws.ACCOUNT_ID}:rule/StepFunctionsGetEventsForStepFunctionsExecutionRule`, + ], + actions: ['events:PutTargets', 'events:PutRule', 'events:DescribeRule'], + }) + ); + + /* Grant the state machine access to invoke the internal launch sfn machine */ + oraDecompressionSfnConstruct.sfnObject.grantStartExecution(oraManagerSfn); + + // Create a rule to trigger the state machine + const rule = new events.Rule(this, 'rule', { + eventBus: events.EventBus.fromEventBusName(this, 'event_bus', props.eventBusName), + eventPattern: { + source: [props.triggerEventSource], + detailType: [props.detailType], + }, + }); + + // Add target to the rule + rule.addTarget( + new eventsTargets.SfnStateMachine(oraManagerSfn, { + input: events.RuleTargetInput.fromEventPath('$.detail'), + }) + ); + } +} diff --git a/lib/workload/stateless/stacks/ora-decompression-manager/step_functions_templates/ora_decompression_manager_sfn_template.asl.json b/lib/workload/stateless/stacks/ora-decompression-manager/step_functions_templates/ora_decompression_manager_sfn_template.asl.json new file mode 100644 index 000000000..49dd89dda --- /dev/null +++ b/lib/workload/stateless/stacks/ora-decompression-manager/step_functions_templates/ora_decompression_manager_sfn_template.asl.json @@ -0,0 +1,50 @@ +{ + "Comment": "A description of my state machine", + "StartAt": "Start Decompression Service", + "States": { + "Start Decompression Service": { + "Type": "Task", + "Resource": "arn:aws:states:::states:startExecution.sync:2", + "Parameters": { + "StateMachineArn": "${__ora_decompression_sfn_arn__}", + "Input": { + "read1OraFileUri.$": "$.read1OraFileUri", + "read1GzOutputFileUri.$": "$.read1GzOutputFileUri", + "read2OraFileUri.$": "$.read2OraFileUri", + "read2GzOutputFileUri.$": "$.read2GzOutputFileUri", + "read1EstimatedGzFileSize.$": "$.read1EstimatedGzFileSize", + "read2EstimatedGzFileSize.$": "$.read2EstimatedGzFileSize" + } + }, + "ResultPath": null, + "Next": "Raise Completion 
Event" + }, + "Raise Completion Event": { + "Type": "Task", + "Resource": "arn:aws:states:::events:putEvents", + "Parameters": { + "Entries": [ + { + "EventBusName": "${__event_bus_name__}", + "DetailType": "${__detail_type__}", + "Source": "${__source__}", + "Detail": { + "status": "SUCCEEDED", + "payload": { + "data": { + "read1OraFileUri.$": "$.read1OraFileUri", + "read1GzOutputFileUri.$": "$.read1GzOutputFileUri", + "read2OraFileUri.$": "$.read2OraFileUri", + "read2GzOutputFileUri.$": "$.read2GzOutputFileUri", + "read1EstimatedGzFileSize.$": "$.read1EstimatedGzFileSize", + "read2EstimatedGzFileSize.$": "$.read2EstimatedGzFileSize" + } + } + } + } + ] + }, + "End": true + } + } +} diff --git a/lib/workload/stateless/statelessStackCollectionClass.ts b/lib/workload/stateless/statelessStackCollectionClass.ts index feecad6dc..01fa31e18 100644 --- a/lib/workload/stateless/statelessStackCollectionClass.ts +++ b/lib/workload/stateless/statelessStackCollectionClass.ts @@ -57,6 +57,11 @@ import { RnasumIcav2PipelineManagerStack, RnasumIcav2PipelineManagerStackProps, } from './stacks/rnasum-pipeline-manager/deploy'; +import { + OraCompressionIcav2PipelineManagerStack, + OraCompressionIcav2PipelineManagerStackProps, +} from './stacks/ora-compression-manager/deploy'; + import { FMAnnotator, FMAnnotatorConfigurableProps } from './stacks/fmannotator/deploy/stack'; import { PieriandxPipelineManagerStack, @@ -70,6 +75,10 @@ import { SashNfPipelineManagerStack, SashNfPipelineManagerStackProps, } from './stacks/sash-pipeline-manager/deploy'; +import { + OraDecompressionManagerStack, + OraDecompressionManagerStackProps, +} from './stacks/ora-decompression-manager/deploy'; export interface StatelessStackCollectionProps { metadataManagerStackProps: MetadataManagerStackProps; @@ -87,6 +96,8 @@ export interface StatelessStackCollectionProps { pieriandxPipelineManagerStackProps: PierianDxPipelineManagerStackProps; oncoanalyserPipelineManagerStackProps: OncoanalyserNfPipelineManagerStackProps; sashPipelineManagerStackProps: SashNfPipelineManagerStackProps; + oraCompressionIcav2PipelineManagerStackProps: OraCompressionIcav2PipelineManagerStackProps; + oraDecompressionManagerStackProps: OraDecompressionManagerStackProps; eventSchemaStackProps: SchemaStackProps; dataSchemaStackProps: SchemaStackProps; bclConvertManagerStackProps: BclConvertManagerStackProps; @@ -112,6 +123,8 @@ export class StatelessStackCollection { readonly pieriandxPipelineManagerStack: Stack; readonly oncoanalyserPipelineManagerStack: Stack; readonly sashPipelineManagerStack: Stack; + readonly oraCompressionIcav2PipelineManagerStack: Stack; + readonly oraDecompressionManagerStack: Stack; readonly eventSchemaStack: Stack; readonly dataSchemaStack: Stack; readonly bclConvertManagerStack: Stack; @@ -259,6 +272,24 @@ export class StatelessStackCollection { } ); + this.oraCompressionIcav2PipelineManagerStack = new OraCompressionIcav2PipelineManagerStack( + scope, + 'OraCompressionIcav2PipelineManagerStack', + { + ...this.createTemplateProps(env, 'OraCompressionIcav2PipelineManagerStack'), + ...statelessConfiguration.oraCompressionIcav2PipelineManagerStackProps, + } + ); + + this.oraDecompressionManagerStack = new OraDecompressionManagerStack( + scope, + 'OraDecompressionManagerStack', + { + ...this.createTemplateProps(env, 'OraDecompressionManagerStack'), + ...statelessConfiguration.oraDecompressionManagerStackProps, + } + ); + this.bclConvertManagerStack = new BclConvertManagerStack(scope, 'BclConvertManagerStack', { 
...this.createTemplateProps(env, 'BclConvertManagerStack'), ...statelessConfiguration.bclConvertManagerStackProps, From 8e9573a226d34bb14400588e870e956777fa4e4b Mon Sep 17 00:00:00 2001 From: Alexis Lucattini Date: Fri, 1 Nov 2024 20:40:21 +1100 Subject: [PATCH 2/4] Add Nag Suppressions on role level --- .../components/ora-file-decompression-fq-pair-sfn/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts b/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts index b32d47c5a..b9c4a5a19 100644 --- a/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts +++ b/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts @@ -65,7 +65,7 @@ export class OraDecompressionConstruct extends Construct { // "Effect": "Allow", // "Resource": "*" // }, - NagSuppressions.addResourceSuppressions(taskDefinition, [ + NagSuppressions.addResourceSuppressions(taskDefinition.executionRole, [ { id: 'AwsSolutions-IAM5', reason: 'Fargate has GetAuthorizationToken permission on all resources by default', From c467161c2f40631c54cff1924c0483ac01f5042d Mon Sep 17 00:00:00 2001 From: Alexis Lucattini Date: Sat, 2 Nov 2024 07:39:17 +1100 Subject: [PATCH 3/4] Nag Suppression should be applied to children --- .../ora-file-decompression-fq-pair-sfn/index.ts | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts b/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts index b9c4a5a19..ef3e47f4b 100644 --- a/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts +++ b/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts @@ -65,12 +65,16 @@ export class OraDecompressionConstruct extends Construct { // "Effect": "Allow", // "Resource": "*" // }, - NagSuppressions.addResourceSuppressions(taskDefinition.executionRole, [ - { - id: 'AwsSolutions-IAM5', - reason: 'Fargate has GetAuthorizationToken permission on all resources by default', - }, - ]); + NagSuppressions.addResourceSuppressions( + taskDefinition, + [ + { + id: 'AwsSolutions-IAM5', + reason: 'Fargate has GetAuthorizationToken permission on all resources by default', + }, + ], + true + ); // Add permission to task role const icav2SecretObj = secretsManager.Secret.fromSecretNameV2( From 79d54a410e3c5ed08515d85c966ae211ccdb7bc7 Mon Sep 17 00:00:00 2001 From: Alexis Lucattini Date: Sat, 2 Nov 2024 11:27:50 +1100 Subject: [PATCH 4/4] Nag suppression add resource suppressions after creation of Ora Container Image --- .../index.ts | 34 +++++++++---------- .../ora-decompression-manager/deploy/index.ts | 1 - 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts b/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts index ef3e47f4b..1898abd61 100644 --- a/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts +++ b/lib/workload/components/ora-file-decompression-fq-pair-sfn/index.ts @@ -59,23 +59,6 @@ export class OraDecompressionConstruct extends Construct { }, }); - // FIXME - cdk nag error on fargate task definition role - // { - // "Action": "ecr:GetAuthorizationToken", - // "Effect": "Allow", - // "Resource": "*" - // }, - NagSuppressions.addResourceSuppressions( - taskDefinition, - [ - { - id: 'AwsSolutions-IAM5', - reason: 'Fargate has GetAuthorizationToken permission on all resources by default', - }, - ], - 
true - ); - // Add permission to task role const icav2SecretObj = secretsManager.Secret.fromSecretNameV2( this, @@ -119,6 +102,23 @@ export class OraDecompressionConstruct extends Construct { // Allow step function to run the ECS task taskDefinition.grantRun(this.sfnObject); + // FIXME - cdk nag error on fargate task definition role + // { + // "Action": "ecr:GetAuthorizationToken", + // "Effect": "Allow", + // "Resource": "*" + // }, + NagSuppressions.addResourceSuppressions( + taskDefinition, + [ + { + id: 'AwsSolutions-IAM5', + reason: 'Fargate has GetAuthorizationToken permission on all resources by default', + }, + ], + true + ); + /* Grant the state machine access to monitor the tasks */ this.sfnObject.addToRolePolicy( new iam.PolicyStatement({ diff --git a/lib/workload/stateless/stacks/ora-decompression-manager/deploy/index.ts b/lib/workload/stateless/stacks/ora-decompression-manager/deploy/index.ts index a208b693b..637ae52c5 100644 --- a/lib/workload/stateless/stacks/ora-decompression-manager/deploy/index.ts +++ b/lib/workload/stateless/stacks/ora-decompression-manager/deploy/index.ts @@ -1,6 +1,5 @@ import * as cdk from 'aws-cdk-lib'; import { Construct } from 'constructs'; -import * as ssm from 'aws-cdk-lib/aws-ssm'; import * as events from 'aws-cdk-lib/aws-events'; import * as secretsManager from 'aws-cdk-lib/aws-secretsmanager'; import { OraDecompressionConstruct } from '../../../../components/ora-file-decompression-fq-pair-sfn';