diff --git a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/index.ts b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/index.ts index 1f9941c88..510dfa1a7 100644 --- a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/index.ts +++ b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/index.ts @@ -4,6 +4,7 @@ import * as dynamodb from 'aws-cdk-lib/aws-dynamodb'; import * as ssm from 'aws-cdk-lib/aws-ssm'; import { NewSamplesheetEventShowerConstruct } from './part_1/samplesheet-event-shower'; import { NewFastqListRowsEventShowerConstruct } from './part_2/fastq-list-rows-event-shower'; +import * as secretsManager from 'aws-cdk-lib/aws-secretsmanager'; /* Provide the glue to push 'shower' events @@ -13,6 +14,7 @@ When either new fastq list rows arrive or when a new samplesheet arrives export interface showerGlueHandlerConstructProps { eventBusObj: events.IEventBus; instrumentRunTableObj: dynamodb.ITableV2; + icav2AccessTokenSecretObj: secretsManager.ISecret; } export class showerGlueHandlerConstruct extends Construct { @@ -40,10 +42,12 @@ export class showerGlueHandlerConstruct extends Construct { this, 'fastq_list_rows_shower', { - // Event bus + /* Event bus */ eventBusObj: props.eventBusObj, - // Tables + /* Tables */ tableObj: props.instrumentRunTableObj, + /* Secrets */ + icav2AccessTokenSecretObj: props.icav2AccessTokenSecretObj, } ); } diff --git a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/index.ts b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/index.ts index 2b1b03bbf..72e31e546 100644 --- a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/index.ts +++ b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/index.ts @@ -16,11 +16,25 @@ import * as 
eventsTargets from 'aws-cdk-lib/aws-events-targets'; import path from 'path'; import { LambdaB64GzTranslatorConstruct } from '../../../../../../../components/python-lambda-b64gz-translator'; import { PythonFunction } from '@aws-cdk/aws-lambda-python-alpha'; -import { Architecture, Runtime } from 'aws-cdk-lib/aws-lambda'; +import { + Architecture, + DockerImageCode, + DockerImageFunction, + Runtime, +} from 'aws-cdk-lib/aws-lambda'; +import { Duration } from 'aws-cdk-lib'; +import * as lambda from 'aws-cdk-lib/aws-lambda'; +import * as secretsManager from 'aws-cdk-lib/aws-secretsmanager'; export interface NewFastqListRowsEventShowerConstructProps { - tableObj: dynamodb.ITableV2; + /* Event Bus */ eventBusObj: events.IEventBus; + + /* Tables */ + tableObj: dynamodb.ITableV2; + + /* Secrets */ + icav2AccessTokenSecretObj: secretsManager.ISecret; } export class NewFastqListRowsEventShowerConstruct extends Construct { @@ -34,6 +48,7 @@ export class NewFastqListRowsEventShowerConstruct extends Construct { subject: 'subject', library: 'library', project: 'project', + fastqListRow: 'fastq_list_row', }, // Set Event Triggers triggerSource: 'orcabus.workflowmanager', @@ -60,7 +75,6 @@ export class NewFastqListRowsEventShowerConstruct extends Construct { constructor(scope: Construct, id: string, props: NewFastqListRowsEventShowerConstructProps) { super(scope, id); - /* Part 1: Build the lambdas */ @@ -73,6 +87,14 @@ export class NewFastqListRowsEventShowerConstruct extends Construct { } ).lambdaObj; + const cleanupFastqListRowLambda = new PythonFunction(this, 'cleanup_fastq_list_rows_lambda', { + entry: path.join(__dirname, 'lambdas', 'clean_up_fastq_list_rows_py'), + index: 'clean_up_fastq_list_rows.py', + handler: 'handler', + runtime: Runtime.PYTHON_3_12, + architecture: Architecture.ARM_64, + }); + // Generate Data Objects // Translate the libraryrunstatechange event const generateEventDataObjsLambda = new PythonFunction( @@ -87,6 +109,66 @@ export class 
NewFastqListRowsEventShowerConstruct extends Construct { } ); + // Add the demux stats + const generateDemuxStatsLambda = new PythonFunction(this, 'generate_demux_stats_py', { + entry: path.join(__dirname, 'lambdas', 'get_demultiplex_stats_py'), + index: 'get_demultiplex_stats.py', + handler: 'handler', + runtime: Runtime.PYTHON_3_12, + architecture: Architecture.ARM_64, + memorySize: 1024, // Don't want pandas to kill the lambda + environment: { + ICAV2_ACCESS_TOKEN_SECRET_ID: props.icav2AccessTokenSecretObj.secretName, + }, + timeout: Duration.seconds(300), + }); + + // Give demux stats lambda permission to access the secret + props.icav2AccessTokenSecretObj.grantRead(generateDemuxStatsLambda.currentVersion); + + // Get the fastqc stats + const architecture = lambda.Architecture.ARM_64; + const getFastqcStats = new DockerImageFunction(this, 'get_fastqc_stats', { + description: 'Get Fastqc stats from first 1 million reads', + code: DockerImageCode.fromImageAsset(path.join(__dirname, 'lambdas/get_fastqc_stats'), { + file: 'Dockerfile', + buildArgs: { + platform: architecture.dockerPlatform, + }, + }), + // Pulling data from icav2 can take time + timeout: Duration.seconds(180), // 3 minutes; the hard Lambda limit is 15 minutes + retryAttempts: 0, // Never perform a retry if it fails + memorySize: 2048, // Don't want pandas to kill the lambda + architecture: architecture, + environment: { + ICAV2_ACCESS_TOKEN_SECRET_ID: props.icav2AccessTokenSecretObj.secretName, + }, + }); + + // Give fastqc stats lambda permission to access the secret + props.icav2AccessTokenSecretObj.grantRead(getFastqcStats.currentVersion); + + // Get the sequali stats + const getSequaliStatsLambdaObj = new DockerImageFunction(this, 'get_sequali_stats', { + description: 'Get the sequali stats from first 1 million reads', + code: DockerImageCode.fromImageAsset(path.join(__dirname, 'lambdas/get_sequali_stats'), { + file: 'Dockerfile', + buildArgs: { + platform: architecture.dockerPlatform, + }, + 
}), + memorySize: 2048, // Don't want pandas to kill the lambda + timeout: Duration.seconds(300), + architecture: Architecture.ARM_64, + environment: { + ICAV2_ACCESS_TOKEN_SECRET_ID: props.icav2AccessTokenSecretObj.secretName, + }, + }); + + // Give the lambda permission to access the secret + props.icav2AccessTokenSecretObj.grantRead(getSequaliStatsLambdaObj.currentVersion); + /* Part 2: Build state machine */ @@ -147,12 +229,21 @@ export class NewFastqListRowsEventShowerConstruct extends Construct { this.newFastqListRowsEventShowerMap.tablePartition.instrumentRun, __project_table_partition_name__: this.newFastqListRowsEventShowerMap.tablePartition.project, + __fastq_list_row_table_partition_name__: + this.newFastqListRowsEventShowerMap.tablePartition.fastqListRow, /* Lambda functions */ __decompress_fastq_list_rows_lambda_function_arn__: decompressFastqListRowLambda.currentVersion.functionArn, + __clean_up_fastq_list_rows_lambda_function_arn__: + cleanupFastqListRowLambda.currentVersion.functionArn, __generate_event_maps_lambda_function_arn__: generateEventDataObjsLambda.currentVersion.functionArn, + __get_read_counts_per_rgid_lambda_function_arn__: + generateDemuxStatsLambda.currentVersion.functionArn, + __get_fastqc_stats_lambda_function_arn__: getFastqcStats.currentVersion.functionArn, + __get_sequali_stats_lambda_function_arn__: + getSequaliStatsLambdaObj.currentVersion.functionArn, }, }); @@ -163,7 +254,14 @@ export class NewFastqListRowsEventShowerConstruct extends Construct { props.tableObj.grantReadWriteData(this.stateMachineObj); /* Allow state machine to invoke lambda */ - [decompressFastqListRowLambda, generateEventDataObjsLambda].forEach((lambda) => { + [ + decompressFastqListRowLambda, + generateEventDataObjsLambda, + generateDemuxStatsLambda, + getFastqcStats, + getSequaliStatsLambdaObj, + cleanupFastqListRowLambda, + ].forEach((lambda) => { lambda.currentVersion.grantInvoke(this.stateMachineObj.role); }); diff --git 
a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/clean_up_fastq_list_rows_py/clean_up_fastq_list_rows.py b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/clean_up_fastq_list_rows_py/clean_up_fastq_list_rows.py new file mode 100644 index 000000000..20b246d5f --- /dev/null +++ b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/clean_up_fastq_list_rows_py/clean_up_fastq_list_rows.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 + +""" +Clean up the fastq list rows +* convert uppercase to lowercase +* extend rgid to contain the instrument run and the sample name + +* Otherwise very hard to match the fastq files to the sample names + +# [ +# { +# "RGID": "GAATTCGT.TTATGAGT.1", +# "RGSM": "L2400102", +# "RGLB": "L2400102", +# "Lane": 1, +# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400102/L2400102_S1_L001_R1_001.fastq.gz", +# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400102/L2400102_S1_L001_R2_001.fastq.gz" +# }, +# { +# "RGID": "GTGACGTT.TCCCAGAT.4", +# "RGSM": "L2400257", +# "RGLB": "L2400257", +# "Lane": 4, +# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400257/L2400257_S29_L004_R1_001.fastq.gz", +# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400257/L2400257_S29_L004_R2_001.fastq.gz" +# } +# ] + +To + +# [ +# { +# "rgid": "GAATTCGT.TTATGAGT.1.240229_A00130_0288_BH5HM2DSXC.L2400102", +# "rgsm": 
"L2400102", +# "rglb": "L2400102", +# "lane": 1, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400102/L2400102_S1_L001_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400102/L2400102_S1_L001_R2_001.fastq.gz" +# }, +# { +# "rgid": "GTGACGTT.TCCCAGAT.4.240229_A00130_0288_BH5HM2DSXC.L2400257", +# "rgsm": "L2400257", +# "rglb": "L2400257", +# "lane": 4, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400257/L2400257_S29_L004_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400257/L2400257_S29_L004_R2_001.fastq.gz" +# } +# ] +""" + +from typing import Dict + + +def handler(event, context) -> Dict: + """ + + Given the instrument run id and fastq list rows, return the fastq list rows with the rgid + extended to contain the instrument run id and the sample name + + All keys are converted from UPPERCASE / PascalCase to lowercase / camelCase + + :param event: + :param context: + :return: + """ + + # Get inputs + instrument_run_id = event["instrument_run_id"] + fastq_list_rows = event["fastq_list_rows"] + + # Clean up the fastq list rows + fastq_list_rows = list( + map( + lambda fastq_list_row_iter_: { + "rgid": f"{fastq_list_row_iter_['RGID']}.{instrument_run_id}.{fastq_list_row_iter_['RGSM']}", + "rgsm": fastq_list_row_iter_["RGSM"], + "rglb": fastq_list_row_iter_["RGLB"], + "lane": fastq_list_row_iter_["Lane"], + "read1FileUri": fastq_list_row_iter_["Read1FileUri"], + "read2FileUri": fastq_list_row_iter_["Read2FileUri"] + }, + fastq_list_rows + ) + ) + + # Return the fastq list rows + 
return { + "fastq_list_rows": fastq_list_rows + } + + +# if __name__ == "__main__": +# import json +# +# print( +# json.dumps( +# handler( +# { +# "instrument_run_id": "240229_A00130_0288_BH5HM2DSXC", +# "fastq_list_rows": [ +# { +# "RGID": "GAATTCGT.TTATGAGT.1", +# "RGSM": "L2400102", +# "RGLB": "L2400102", +# "Lane": 1, +# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400102/L2400102_S1_L001_R1_001.fastq.gz", +# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400102/L2400102_S1_L001_R2_001.fastq.gz" +# }, +# { +# "RGID": "GTGACGTT.TCCCAGAT.4", +# "RGSM": "L2400257", +# "RGLB": "L2400257", +# "Lane": 4, +# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400257/L2400257_S29_L004_R1_001.fastq.gz", +# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400257/L2400257_S29_L004_R2_001.fastq.gz" +# } +# ] +# }, +# None +# ), +# indent=4 +# ) +# ) +# +# # { +# # "fastq_list_rows": [ +# # { +# # "rgid": "GAATTCGT.TTATGAGT.1.240229_A00130_0288_BH5HM2DSXC.L2400102", +# # "rgsm": "L2400102", +# # "rglb": "L2400102", +# # "lane": 1, +# # "read1fileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400102/L2400102_S1_L001_R1_001.fastq.gz", +# # "read2fileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400102/L2400102_S1_L001_R2_001.fastq.gz" +# # }, +# # { +# # "rgid": 
"GTGACGTT.TCCCAGAT.4.240229_A00130_0288_BH5HM2DSXC.L2400257", +# # "rgsm": "L2400257", +# # "rglb": "L2400257", +# # "lane": 4, +# # "read1fileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400257/L2400257_S29_L004_R1_001.fastq.gz", +# # "read2fileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400257/L2400257_S29_L004_R2_001.fastq.gz" +# # } +# # ] +# # } diff --git a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/generate_event_data_objects_py/generate_event_data_objects.py b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/generate_event_data_objects_py/generate_event_data_objects.py index 67be21985..54ff31638 100644 --- a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/generate_event_data_objects_py/generate_event_data_objects.py +++ b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/generate_event_data_objects_py/generate_event_data_objects.py @@ -31,37 +31,20 @@ def generate_fastq_list_row_event(fastq_list_row: Dict, library: Dict, instrumen """ Generate the fastq list row event + :param num_readcount_obj_list: + :param qc_obj_list: + :param library: :param fastq_list_row: :param instrument_run_id: :return: """ - - # Get the new fastq list row dict - new_fastq_list_row_dict = {} - - # Iterate through the fastq list row - for key, value in fastq_list_row.items(): - if key.lower().startswith("rg") or key.lower() == "lane": - new_fastq_list_row_dict[key.lower()] = value - continue - new_fastq_list_row_dict[pascal_to_camel_case(key)] = value - - fastq_list_row_rgid = '.'.join( - [ - 
new_fastq_list_row_dict["rgid"], - instrument_run_id, - new_fastq_list_row_dict["rgsm"], - ] - ) - new_fastq_list_row_dict["rgid"] = fastq_list_row_rgid - return { "instrumentRunId": instrument_run_id, "library": { "libraryId": library.get("libraryId"), "orcabusId": library.get("orcabusId") }, - "fastqListRow": new_fastq_list_row_dict, + "fastqListRow": fastq_list_row, } @@ -82,11 +65,11 @@ def handler(event, context): # Generate the fastq list row events fastq_list_row_event_data_list = list( map( - lambda fastq_list_row_iter: generate_fastq_list_row_event( - fastq_list_row_iter, + lambda fastq_list_row_iter_: generate_fastq_list_row_event( + fastq_list_row_iter_, next( filter( - lambda library_iter: library_iter['libraryId'] == fastq_list_row_iter['RGSM'], + lambda library_iter_: library_iter_['libraryId'] == fastq_list_row_iter_['rgsm'], library_obj_list ) ), @@ -173,7 +156,7 @@ def handler(event, context): } -# # Test the function +# Test the function # if __name__ == "__main__": # import json # @@ -554,306 +537,304 @@ def handler(event, context): # ], # "fastq_list_rows": [ # { -# "RGID": "GAATTCGT.TTATGAGT.1", -# "RGSM": "L2400102", -# "RGLB": "L2400102", -# "Lane": 1, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400102/L2400102_S1_L001_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400102/L2400102_S1_L001_R2_001.fastq.gz" +# "rgid": "GAATTCGT.TTATGAGT.1.240229_A00130_0288_BH5HM2DSXC.L2400102", +# "rgsm": "L2400102", +# "rglb": "L2400102", +# "lane": 1, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400102/L2400102_S1_L001_R1_001.fastq.gz", +# "read2FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400102/L2400102_S1_L001_R2_001.fastq.gz" # }, # { -# "RGID": "GAGAATGGTT.TTGCTGCCGA.1", -# "RGSM": "L2400159", -# "RGLB": "L2400159", -# "Lane": 1, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400159/L2400159_S2_L001_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400159/L2400159_S2_L001_R2_001.fastq.gz" +# "rgid": "GAGAATGGTT.TTGCTGCCGA.1.240229_A00130_0288_BH5HM2DSXC.L2400159", +# "rgsm": "L2400159", +# "rglb": "L2400159", +# "lane": 1, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400159/L2400159_S2_L001_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400159/L2400159_S2_L001_R2_001.fastq.gz" # }, # { -# "RGID": "AGAGGCAACC.CCATCATTAG.1", -# "RGSM": "L2400160", -# "RGLB": "L2400160", -# "Lane": 1, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400160/L2400160_S3_L001_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400160/L2400160_S3_L001_R2_001.fastq.gz" +# "rgid": "AGAGGCAACC.CCATCATTAG.1.240229_A00130_0288_BH5HM2DSXC.L2400160", +# "rgsm": "L2400160", +# "rglb": "L2400160", +# "lane": 1, +# "read1FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400160/L2400160_S3_L001_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400160/L2400160_S3_L001_R2_001.fastq.gz" # }, # { -# "RGID": "CCATCATTAG.AGAGGCAACC.1", -# "RGSM": "L2400161", -# "RGLB": "L2400161", -# "Lane": 1, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400161/L2400161_S4_L001_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400161/L2400161_S4_L001_R2_001.fastq.gz" +# "rgid": "CCATCATTAG.AGAGGCAACC.1.240229_A00130_0288_BH5HM2DSXC.L2400161", +# "rgsm": "L2400161", +# "rglb": "L2400161", +# "lane": 1, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400161/L2400161_S4_L001_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400161/L2400161_S4_L001_R2_001.fastq.gz" # }, # { -# "RGID": "GATAGGCCGA.GCCATGTGCG.1", -# "RGSM": "L2400162", -# "RGLB": "L2400162", -# "Lane": 1, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400162/L2400162_S5_L001_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400162/L2400162_S5_L001_R2_001.fastq.gz" 
+# "rgid": "GATAGGCCGA.GCCATGTGCG.1.240229_A00130_0288_BH5HM2DSXC.L2400162", +# "rgsm": "L2400162", +# "rglb": "L2400162", +# "lane": 1, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400162/L2400162_S5_L001_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400162/L2400162_S5_L001_R2_001.fastq.gz" # }, # { -# "RGID": "ATGGTTGACT.AGGACAGGCC.1", -# "RGSM": "L2400163", -# "RGLB": "L2400163", -# "Lane": 1, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400163/L2400163_S6_L001_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400163/L2400163_S6_L001_R2_001.fastq.gz" +# "rgid": "ATGGTTGACT.AGGACAGGCC.1.240229_A00130_0288_BH5HM2DSXC.L2400163", +# "rgsm": "L2400163", +# "rglb": "L2400163", +# "lane": 1, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400163/L2400163_S6_L001_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400163/L2400163_S6_L001_R2_001.fastq.gz" # }, # { -# "RGID": "TATTGCGCTC.CCTAACACAG.1", -# "RGSM": "L2400164", -# "RGLB": "L2400164", -# "Lane": 1, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400164/L2400164_S7_L001_R1_001.fastq.gz", -# "Read2FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400164/L2400164_S7_L001_R2_001.fastq.gz" +# "rgid": "TATTGCGCTC.CCTAACACAG.1.240229_A00130_0288_BH5HM2DSXC.L2400164", +# "rgsm": "L2400164", +# "rglb": "L2400164", +# "lane": 1, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400164/L2400164_S7_L001_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400164/L2400164_S7_L001_R2_001.fastq.gz" # }, # { -# "RGID": "TTCTACATAC.TTACAGTTAG.1", -# "RGSM": "L2400166", -# "RGLB": "L2400166", -# "Lane": 1, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400166/L2400166_S8_L001_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400166/L2400166_S8_L001_R2_001.fastq.gz" +# "rgid": "TTCTACATAC.TTACAGTTAG.1.240229_A00130_0288_BH5HM2DSXC.L2400166", +# "rgsm": "L2400166", +# "rglb": "L2400166", +# "lane": 1, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400166/L2400166_S8_L001_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_1/L2400166/L2400166_S8_L001_R2_001.fastq.gz" # }, # { -# "RGID": "ATGAGGCC.CAATTAAC.2", -# "RGSM": "L2400195", -# "RGLB": "L2400195", -# "Lane": 2, -# "Read1FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400195/L2400195_S9_L002_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400195/L2400195_S9_L002_R2_001.fastq.gz" +# "rgid": "ATGAGGCC.CAATTAAC.2.240229_A00130_0288_BH5HM2DSXC.L2400195", +# "rgsm": "L2400195", +# "rglb": "L2400195", +# "lane": 2, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400195/L2400195_S9_L002_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400195/L2400195_S9_L002_R2_001.fastq.gz" # }, # { -# "RGID": "ACTAAGAT.CCGCGGTT.2", -# "RGSM": "L2400196", -# "RGLB": "L2400196", -# "Lane": 2, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400196/L2400196_S10_L002_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400196/L2400196_S10_L002_R2_001.fastq.gz" +# "rgid": "ACTAAGAT.CCGCGGTT.2.240229_A00130_0288_BH5HM2DSXC.L2400196", +# "rgsm": "L2400196", +# "rglb": "L2400196", +# "lane": 2, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400196/L2400196_S10_L002_R1_001.fastq.gz", +# "read2FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400196/L2400196_S10_L002_R2_001.fastq.gz" # }, # { -# "RGID": "GTCGGAGC.TTATAACC.2", -# "RGSM": "L2400197", -# "RGLB": "L2400197", -# "Lane": 2, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400197/L2400197_S11_L002_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400197/L2400197_S11_L002_R2_001.fastq.gz" +# "rgid": "GTCGGAGC.TTATAACC.2.240229_A00130_0288_BH5HM2DSXC.L2400197", +# "rgsm": "L2400197", +# "rglb": "L2400197", +# "lane": 2, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400197/L2400197_S11_L002_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400197/L2400197_S11_L002_R2_001.fastq.gz" # }, # { -# "RGID": "TCGTAGTG.CCAAGTCT.2", -# "RGSM": "L2400231", -# "RGLB": "L2400231", -# "Lane": 2, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400231/L2400231_S12_L002_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400231/L2400231_S12_L002_R2_001.fastq.gz" +# "rgid": "TCGTAGTG.CCAAGTCT.2.240229_A00130_0288_BH5HM2DSXC.L2400231", +# "rgsm": "L2400231", +# "rglb": "L2400231", +# "lane": 2, +# "read1FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400231/L2400231_S12_L002_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400231/L2400231_S12_L002_R2_001.fastq.gz" # }, # { -# "RGID": "GGAGCGTC.GCACGGAC.2", -# "RGSM": "L2400238", -# "RGLB": "L2400238", -# "Lane": 2, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400238/L2400238_S13_L002_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400238/L2400238_S13_L002_R2_001.fastq.gz" +# "rgid": "GGAGCGTC.GCACGGAC.2.240229_A00130_0288_BH5HM2DSXC.L2400238", +# "rgsm": "L2400238", +# "rglb": "L2400238", +# "lane": 2, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400238/L2400238_S13_L002_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400238/L2400238_S13_L002_R2_001.fastq.gz" # }, # { -# "RGID": "ATGGCATG.GGTACCTT.2", -# "RGSM": "L2400239", -# "RGLB": "L2400239", -# "Lane": 2, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400239/L2400239_S14_L002_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400239/L2400239_S14_L002_R2_001.fastq.gz" +# 
"rgid": "ATGGCATG.GGTACCTT.2.240229_A00130_0288_BH5HM2DSXC.L2400239", +# "rgsm": "L2400239", +# "rglb": "L2400239", +# "lane": 2, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400239/L2400239_S14_L002_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400239/L2400239_S14_L002_R2_001.fastq.gz" # }, # { -# "RGID": "GCAATGCA.AACGTTCC.2", -# "RGSM": "L2400240", -# "RGLB": "L2400240", -# "Lane": 2, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400240/L2400240_S15_L002_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400240/L2400240_S15_L002_R2_001.fastq.gz" +# "rgid": "GCAATGCA.AACGTTCC.2.240229_A00130_0288_BH5HM2DSXC.L2400240", +# "rgsm": "L2400240", +# "rglb": "L2400240", +# "lane": 2, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400240/L2400240_S15_L002_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_2/L2400240/L2400240_S15_L002_R2_001.fastq.gz" # }, # { -# "RGID": "ATGAGGCC.CAATTAAC.3", -# "RGSM": "L2400195", -# "RGLB": "L2400195", -# "Lane": 3, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400195/L2400195_S9_L003_R1_001.fastq.gz", -# "Read2FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400195/L2400195_S9_L003_R2_001.fastq.gz" +# "rgid": "ATGAGGCC.CAATTAAC.3.240229_A00130_0288_BH5HM2DSXC.L2400195", +# "rgsm": "L2400195", +# "rglb": "L2400195", +# "lane": 3, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400195/L2400195_S9_L003_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400195/L2400195_S9_L003_R2_001.fastq.gz" # }, # { -# "RGID": "ACTAAGAT.CCGCGGTT.3", -# "RGSM": "L2400196", -# "RGLB": "L2400196", -# "Lane": 3, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400196/L2400196_S10_L003_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400196/L2400196_S10_L003_R2_001.fastq.gz" +# "rgid": "ACTAAGAT.CCGCGGTT.3.240229_A00130_0288_BH5HM2DSXC.L2400196", +# "rgsm": "L2400196", +# "rglb": "L2400196", +# "lane": 3, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400196/L2400196_S10_L003_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400196/L2400196_S10_L003_R2_001.fastq.gz" # }, # { -# "RGID": "GTCGGAGC.TTATAACC.3", -# "RGSM": "L2400197", -# "RGLB": "L2400197", -# "Lane": 3, -# "Read1FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400197/L2400197_S11_L003_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400197/L2400197_S11_L003_R2_001.fastq.gz" +# "rgid": "GTCGGAGC.TTATAACC.3.240229_A00130_0288_BH5HM2DSXC.L2400197", +# "rgsm": "L2400197", +# "rglb": "L2400197", +# "lane": 3, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400197/L2400197_S11_L003_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400197/L2400197_S11_L003_R2_001.fastq.gz" # }, # { -# "RGID": "TCGTAGTG.CCAAGTCT.3", -# "RGSM": "L2400231", -# "RGLB": "L2400231", -# "Lane": 3, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400231/L2400231_S12_L003_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400231/L2400231_S12_L003_R2_001.fastq.gz" +# "rgid": "TCGTAGTG.CCAAGTCT.3.240229_A00130_0288_BH5HM2DSXC.L2400231", +# "rgsm": "L2400231", +# "rglb": "L2400231", +# "lane": 3, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400231/L2400231_S12_L003_R1_001.fastq.gz", +# "read2FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400231/L2400231_S12_L003_R2_001.fastq.gz" # }, # { -# "RGID": "GGAGCGTC.GCACGGAC.3", -# "RGSM": "L2400238", -# "RGLB": "L2400238", -# "Lane": 3, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400238/L2400238_S13_L003_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400238/L2400238_S13_L003_R2_001.fastq.gz" +# "rgid": "GGAGCGTC.GCACGGAC.3.240229_A00130_0288_BH5HM2DSXC.L2400238", +# "rgsm": "L2400238", +# "rglb": "L2400238", +# "lane": 3, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400238/L2400238_S13_L003_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400238/L2400238_S13_L003_R2_001.fastq.gz" # }, # { -# "RGID": "ATGGCATG.GGTACCTT.3", -# "RGSM": "L2400239", -# "RGLB": "L2400239", -# "Lane": 3, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400239/L2400239_S14_L003_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400239/L2400239_S14_L003_R2_001.fastq.gz" +# "rgid": "ATGGCATG.GGTACCTT.3.240229_A00130_0288_BH5HM2DSXC.L2400239", +# "rgsm": "L2400239", +# "rglb": "L2400239", +# "lane": 3, +# "read1FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400239/L2400239_S14_L003_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400239/L2400239_S14_L003_R2_001.fastq.gz" # }, # { -# "RGID": "GCAATGCA.AACGTTCC.3", -# "RGSM": "L2400240", -# "RGLB": "L2400240", -# "Lane": 3, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400240/L2400240_S15_L003_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400240/L2400240_S15_L003_R2_001.fastq.gz" +# "rgid": "GCAATGCA.AACGTTCC.3.240229_A00130_0288_BH5HM2DSXC.L2400240", +# "rgsm": "L2400240", +# "rglb": "L2400240", +# "lane": 3, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400240/L2400240_S15_L003_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_3/L2400240/L2400240_S15_L003_R2_001.fastq.gz" # }, # { -# "RGID": "ACGCCTTGTT.ACGTTCCTTA.4", -# "RGSM": "L2400165", -# "RGLB": "L2400165", -# "Lane": 4, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400165/L2400165_S16_L004_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400165/L2400165_S16_L004_R2_001.fastq.gz" 
+# "rgid": "ACGCCTTGTT.ACGTTCCTTA.4.240229_A00130_0288_BH5HM2DSXC.L2400165", +# "rgsm": "L2400165", +# "rglb": "L2400165", +# "lane": 4, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400165/L2400165_S16_L004_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400165/L2400165_S16_L004_R2_001.fastq.gz" # }, # { -# "RGID": "GCACGGAC.TGCGAGAC.4", -# "RGSM": "L2400191", -# "RGLB": "L2400191", -# "Lane": 4, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400191/L2400191_S17_L004_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400191/L2400191_S17_L004_R2_001.fastq.gz" +# "rgid": "GCACGGAC.TGCGAGAC.4.240229_A00130_0288_BH5HM2DSXC.L2400191", +# "rgsm": "L2400191", +# "rglb": "L2400191", +# "lane": 4, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400191/L2400191_S17_L004_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400191/L2400191_S17_L004_R2_001.fastq.gz" # }, # { -# "RGID": "GTCGGAGC.TTATAACC.4", -# "RGSM": "L2400197", -# "RGLB": "L2400197", -# "Lane": 4, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400197/L2400197_S11_L004_R1_001.fastq.gz", -# "Read2FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400197/L2400197_S11_L004_R2_001.fastq.gz" -# }, +# "rgid": "GTCGGAGC.TTATAACC.4.240229_A00130_0288_BH5HM2DSXC.L2400197", +# "rgsm": "L2400197", +# "rglb": "L2400197", +# "lane": 4, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400197/L2400197_S11_L004_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400197/L2400197_S11_L004_R2_001.fastq.gz" +# }, # { -# "RGID": "CTTGGTAT.GGACTTGG.4", -# "RGSM": "L2400198", -# "RGLB": "L2400198", -# "Lane": 4, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400198/L2400198_S18_L004_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400198/L2400198_S18_L004_R2_001.fastq.gz" -# }, +# "rgid": "CTTGGTAT.GGACTTGG.4.240229_A00130_0288_BH5HM2DSXC.L2400198", +# "rgsm": "L2400198", +# "rglb": "L2400198", +# "lane": 4, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400198/L2400198_S18_L004_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400198/L2400198_S18_L004_R2_001.fastq.gz" +# }, # { -# "RGID": "GTTCCAAT.GCAGAATT.4", -# "RGSM": "L2400241", -# "RGLB": "L2400241", -# "Lane": 4, -# "Read1FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400241/L2400241_S19_L004_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400241/L2400241_S19_L004_R2_001.fastq.gz" -# }, +# "rgid": "GTTCCAAT.GCAGAATT.4.240229_A00130_0288_BH5HM2DSXC.L2400241", +# "rgsm": "L2400241", +# "rglb": "L2400241", +# "lane": 4, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400241/L2400241_S19_L004_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400241/L2400241_S19_L004_R2_001.fastq.gz" +# }, # { -# "RGID": "ACCTTGGC.ATGAGGCC.4", -# "RGSM": "L2400242", -# "RGLB": "L2400242", -# "Lane": 4, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400242/L2400242_S20_L004_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400242/L2400242_S20_L004_R2_001.fastq.gz" -# }, +# "rgid": "ACCTTGGC.ATGAGGCC.4.240229_A00130_0288_BH5HM2DSXC.L2400242", +# "rgsm": "L2400242", +# "rglb": "L2400242", +# "lane": 4, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400242/L2400242_S20_L004_R1_001.fastq.gz", +# "read2FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400242/L2400242_S20_L004_R2_001.fastq.gz" +# }, # { -# "RGID": "AGTTTCGA.CCTACGAT.4", -# "RGSM": "L2400249", -# "RGLB": "L2400249", -# "Lane": 4, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400249/L2400249_S21_L004_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400249/L2400249_S21_L004_R2_001.fastq.gz" -# }, +# "rgid": "AGTTTCGA.CCTACGAT.4.240229_A00130_0288_BH5HM2DSXC.L2400249", +# "rgsm": "L2400249", +# "rglb": "L2400249", +# "lane": 4, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400249/L2400249_S21_L004_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400249/L2400249_S21_L004_R2_001.fastq.gz" +# }, # { -# "RGID": "GAACCTCT.GTCTGCGC.4", -# "RGSM": "L2400250", -# "RGLB": "L2400250", -# "Lane": 4, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400250/L2400250_S22_L004_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400250/L2400250_S22_L004_R2_001.fastq.gz" -# }, +# "rgid": "GAACCTCT.GTCTGCGC.4.240229_A00130_0288_BH5HM2DSXC.L2400250", +# "rgsm": "L2400250", +# "rglb": "L2400250", +# "lane": 4, +# "read1FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400250/L2400250_S22_L004_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400250/L2400250_S22_L004_R2_001.fastq.gz" +# }, # { -# "RGID": "GCCCAGTG.CCGCAATT.4", -# "RGSM": "L2400251", -# "RGLB": "L2400251", -# "Lane": 4, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400251/L2400251_S23_L004_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400251/L2400251_S23_L004_R2_001.fastq.gz" -# }, +# "rgid": "GCCCAGTG.CCGCAATT.4.240229_A00130_0288_BH5HM2DSXC.L2400251", +# "rgsm": "L2400251", +# "rglb": "L2400251", +# "lane": 4, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400251/L2400251_S23_L004_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400251/L2400251_S23_L004_R2_001.fastq.gz" +# }, # { -# "RGID": "TGACAGCT.CCCGTAGG.4", -# "RGSM": "L2400252", -# "RGLB": "L2400252", -# "Lane": 4, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400252/L2400252_S24_L004_R1_001.fastq.gz", -# "Read2FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400252/L2400252_S24_L004_R2_001.fastq.gz" -# }, +# "rgid": "TGACAGCT.CCCGTAGG.4.240229_A00130_0288_BH5HM2DSXC.L2400252", +# "rgsm": "L2400252", +# "rglb": "L2400252", +# "lane": 4, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400252/L2400252_S24_L004_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400252/L2400252_S24_L004_R2_001.fastq.gz" +# }, # { -# "RGID": "CATCACCC.ATATAGCA.4", -# "RGSM": "L2400253", -# "RGLB": "L2400253", -# "Lane": 4, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400253/L2400253_S25_L004_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400253/L2400253_S25_L004_R2_001.fastq.gz" -# }, +# "rgid": "CATCACCC.ATATAGCA.4.240229_A00130_0288_BH5HM2DSXC.L2400253", +# "rgsm": "L2400253", +# "rglb": "L2400253", +# "lane": 4, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400253/L2400253_S25_L004_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400253/L2400253_S25_L004_R2_001.fastq.gz" +# }, # { -# "RGID": "CTGGAGTA.GTTCGGTT.4", -# "RGSM": "L2400254", -# "RGLB": "L2400254", -# "Lane": 4, -# "Read1FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400254/L2400254_S26_L004_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400254/L2400254_S26_L004_R2_001.fastq.gz" -# }, +# "rgid": "CTGGAGTA.GTTCGGTT.4.240229_A00130_0288_BH5HM2DSXC.L2400254", +# "rgsm": "L2400254", +# "rglb": "L2400254", +# "lane": 4, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400254/L2400254_S26_L004_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400254/L2400254_S26_L004_R2_001.fastq.gz" +# }, # { -# "RGID": "GATCCGGG.AAGCAGGT.4", -# "RGSM": "L2400255", -# "RGLB": "L2400255", -# "Lane": 4, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400255/L2400255_S27_L004_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400255/L2400255_S27_L004_R2_001.fastq.gz" -# }, +# "rgid": "GATCCGGG.AAGCAGGT.4.240229_A00130_0288_BH5HM2DSXC.L2400255", +# "rgsm": "L2400255", +# "rglb": "L2400255", +# "lane": 4, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400255/L2400255_S27_L004_R1_001.fastq.gz", +# "read2FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400255/L2400255_S27_L004_R2_001.fastq.gz" +# }, # { -# "RGID": "AACACCTG.CGCATGGG.4", -# "RGSM": "L2400256", -# "RGLB": "L2400256", -# "Lane": 4, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400256/L2400256_S28_L004_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400256/L2400256_S28_L004_R2_001.fastq.gz" -# }, +# "rgid": "AACACCTG.CGCATGGG.4.240229_A00130_0288_BH5HM2DSXC.L2400256", +# "rgsm": "L2400256", +# "rglb": "L2400256", +# "lane": 4, +# "read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400256/L2400256_S28_L004_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400256/L2400256_S28_L004_R2_001.fastq.gz" +# }, # { -# "RGID": "GTGACGTT.TCCCAGAT.4", -# "RGSM": "L2400257", -# "RGLB": "L2400257", -# "Lane": 4, -# "Read1FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400257/L2400257_S29_L004_R1_001.fastq.gz", -# "Read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400257/L2400257_S29_L004_R2_001.fastq.gz" +# "rgid": "GTGACGTT.TCCCAGAT.4.240229_A00130_0288_BH5HM2DSXC.L2400257", +# "rgsm": "L2400257", +# "rglb": "L2400257", +# "lane": 4, +# "read1FileUri": 
"s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400257/L2400257_S29_L004_R1_001.fastq.gz", +# "read2FileUri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400257/L2400257_S29_L004_R2_001.fastq.gz" # } # ], # "instrument_run_id": "240229_A00130_0288_BH5HM2DSXC" -# } -# -# , +# }, # None # ) # , @@ -2033,4 +2014,4 @@ def handler(event, context): # # "complete_fastq_list_row_shower_event_data": { # # "instrumentRunId": "240229_A00130_0288_BH5HM2DSXC" # # } -# # } +# # } \ No newline at end of file diff --git a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_demultiplex_stats_py/get_demultiplex_stats.py b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_demultiplex_stats_py/get_demultiplex_stats.py new file mode 100644 index 000000000..12fe6c70f --- /dev/null +++ b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_demultiplex_stats_py/get_demultiplex_stats.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python + +""" +Given a demux.csv path and an instrument run id, + +Pair the read counts, and quality match scores with a fastq list row id + +Given we run fastqc, the only relevant information is the read count + +Lane,SampleID,Index,# Reads,# Perfect Index Reads,# One Mismatch Index Reads,# Two Mismatch Index Reads,% Reads,% Perfect Index Reads,% One Mismatch Index Reads,% Two Mismatch Index Reads +1,LPRJ241644,CTGATCGT-GCGCATAT,1115618424,1100614240,15004184,0,0.3422,0.9866,0.0134,0.0000 +1,LPRJ241645,ACTCTCGA-CTGTACCA,761004667,751268217,9736450,0,0.2335,0.9872,0.0128,0.0000 +1,LPRJ241646,TGAGCTAG-ACCGGTTA,718053113,709117597,8935516,0,0.2203,0.9876,0.0124,0.0000 
+1,LPRJ241653,ATCGATCG-TGGAAGCA,416153434,404733328,11420106,0,0.1277,0.9726,0.0274,0.0000 +1,Undetermined,,248852075,248852075,0,0,0.0763,1.0000,0.0000,0.0000 +2,LPRJ241647,GAGACGAT-GAACGGTT,884772616,866133764,18638852,0,0.2638,0.9789,0.0211,0.0000 +2,LPRJ241648,CTTGTCGA-CGATGTTC,817203045,808993518,8209527,0,0.2437,0.9900,0.0100,0.0000 +2,LPRJ241649,TTCCAAGG-CTACAAGG,803395119,792639954,10755165,0,0.2396,0.9866,0.0134,0.0000 +2,LPRJ241654,GCAAGATC-AGTCGAAG,533059421,525802391,7257030,0,0.1589,0.9864,0.0136,0.0000 +2,Undetermined,,315283401,315283401,0,0,0.0940,1.0000,0.0000,0.0000 +3,LPRJ241650,CGCATGAT-AAGCCTGA,787405495,780175100,7230395,0,0.2385,0.9908,0.0092,0.0000 +3,LPRJ241651,ACGGAACA-ACGAGAAC,799004270,791310089,7694181,0,0.2420,0.9904,0.0096,0.0000 +3,LPRJ241652,CGGCTAAT-CTCGTTCT,858854271,847914368,10939903,0,0.2601,0.9873,0.0127,0.0000 +3,LPRJ241653,ATCGATCG-TGGAAGCA,284364893,276564021,7800872,0,0.0861,0.9726,0.0274,0.0000 +3,LPRJ241654,GCAAGATC-AGTCGAAG,256802569,252549724,4252845,0,0.0778,0.9834,0.0166,0.0000 +3,Undetermined,,315445082,315445082,0,0,0.0955,1.0000,0.0000,0.0000 +4,L2401469,GCGATTAA-GATCTGCT,535609536,529857703,5751833,0,0.1548,0.9893,0.0107,0.0000 +4,L2401470,ATTCAGAA-AGGCTATA,564242711,557461667,6781044,0,0.1631,0.9880,0.0120,0.0000 +4,L2401471,GAATAATC-GCCTCTAT,540809568,534156451,6653117,0,0.1563,0.9877,0.0123,0.0000 +4,L2401472,TTAATCAG-CTTCGCCT,448535365,442654228,5881137,0,0.1297,0.9869,0.0131,0.0000 +4,L2401473,CGCTCATT-TAAGATTA,539917611,534300724,5616887,0,0.1561,0.9896,0.0104,0.0000 +4,L2401474,TCCGCGAA-AGTAAGTA,546591434,538986434,7605000,0,0.1580,0.9861,0.0139,0.0000 +4,L2401475,ATTACTCG-GACTTCCT,1230685,1207314,23371,0,0.0004,0.9810,0.0190,0.0000 +4,Undetermined,,282269526,282269526,0,0,0.0816,1.0000,0.0000,0.0000 +""" + +# Standard imports +from pathlib import Path +import pandas as pd +import boto3 +import typing +import logging +import tempfile +from os import environ + +# Wrapica +from wrapica.project_data import ( 
+ ProjectData, convert_uri_to_project_data_obj, read_icav2_file_contents +) + +# Type checking +if typing.TYPE_CHECKING: + from mypy_boto3_secretsmanager import SecretsManagerClient + +# Globals +ICAV2_BASE_URL = "https://ica.illumina.com/ica/rest" + +# Set loggers +logger = logging.getLogger() +logger.setLevel(logging.INFO) + + +def get_secrets_manager_client() -> 'SecretsManagerClient': + """ + Return Secrets Manager client + """ + return boto3.client("secretsmanager") + + +def get_secret(secret_id: str) -> str: + """ + Return secret value + """ + return get_secrets_manager_client().get_secret_value(SecretId=secret_id)["SecretString"] + + +# Functions +def set_icav2_env_vars(): + """ + Set the icav2 environment variables + :return: + """ + environ["ICAV2_BASE_URL"] = ICAV2_BASE_URL + environ["ICAV2_ACCESS_TOKEN"] = get_secret( + environ["ICAV2_ACCESS_TOKEN_SECRET_ID"] + ) + + + +def get_demultiplex_stats(demux_csv_project_data_obj: ProjectData, instrument_run_id: str) -> pd.DataFrame: + """ + Get the demux df + :param demux_csv_project_data_obj: + :param instrument_run_id: + :return: + """ + with tempfile.NamedTemporaryFile(suffix=".csv") as temp_file: + read_icav2_file_contents( + demux_csv_project_data_obj.project_id, + demux_csv_project_data_obj.data.id, + output_path=Path(temp_file.name) + ) + + demux_df = pd.read_csv( + temp_file.name + ).query("SampleID != 'Undetermined'") + + # Get the fastq list row rgid by combining the index, lane, instrument run id and the sample id + demux_df['fastqListRowRgid'] = demux_df.apply( + lambda row: ".".join([ + row['Index'].replace("-", "."), + str(row['Lane']), + instrument_run_id, + row['SampleID'], + ]), + axis='columns' + ) + + return demux_df + + +def demux_stats_df_to_json(demux_stats_df: pd.DataFrame) -> dict: + """ + Convert the demux stats df to a json + :param demux_stats_df: + :return: + """ + return demux_stats_df.rename( + columns={ + '# Reads': 'numReads', + } + )[[ + "fastqListRowRgid", + "numReads", + 
]].to_dict(orient='records') + + +def handler(event, context): + """ + Given the instrument run id and the path to the Demultiplex Stats csv file, + Return the read counts and quality match scores paired with the fastq list row id + :param event: + :param context: + :return: + """ + # Set ICAv2 env vars + set_icav2_env_vars() + + # Get the project data + demux_csv_project_data_obj = convert_uri_to_project_data_obj( + event['demux_uri'] + ) + + # Get the demux stats df + demux_stats_df = get_demultiplex_stats( + demux_csv_project_data_obj, + event['instrument_run_id'] + ) + + # Convert the demux stats df to a json + return { + "read_count_by_fastq_list_row": demux_stats_df_to_json(demux_stats_df) + } + + +# if __name__ == "__main__": +# import json +# environ['AWS_PROFILE'] = 'umccr-production' +# environ['AWS_REGION'] = 'ap-southeast-2' +# environ['ICAV2_ACCESS_TOKEN_SECRET_ID'] = "ICAv2JWTKey-umccr-prod-service-production" +# print( +# json.dumps( +# handler( +# { +# "demux_uri": "s3://pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production/primary/241004_A01052_0233_AHW5KMDSXC/20241006450c797a/Reports/Demultiplex_Stats.csv", +# "instrument_run_id": "241004_A01052_0233_AHW5KMDSXC" +# }, +# None, +# ), +# indent=4 +# ) +# ) +# +# # Yields +# # { +# # "read_count_by_fastq_list_row": [ +# # { +# # "fastqListRowRgid": "CTGATCGT.GCGCATAT.1.241004_A01052_0233_AHW5KMDSXC.LPRJ241644", +# # "numReads": 1115618424 +# # }, +# # { +# # "fastqListRowRgid": "ACTCTCGA.CTGTACCA.1.241004_A01052_0233_AHW5KMDSXC.LPRJ241645", +# # "numReads": 761004667 +# # }, +# # { +# # "fastqListRowRgid": "TGAGCTAG.ACCGGTTA.1.241004_A01052_0233_AHW5KMDSXC.LPRJ241646", +# # "numReads": 718053113 +# # }, +# # { +# # "fastqListRowRgid": "ATCGATCG.TGGAAGCA.1.241004_A01052_0233_AHW5KMDSXC.LPRJ241653", +# # "numReads": 416153434 +# # }, +# # { +# # "fastqListRowRgid": "GAGACGAT.GAACGGTT.2.241004_A01052_0233_AHW5KMDSXC.LPRJ241647", +# # "numReads": 884772616 +# # }, +# # { +# # 
"fastqListRowRgid": "CTTGTCGA.CGATGTTC.2.241004_A01052_0233_AHW5KMDSXC.LPRJ241648", +# # "numReads": 817203045 +# # }, +# # { +# # "fastqListRowRgid": "TTCCAAGG.CTACAAGG.2.241004_A01052_0233_AHW5KMDSXC.LPRJ241649", +# # "numReads": 803395119 +# # }, +# # { +# # "fastqListRowRgid": "GCAAGATC.AGTCGAAG.2.241004_A01052_0233_AHW5KMDSXC.LPRJ241654", +# # "numReads": 533059421 +# # }, +# # { +# # "fastqListRowRgid": "CGCATGAT.AAGCCTGA.3.241004_A01052_0233_AHW5KMDSXC.LPRJ241650", +# # "numReads": 787405495 +# # }, +# # { +# # "fastqListRowRgid": "ACGGAACA.ACGAGAAC.3.241004_A01052_0233_AHW5KMDSXC.LPRJ241651", +# # "numReads": 799004270 +# # }, +# # { +# # "fastqListRowRgid": "CGGCTAAT.CTCGTTCT.3.241004_A01052_0233_AHW5KMDSXC.LPRJ241652", +# # "numReads": 858854271 +# # }, +# # { +# # "fastqListRowRgid": "ATCGATCG.TGGAAGCA.3.241004_A01052_0233_AHW5KMDSXC.LPRJ241653", +# # "numReads": 284364893 +# # }, +# # { +# # "fastqListRowRgid": "GCAAGATC.AGTCGAAG.3.241004_A01052_0233_AHW5KMDSXC.LPRJ241654", +# # "numReads": 256802569 +# # }, +# # { +# # "fastqListRowRgid": "GCGATTAA.GATCTGCT.4.241004_A01052_0233_AHW5KMDSXC.L2401469", +# # "numReads": 535609536 +# # }, +# # { +# # "fastqListRowRgid": "ATTCAGAA.AGGCTATA.4.241004_A01052_0233_AHW5KMDSXC.L2401470", +# # "numReads": 564242711 +# # }, +# # { +# # "fastqListRowRgid": "GAATAATC.GCCTCTAT.4.241004_A01052_0233_AHW5KMDSXC.L2401471", +# # "numReads": 540809568 +# # }, +# # { +# # "fastqListRowRgid": "TTAATCAG.CTTCGCCT.4.241004_A01052_0233_AHW5KMDSXC.L2401472", +# # "numReads": 448535365 +# # }, +# # { +# # "fastqListRowRgid": "CGCTCATT.TAAGATTA.4.241004_A01052_0233_AHW5KMDSXC.L2401473", +# # "numReads": 539917611 +# # }, +# # { +# # "fastqListRowRgid": "TCCGCGAA.AGTAAGTA.4.241004_A01052_0233_AHW5KMDSXC.L2401474", +# # "numReads": 546591434 +# # }, +# # { +# # "fastqListRowRgid": "ATTACTCG.GACTTCCT.4.241004_A01052_0233_AHW5KMDSXC.L2401475", +# # "numReads": 1230685 +# # } +# # ] +# # } diff --git 
a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_demultiplex_stats_py/requirements.txt b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_demultiplex_stats_py/requirements.txt
new file mode 100644
index 000000000..3cc87415f
--- /dev/null
+++ b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_demultiplex_stats_py/requirements.txt
@@ -0,0 +1,2 @@
+pandas==2.2.3
+wrapica==2.27.1.post20240830140737
diff --git a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_fastqc_stats/Dockerfile b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_fastqc_stats/Dockerfile
new file mode 100644
index 000000000..0967cc1ac
--- /dev/null
+++ b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_fastqc_stats/Dockerfile
@@ -0,0 +1,45 @@
+FROM --platform=${TARGETPLATFORM} public.ecr.aws/lambda/python:3.12
+
+LABEL maintainer="Alexis Lucattini"
+
+# Build args (lambda source directory, plus the FastQC repository and the commit that is checked out and built)
+ARG APP_ROOT=.
+ARG FASTQC_GIT_COMMIT="1faeea0"
+ARG FASTQC_VERSION="0.12.1-dev"
+ARG FASTQC_GITHUB_URL="https://github.com/s-andrews/FastQC"
+
+# Copy over the requirements
+COPY ${APP_ROOT}/requirements.txt ./
+
+# Install the build tools (git, wget, java, ant, perl), build FastQC from source at the pinned commit and link it onto the PATH, then pip install awscli and the python requirements
+RUN \
+  dnf update -y && \
+  dnf install -y \
+    git \
+    wget \
+    java \
+    java-devel \
+    ant \
+    perl && \
+  echo "Install fastqc" 1>&2 && \
+  mkdir /opt/fastqc/ && \
+  ( \
+    cd /opt/fastqc/ && \
+    git clone \
+      "${FASTQC_GITHUB_URL}" \
+      "${FASTQC_VERSION}" && \
+    cd "${FASTQC_VERSION}" && \
+    git checkout "${FASTQC_GIT_COMMIT}" && \
+    JAVA_HOME="/usr/lib/jvm/java/" ant && \
+    chmod +x "bin/fastqc" && \
+    ln -s "$PWD/bin/fastqc" "/usr/local/bin/fastqc" \
+  ) && \
+  echo "Install awsv2 cli" 1>&2 && \
+  pip install --upgrade \
+    awscli && \
+  pip install -r requirements.txt
+
+# Copy the lambda contents
+COPY ${APP_ROOT}/run_fastqc.py ./
+
+CMD ["run_fastqc.handler"]
diff --git a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_fastqc_stats/requirements.txt b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_fastqc_stats/requirements.txt
new file mode 100644
index 000000000..fecd999d7
--- /dev/null
+++ b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_fastqc_stats/requirements.txt
@@ -0,0 +1,2 @@
+pandas==2.2.3
+wrapica==2.27.1.post20240830140737
\ No newline at end of file
diff --git a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_fastqc_stats/run_fastqc.py b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_fastqc_stats/run_fastqc.py
new file mode 100644
index 000000000..df379e699
--- /dev/null
+++ 
b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_fastqc_stats/run_fastqc.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+
+"""
+Given a url to a fastq file, create a copy of the fastqc template
+"""
+
+from pathlib import Path
+
+# Standard imports
+import pandas as pd
+import boto3
+import typing
+import logging
+import tempfile
+from io import StringIO
+from os import environ
+import shutil
+import subprocess
+
+# Wrapica
+from wrapica.project_data import (
+    ProjectData, create_download_url, convert_uri_to_project_data_obj
+)
+
+# Type checking
+from typing import Dict, List
+if typing.TYPE_CHECKING:
+    from mypy_boto3_secretsmanager import SecretsManagerClient
+
+# Globals
+ICAV2_BASE_URL = "https://ica.illumina.com/ica/rest"
+FASTQC_TEMPLATE = """
+#!/usr/bin/env bash
+
+# Set up the environment
+set -eu
+
+# Globals
+OUTPUT_DIRNAME="outdir"
+
+# Glocals
+S3_PRESIGNED_URL="__S3_PRESIGNED_URL__"
+
+# Create a directory to store the output
+mkdir -p "${OUTPUT_DIRNAME}"
+
+# Download the file from S3,
+# extract the first 4M lines (1 million reads),
+# and run FastQC
+wget \
+  --quiet \
+  --output-document /dev/stdout \
+  "${S3_PRESIGNED_URL}" | \
+zcat | \
+head -n4000000 | \
+fastqc \
+  --extract \
+  --outdir outdir \
+  --format fastq \
+  --quiet \
+  "stdin" 1>/dev/null 2>&1
+
+# Print the summary to stdout
+cat outdir/stdin_fastqc/summary.txt
+"""
+
+# Set loggers
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+
+def get_secrets_manager_client() -> 'SecretsManagerClient':
+    """
+    Return Secrets Manager client
+    """
+    return boto3.client("secretsmanager")
+
+
+def get_secret(secret_id: str) -> str:
+    """
+    Return the SecretString stored under secret_id in Secrets Manager
+    """
+    return get_secrets_manager_client().get_secret_value(SecretId=secret_id)["SecretString"]
+
+
+# Functions
+def set_icav2_env_vars():
+    """
+    Export ICAV2_BASE_URL and ICAV2_ACCESS_TOKEN (the secret named by ICAV2_ACCESS_TOKEN_SECRET_ID)
+    :return:
+    """
+    environ["ICAV2_BASE_URL"] = ICAV2_BASE_URL
+    environ["ICAV2_ACCESS_TOKEN"] = get_secret(
+        environ["ICAV2_ACCESS_TOKEN_SECRET_ID"]
+    )
+
+
+def run_fastqc(fastq_url: str) -> List[Dict[str, str]]:
+    """
+    Run fastqc through the template shell script and parse the summary it prints to stdout
+
+    PASS Basic Statistics stdin
+    PASS Per base sequence quality stdin
+    PASS Per tile sequence quality stdin
+    PASS Per sequence quality scores stdin
+    WARN Per base sequence content stdin
+    PASS Per sequence GC content stdin
+    PASS Per base N content stdin
+    WARN Sequence Length Distribution stdin
+    WARN Sequence Duplication Levels stdin
+    PASS Overrepresented sequences stdin
+    PASS Adapter Content stdin
+
+    :param fastq_url: Url substituted for __S3_PRESIGNED_URL__ in the template and downloaded with wget
+    :return: One {"status": ..., "metric": ...} record per line of the summary
+    :raises ChildProcessError: If the shell script exits with a non-zero return code
+    """
+    # Substitute the download url into the shell script template
+    script_text = FASTQC_TEMPLATE.replace("__S3_PRESIGNED_URL__", fastq_url)
+
+    # The working directory and the script file are both removed when the blocks exit
+    with tempfile.TemporaryDirectory() as working_dir:
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".sh") as script_h:
+            # Write through the open handle and flush so that bash sees the full script
+            script_h.write(script_text)
+            script_h.flush()
+
+            # Run the fastqc script inside the temp directory
+            run_fastqc_proc = subprocess.run(
+                ["bash", script_h.name],
+                cwd=working_dir,
+                capture_output=True
+            )
+
+    if not run_fastqc_proc.returncode == 0:
+        logger.error(f"Run FastQC Proc failed with return code {run_fastqc_proc.returncode}")
+        logger.error(f"Run FastQC Proc failed with stderr {run_fastqc_proc.stderr.decode()}")
+        logger.error(f"Run FastQC Proc failed with stdout {run_fastqc_proc.stdout.decode()}")
+        raise ChildProcessError(
+            f"FastQC script failed with return code {run_fastqc_proc.returncode}"
+        )
+
+    # The script prints summary.txt to stdout, so parse it straight from memory
+    fastqc_output_df = pd.read_csv(
+        StringIO(run_fastqc_proc.stdout.decode()),
+        sep="\t",
+        names=["status", "metric", "stdin"]
+    ).drop(columns=["stdin"])
+
+    # Replace spaces in the metric names with underscores (case is preserved)
+    fastqc_output_df["metric"] = fastqc_output_df["metric"].str.replace(" ", "_")
+
+    # Return as a list of records
+    return fastqc_output_df.to_dict(orient="records")
+
+
+def handler(event, context):
+    """
+    Given a fastq uri, create a copy of the fastqc template and replace __S3_PRESIGNED_URL__ with the url to the fastq file
+
+    Then run the fastqc command
+
+    Extract the relevant outputs from the fastqc output and return them in json format
+    :param event: Dict with the keys 'fastq_uri' and 'read_count'
+    :param context:
+    :return: {"fastqc_output": <records from run_fastqc>}, or {"fastqc_output": None} when read_count is 0
+    """
+
+    # Get the inputs
+    uri = event['fastq_uri']
+    read_count = event['read_count']
+
+    # There is nothing to run fastqc on when the fastq has no reads,
+    # so return before the ICAv2 access token is fetched
+    if read_count == 0:
+        return {
+            "fastqc_output": None
+        }
+
+    # Set the icav2 environment variables
+    set_icav2_env_vars()
+
+    # Get the uri as a project data object
+    fastqc_projectdata_obj: ProjectData = convert_uri_to_project_data_obj(uri)
+
+    # Create the download url
+    fastqc_download_url = create_download_url(
+        fastqc_projectdata_obj.project_id,
+        fastqc_projectdata_obj.data.id
+    )
+
+    # Run fastqc
+    fastqc_output = run_fastqc(fastqc_download_url)
+
+    return {
+        "fastqc_output": fastqc_output
+    }
+
+
+# if __name__ == "__main__":
+#     # Test the function
+#     import json
+#     from os import environ
+#     environ['ICAV2_ACCESS_TOKEN_SECRET_ID'] = "ICAv2JWTKey-umccr-prod-service-dev"
+#     environ['AWS_REGION'] = "ap-southeast-2"
+#     environ['AWS_PROFILE'] = 'umccr-development'
+#     environ['PATH'] = environ['PATH'] + ':/home/alexiswl/miniconda3/envs/biotools/bin'
+#     print(
+#         json.dumps(
+#             handler(
+#                 {
+#                     "fastq_uri": "s3://pipeline-dev-cache-503977275616-ap-southeast-2/byob-icav2/development/primary/240229_A00130_0288_BH5HM2DSXC/202409108ed29dcc/Samples/Lane_4/L2400165/L2400165_S16_L004_R1_001.fastq.gz"
+#                 },
+#                 None
+#             ),
+#             indent=4
+#         )
+#     )
\ No newline at end of file
diff --git a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_sequali_stats/Dockerfile b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_sequali_stats/Dockerfile
new file mode 100644
index 000000000..06fd0fc95
--- /dev/null
+++ b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_sequali_stats/Dockerfile
@@ -0,0 +1,26 @@
+FROM --platform=${TARGETPLATFORM} public.ecr.aws/lambda/python:3.12
+
+LABEL maintainer="Alexis Lucattini"
+
+# Build args
+ARG APP_ROOT=.
+
+# Copy over the requirements
+COPY ${APP_ROOT}/requirements.txt ./
+
+# Install gzip and wget, then pip install awscli and the python requirements (which include sequali)
+RUN \
+  dnf update -y && \
+  dnf install -y \
+    gzip \
+    wget && \
+  echo "Install awsv2 cli" 1>&2 && \
+  pip install --upgrade \
+    awscli && \
+  echo "Install sequali" 1>&2 && \
+  pip install -r requirements.txt
+
+# Copy the lambda contents
+COPY ${APP_ROOT}/get_sequali_stats.py ./
+
+CMD ["get_sequali_stats.handler"]
diff --git a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_sequali_stats/get_sequali_stats.py b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_sequali_stats/get_sequali_stats.py
new file mode 100644
index 000000000..2b92069a3
--- /dev/null
+++ b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_sequali_stats/get_sequali_stats.py
@@ -0,0 +1,315 @@
+#!/usr/bin/env python3
+
+"""
+This script is used to get the statistics of the sequali dataset.
+"""
+import json
+from pathlib import Path
+
+# Standard imports
+import pandas as pd
+import boto3
+import typing
+import logging
+import tempfile
+from os import environ
+import subprocess
+from urllib.parse import urlparse
+
+# Wrapica
+from wrapica.project_data import (
+    ProjectData, create_download_url, convert_uri_to_project_data_obj
+)
+
+# Type checking
+from typing import Dict, List
+if typing.TYPE_CHECKING:
+    from mypy_boto3_secretsmanager import SecretsManagerClient
+
+# Globals
+ICAV2_BASE_URL = "https://ica.illumina.com/ica/rest"
+HG38_N_BASES = 3099734149  # https://www.ncbi.nlm.nih.gov/datasets/genome/GCF_000001405.26
+
+SEQUALI_TEMPLATE_STR = """
+#!/usr/bin/env bash
+
+# Set up the environment
+set -eu
+
+# Globals
+OUTPUT_DIRNAME="output"
+
+# Glocals
+S3_R1_FILE_NAME="__S3_R1_FILE_NAME__"
+S3_R2_FILE_NAME="__S3_R2_FILE_NAME__"
+S3_R1_PRESIGNED_URL="__S3_R1_PRESIGNED_URL__"
+S3_R2_PRESIGNED_URL="__S3_R2_PRESIGNED_URL__"
+
+# Create a directory to store the output
+mkdir -p "${OUTPUT_DIRNAME}"
+
+# Download the first 1 million reads for the R1 file
+wget \
+  --quiet \
+  --output-document /dev/stdout \
+  "${S3_R1_PRESIGNED_URL}" | \
+zcat | \
+head -n4000000 | \
+gzip --stdout > "${S3_R1_FILE_NAME%.fastq.gz}_subset.fastq.gz"
+
+# Download the first 1 million reads for the R2 file
+wget \
+  --quiet \
+  --output-document /dev/stdout \
+  "${S3_R2_PRESIGNED_URL}" | \
+zcat | \
+head -n4000000 | \
+gzip --stdout > "${S3_R2_FILE_NAME%.fastq.gz}_subset.fastq.gz"
+
+# Import the reads into sequali
+sequali \
+  --outdir "${OUTPUT_DIRNAME}" \
+  --json "output.json" \
+  "${S3_R1_FILE_NAME%.fastq.gz}_subset.fastq.gz" \
+  "${S3_R2_FILE_NAME%.fastq.gz}_subset.fastq.gz"
+"""
+
+# Set loggers
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+
+def get_secrets_manager_client() -> 'SecretsManagerClient':
+    """
+    Return Secrets Manager client
+    """
+    return boto3.client("secretsmanager")
+
+
+def get_secret(secret_id: str) -> str:
+    """
+    Return the SecretString stored under secret_id in Secrets Manager
+    """
+    return get_secrets_manager_client().get_secret_value(SecretId=secret_id)["SecretString"]
+
+
+# Functions
+def set_icav2_env_vars():
+    """
+    Export ICAV2_BASE_URL and ICAV2_ACCESS_TOKEN (the secret named by ICAV2_ACCESS_TOKEN_SECRET_ID)
+    :return:
+    """
+    environ["ICAV2_BASE_URL"] = ICAV2_BASE_URL
+    environ["ICAV2_ACCESS_TOKEN"] = get_secret(
+        environ["ICAV2_ACCESS_TOKEN_SECRET_ID"]
+    )
+
+
+def get_insert_size_estimate(insert_sizes: List[int]) -> float:
+    """
+    Return the count-weighted mean of an insert size histogram
+    :param insert_sizes: Histogram in which the value at index i is the count for insert size i
+    :return: Weighted mean insert size rounded to 2 decimal places,
+             or 0.0 when every bin above 0 is empty (or the list itself is empty)
+    """
+    # Bin 0 is excluded from both the numerator and the denominator
+    n_inserts = sum(insert_sizes[1:])
+
+    # Without this guard an empty histogram raises ZeroDivisionError
+    if n_inserts == 0:
+        return 0.0
+
+    weighted_sum = sum(
+        insert_size * insert_size_count
+        for insert_size, insert_size_count in enumerate(insert_sizes)
+        if insert_size > 0
+    )
+
+    return round(weighted_sum / n_inserts, 2)
+
+
+def run_sequali(r1_fastq_url: str, r2_fastq_url: str, read_count: int) -> Dict[str, float]:
+    """
+    Run sequali through the template shell script and summarise its json report
+    :param r1_fastq_url: Url of the R1 fastq.gz file, downloaded with wget by the script
+    :param r2_fastq_url: Url of the R2 fastq.gz file, downloaded with wget by the script
+    :param read_count: Multiplied by the summed r1 + r2 mean read length to estimate the total bases and coverage
+    :return: Flat dict of the insert size, duplication, estimated bases / coverage and per-read length, q20 and gc metrics
+    :raises ChildProcessError: If the shell script exits with a non-zero return code
+    """
+    # Fill in the shell script template
+    filedata = SEQUALI_TEMPLATE_STR
+    filedata = filedata.replace("__S3_R1_FILE_NAME__", Path(urlparse(r1_fastq_url).path).name)
+    filedata = filedata.replace("__S3_R2_FILE_NAME__", Path(urlparse(r2_fastq_url).path).name)
+    filedata = filedata.replace("__S3_R1_PRESIGNED_URL__", r1_fastq_url)
+    filedata = filedata.replace("__S3_R2_PRESIGNED_URL__", r2_fastq_url)
+
+    # The working directory (subset fastqs and report) and the script file are removed when the blocks exit
+    with tempfile.TemporaryDirectory() as working_dir:
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".sh") as script_h:
+            # Write through the open handle and flush so that bash sees the full script
+            script_h.write(filedata)
+            script_h.flush()
+
+            # Run the sequali script inside the temp directory
+            sequali_proc = subprocess.run(
+                ["bash", script_h.name],
+                cwd=working_dir,
+                capture_output=True
+            )
+
+        if not sequali_proc.returncode == 0:
+            # Log the output
+            logger.error("Sequali command failed")
+            logger.error("Stdout: '%s'", sequali_proc.stdout.decode())
+            logger.error("Stderr: '%s'", sequali_proc.stderr.decode())
+
+            # Raise the error
+            raise ChildProcessError(
+                f"Sequali script failed with return code {sequali_proc.returncode}"
+            )
+
+        # Load the json report before the working directory is removed
+        with open(Path(working_dir) / "output" / "output.json", "r") as file_h:
+            sequali_output_dict = json.load(file_h)
+
+    # Convert to a dataframe
+    sequali_summary_df = (
+        pd.DataFrame(
+            {
+                "r1": sequali_output_dict['summary'],
+                "r2": sequali_output_dict['summary_read2']
+            }
+        )
+        .transpose()
+        .reset_index()
+        # Get q20 fraction
+        .assign(
+            q20_pct=lambda x: round(x['q20_bases'] / x['total_bases'], 2),
+            gc_pct=lambda x: round(x['total_gc_bases'] / x['total_bases'], 2)
+        )
+        # Drop columns related to total values (this is just a summary of the first million reads)
+        .drop(
+            columns=[
+                "total_reads", "total_bases",
+                "q20_reads", "q20_bases",
+                "total_gc_bases", "total_n_bases"
+            ]
+        )
+    )
+
+    # Calculate the insert size estimate
+    insert_size_estimate = get_insert_size_estimate(sequali_output_dict['insert_size_metrics']['insert_sizes'])
+
+    # Get the duplicate fraction metric
+    duplicate_fraction = round(1.0 - sequali_output_dict['duplication_fractions']['remaining_fraction'], 2)
+
+    return {
+        # Insert Size Estimate and Duplicate Fraction
+        "insert_size_estimate": insert_size_estimate,
+        "duplicate_fraction": duplicate_fraction,
+        "estimated_bases": (
+            int(sequali_summary_df['mean_length'].sum() * read_count)
+        ),
+        "estimated_wgs_cov": round(sequali_summary_df['mean_length'].sum() * read_count / HG38_N_BASES, 2),
+        # R1 Mean length
+        "r1_mean_length": round(sequali_summary_df.query('index=="r1"')['mean_length'].item(), 2),
+        "r2_mean_length": round(sequali_summary_df.query('index=="r2"')['mean_length'].item(), 2),
+        # Min Read Length
+        "r1_min_read_length": round(sequali_summary_df.query('index=="r1"')['minimum_length'].item(), 2),
+        "r2_min_read_length": round(sequali_summary_df.query('index=="r2"')['minimum_length'].item(), 2),
+        # Max Read Length
+        "r1_max_read_length": round(sequali_summary_df.query('index=="r1"')['maximum_length'].item(), 2),
+        "r2_max_read_length": round(sequali_summary_df.query('index=="r2"')['maximum_length'].item(), 2),
+        # Q20 Fraction
+        "r1_q20_frac": round(sequali_summary_df.query('index=="r1"')['q20_pct'].item(), 2),
+        "r2_q20_frac": round(sequali_summary_df.query('index=="r2"')['q20_pct'].item(), 2),
+        # GC Fraction
+        "r1_gc_frac": round(sequali_summary_df.query('index=="r1"')['gc_pct'].item(), 2),
+        "r2_gc_frac": round(sequali_summary_df.query('index=="r2"')['gc_pct'].item(), 2),
+    }
+
+
+def handler(event, context):
+    """
+    Given a pair of fastq uris, create a copy of the sequali template and replace the placeholders with the file names and download urls
+
+    Then run the sequali command
+
+    Extract the relevant outputs from the sequali output and return them in json format
+    :param event: Dict with the keys 'read_count', 'read1_fastq_uri' and 'read2_fastq_uri'
+    :param context:
+    :return: {"sequali_rapid_summary": <dict from run_sequali>}, or {"sequali_rapid_summary": None} when read_count is 0
+    """
+
+    # Get the inputs
+    read_count = event['read_count']
+    read1_uri = event['read1_fastq_uri']
+    read2_uri = event['read2_fastq_uri']
+
+    # There is nothing to summarise when the fastq pair has no reads,
+    # so return before the ICAv2 access token is fetched
+    if read_count == 0:
+        return {
+            "sequali_rapid_summary": None
+        }
+
+    # Set the icav2 environment variables
+    set_icav2_env_vars()
+
+    # Get the uri as a project data object
+    read1_projectdata_obj: ProjectData = convert_uri_to_project_data_obj(read1_uri)
+    read2_projectdata_obj: ProjectData = convert_uri_to_project_data_obj(read2_uri)
+
+    # Create the download url
+    read1_download_url = create_download_url(
+        read1_projectdata_obj.project_id,
+        read1_projectdata_obj.data.id
+    )
+    read2_download_url = create_download_url(
+        read2_projectdata_obj.project_id,
+        read2_projectdata_obj.data.id
+    )
+
+    # Run sequali
+    sequali_output = run_sequali(read1_download_url, read2_download_url, read_count)
+
+    return {
+        "sequali_rapid_summary": sequali_output
+    }
+
+# if __name__ == "__main__":
+#     # Set environ
+#     environ['AWS_PROFILE'] = 'umccr-production'
+#     environ['AWS_REGION'] = 'ap-southeast-2'
+#     environ['ICAV2_ACCESS_TOKEN_SECRET_ID'] = "ICAv2JWTKey-umccr-prod-service-production"
+#
+#     print(
+#         json.dumps(
+#             handler(
+#                 {
+#                     "read1_fastq_uri": "s3://pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production/primary/240926_A01052_0232_AHW7LHDSXC/20240928f63332ac/Samples/Lane_1/L2401325/L2401325_S1_L001_R1_001.fastq.gz",
+#                     "read2_fastq_uri": "s3://pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production/primary/240926_A01052_0232_AHW7LHDSXC/20240928f63332ac/Samples/Lane_1/L2401325/L2401325_S1_L001_R2_001.fastq.gz"
+#                 },
+#                 None
+#             ),
+#             indent=4
+#         )
+#     )
+#
+#     # {
+#     #     "sequali_rapid_summary": {
+#     #         "insert_size_estimate": 167.32,
+#     #         "duplicate_fraction": 0.18,
+#     #         "r1_mean_length": 141.8,
+#     #         "r2_mean_length": 141.79,
+#     #         "r1_min_read_length": 35,
+#     #         "r2_min_read_length": 35,
+#     #         "r1_max_read_length": 143,
+#     #         "r2_max_read_length": 143,
+#     #         "r1_q20_frac": 0.98,
+#     #         "r2_q20_frac": 0.97,
+#     #         "r1_gc_frac": 0.5,
+#     #         "r2_gc_frac": 0.5
+#     #     }
+#     # }
\ No newline at end of file
diff --git a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_sequali_stats/requirements.txt b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_sequali_stats/requirements.txt
new file mode 100644
index 000000000..cef08bd17
--- /dev/null
+++ b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/lambdas/get_sequali_stats/requirements.txt
@@ -0,0 +1,3 @@
+pandas==2.2.3
+wrapica==2.27.1.post20240830140737
+sequali==0.11.1
\ No newline at end of file
diff --git a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/step_functions_templates/fastq_list_row_event_shower_sfn_template.asl.json 
b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/step_functions_templates/fastq_list_row_event_shower_sfn_template.asl.json index 15d8646d2..cc81361ad 100644 --- a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/step_functions_templates/fastq_list_row_event_shower_sfn_template.asl.json +++ b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/clag/part_2/fastq-list-rows-event-shower/step_functions_templates/fastq_list_row_event_shower_sfn_template.asl.json @@ -14,57 +14,318 @@ "Next": "Generate Event Maps", "Branches": [ { - "StartAt": "Decompress fastq list rows", + "StartAt": "Pre-steps to Rapid QC", "States": { - "Decompress fastq list rows": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Parameters": { - "FunctionName": "${__decompress_fastq_list_rows_lambda_function_arn__}", - "Payload": { - "decompress": true, - "input.$": "$.inputs.payload.data.outputs.fastqListRowsB64gz" - } - }, - "Retry": [ + "Pre-steps to Rapid QC": { + "Type": "Parallel", + "Next": "Wait For DB Update", + "Branches": [ { - "ErrorEquals": [ - "Lambda.ServiceException", - "Lambda.AWSLambdaException", - "Lambda.SdkClientException", - "Lambda.TooManyRequestsException" - ], - "IntervalSeconds": 1, - "MaxAttempts": 3, - "BackoffRate": 2 + "StartAt": "Decompress fastq list rows", + "States": { + "Decompress fastq list rows": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Parameters": { + "FunctionName": "${__decompress_fastq_list_rows_lambda_function_arn__}", + "Payload": { + "decompress": true, + "input.$": "$.inputs.payload.data.outputs.fastqListRowsB64gz" + } + }, + "Retry": [ + { + "ErrorEquals": [ + "Lambda.ServiceException", + "Lambda.AWSLambdaException", + "Lambda.SdkClientException", + "Lambda.TooManyRequestsException" + ], + "IntervalSeconds": 1, + "MaxAttempts": 3, + "BackoffRate": 2 + } + ], + 
"ResultSelector": { + "fastq_list_rows.$": "$.Payload.decompressed_dict" + }, + "ResultPath": "$.decompress_fastq_list_rows_step", + "Next": "Clean up fastq list rows" + }, + "Clean up fastq list rows": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Parameters": { + "FunctionName": "${__clean_up_fastq_list_rows_lambda_function_arn__}", + "Payload": { + "instrument_run_id.$": "$.inputs.payload.data.outputs.instrumentRunId", + "fastq_list_rows.$": "$.decompress_fastq_list_rows_step.fastq_list_rows" + } + }, + "Retry": [ + { + "ErrorEquals": [ + "Lambda.ServiceException", + "Lambda.AWSLambdaException", + "Lambda.SdkClientException", + "Lambda.TooManyRequestsException" + ], + "IntervalSeconds": 1, + "MaxAttempts": 3, + "BackoffRate": 2 + } + ], + "ResultSelector": { + "fastq_list_rows.$": "$.Payload.fastq_list_rows" + }, + "ResultPath": "$.clean_fastq_list_rows_step", + "Next": "Add fastq list rows to instrument run id (Instrument DB)" + }, + "Add fastq list rows to instrument run id (Instrument DB)": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:putItem", + "Parameters": { + "TableName": "${__table_name__}", + "Item": { + "id.$": "$.inputs.payload.data.outputs.instrumentRunId", + "id_type": "${__fastq_list_rows_table_partition_name__}", + "fastq_list_rows": { + "S.$": "States.JsonToString($.clean_fastq_list_rows_step.fastq_list_rows)" + } + } + }, + "ResultPath": null, + "End": true + } + } + }, + { + "StartAt": "Get Read Counts Per RGID", + "States": { + "Get Read Counts Per RGID": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Parameters": { + "FunctionName": "${__get_read_counts_per_rgid_lambda_function_arn__}", + "Payload": { + "instrument_run_id.$": "$.inputs.payload.data.outputs.instrumentRunId", + "demux_uri.$": "States.Format('{}Reports/Demultiplex_Stats.csv', $.inputs.payload.data.outputs.outputUri)" + } + }, + "Retry": [ + { + "ErrorEquals": [ + "Lambda.ServiceException", + 
"Lambda.AWSLambdaException", + "Lambda.SdkClientException", + "Lambda.TooManyRequestsException" + ], + "IntervalSeconds": 1, + "MaxAttempts": 3, + "BackoffRate": 2 + } + ], + "ResultSelector": { + "read_count_by_fastq_list_row.$": "$.Payload.read_count_by_fastq_list_row" + }, + "End": true + } + } } ], - "ResultPath": "$.decompress_fastq_list_rows_step", "ResultSelector": { - "fastq_list_rows.$": "$.Payload.decompressed_dict" - }, - "Next": "Add fastq list rows to instrument run id (Instrument DB)" - }, - "Add fastq list rows to instrument run id (Instrument DB)": { - "Type": "Task", - "Resource": "arn:aws:states:::dynamodb:putItem", - "Parameters": { - "TableName": "${__table_name__}", - "Item": { - "id.$": "$.inputs.payload.data.outputs.instrumentRunId", - "id_type": "${__fastq_list_rows_table_partition_name__}", - "fastq_list_rows": { - "S.$": "States.JsonToString($.decompress_fastq_list_rows_step.fastq_list_rows)" - } - } + "fastq_list_rows.$": "$.[0].clean_fastq_list_rows_step.fastq_list_rows", + "read_count_by_fastq_list_row.$": "$.[1].read_count_by_fastq_list_row" }, - "ResultPath": null, - "Next": "Wait For DB Update" + "ResultPath": "$.pre_steps_to_rapid_qc_steps" }, "Wait For DB Update": { "Type": "Wait", "Seconds": 1, - "End": true + "Next": "Iterate over fastq list rows" + }, + "Iterate over fastq list rows": { + "Type": "Map", + "ItemsPath": "$.pre_steps_to_rapid_qc_steps.fastq_list_rows", + "ItemSelector": { + "fastq_list_row.$": "$$.Map.Item.Value", + "read_count_by_fastq_list_row.$": "$.pre_steps_to_rapid_qc_steps.read_count_by_fastq_list_row" + }, + "ItemProcessor": { + "ProcessorConfig": { + "Mode": "INLINE" + }, + "StartAt": "Get Read Counts", + "States": { + "Get Read Counts": { + "Type": "Pass", + "Next": "Get Rapid QC", + "Parameters": { + "read_count_by_fastq_list_row.$": "States.ArrayGetItem($.read_count_by_fastq_list_row[?(@.fastqListRowRgid==$.fastq_list_row.rgid)].numReads, 0)" + }, + "ResultPath": "$.get_read_count_step" + }, + "Get 
Rapid QC": { + "Type": "Parallel", + "Branches": [ + { + "StartAt": "Get fastqc stats", + "States": { + "Get fastqc stats": { + "Type": "Parallel", + "Branches": [ + { + "StartAt": "Get shallow fastqc stats R1", + "States": { + "Get shallow fastqc stats R1": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Parameters": { + "FunctionName": "${__get_fastqc_stats_lambda_function_arn__}", + "Payload": { + "read_count.$": "$.get_read_count_step.read_count_by_fastq_list_row", + "fastq_uri.$": "$.fastq_list_row.read1FileUri" + } + }, + "Retry": [ + { + "ErrorEquals": [ + "Lambda.ServiceException", + "Lambda.AWSLambdaException", + "Lambda.SdkClientException", + "Lambda.TooManyRequestsException" + ], + "IntervalSeconds": 1, + "MaxAttempts": 3, + "BackoffRate": 2 + } + ], + "ResultSelector": { + "fastqc_output.$": "$.Payload.fastqc_output" + }, + "ResultPath": "$.fastqc_stats_step_r1", + "End": true + } + } + }, + { + "StartAt": "Get shallow fastqc stats R2", + "States": { + "Get shallow fastqc stats R2": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Parameters": { + "FunctionName": "${__get_fastqc_stats_lambda_function_arn__}", + "Payload": { + "read_count.$": "$.get_read_count_step.read_count_by_fastq_list_row", + "fastq_uri.$": "$.fastq_list_row.read2FileUri" + } + }, + "Retry": [ + { + "ErrorEquals": [ + "Lambda.ServiceException", + "Lambda.AWSLambdaException", + "Lambda.SdkClientException", + "Lambda.TooManyRequestsException" + ], + "IntervalSeconds": 1, + "MaxAttempts": 3, + "BackoffRate": 2 + } + ], + "ResultSelector": { + "fastqc_output.$": "$.Payload.fastqc_output" + }, + "ResultPath": "$.fastqc_stats_step_r2", + "End": true + } + } + } + ], + "ResultSelector": { + "fastqc_stats_r1.$": "$.[0].fastqc_stats_step_r1.fastqc_output", + "fastqc_stats_r2.$": "$.[1].fastqc_stats_step_r2.fastqc_output" + }, + "End": true + } + } + }, + { + "StartAt": "Get Sequali Stats", + "States": { + "Get Sequali Stats": { + "Type": "Task", + 
"Resource": "arn:aws:states:::lambda:invoke", + "Parameters": { + "FunctionName": "${__get_sequali_stats_lambda_function_arn__}", + "Payload": { + "read1_fastq_uri.$": "$.fastq_list_row.read1FileUri", + "read2_fastq_uri.$": "$.fastq_list_row.read2FileUri", + "read_count.$": "$.get_read_count_step.read_count_by_fastq_list_row" + } + }, + "Retry": [ + { + "ErrorEquals": [ + "Lambda.ServiceException", + "Lambda.AWSLambdaException", + "Lambda.SdkClientException", + "Lambda.TooManyRequestsException" + ], + "IntervalSeconds": 1, + "MaxAttempts": 3, + "BackoffRate": 2 + } + ], + "ResultSelector": { + "sequali_rapid_summary.$": "$.Payload.sequali_rapid_summary" + }, + "End": true + } + } + } + ], + "ResultSelector": { + "fastqc_stats_r1.$": "$.[0].fastqc_stats_r1", + "fastqc_stats_r2.$": "$.[0].fastqc_stats_r2", + "sequali_rapid_summary.$": "$.[1].sequali_rapid_summary" + }, + "ResultPath": "$.get_rapid_qc_step", + "Next": "Add Fastq List Row to DB" + }, + "Add Fastq List Row to DB": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:putItem", + "Parameters": { + "TableName": "${__table_name__}", + "Item": { + "id.$": "$.fastq_list_row.rgid", + "id_type": "${__fastq_list_row_table_partition_name__}", + "fastqc_rapid_stats_r1": { + "S.$": "States.JsonToString($.get_rapid_qc_step.fastqc_stats_r1)" + }, + "fastqc_rapid_stats_r2": { + "S.$": "States.JsonToString($.get_rapid_qc_step.fastqc_stats_r2)" + }, + "sequali_rapid_summary": { + "S.$": "States.JsonToString($.get_rapid_qc_step.sequali_rapid_summary)" + }, + "read_count": { + "N.$": "States.Format('{}', $.get_read_count_step.read_count_by_fastq_list_row)" + } + } + }, + "End": true, + "ResultPath": null + } + } + }, + "ResultSelector": { + "summary_stats.$": "$.[*].summarise_outputs_step.summary_obj" + }, + "End": true, + "ResultPath": null } } }, @@ -188,7 +449,7 @@ } ], "ResultSelector": { - "fastq_list_rows.$": "$.[0].decompress_fastq_list_rows_step.fastq_list_rows", + "fastq_list_rows.$": 
"$.[0].pre_steps_to_rapid_qc_steps.fastq_list_rows", "library_objects_list.$": "$.[1].library_objects_list", "project_objects_list.$": "$.[2].project_objects_list" }, @@ -297,8 +558,45 @@ "ProcessorConfig": { "Mode": "INLINE" }, - "StartAt": "Generate FastqListRow Added Event", + "StartAt": "Get FastqListRow Added Event", "States": { + "Get FastqListRow Added Event": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:getItem", + "Parameters": { + "TableName": "${__table_name__}", + "Key": { + "id.$": "$.fastq_list_row_event_data.fastqListRow.rgid", + "id_type": "${__fastq_list_row_table_partition_name__}" + } + }, + "Next": "QC As Event Object", + "ResultPath": "$.get_qc_stats_from_db_step" + }, + "QC As Event Object": { + "Type": "Pass", + "Next": "Merge QC With Event Data", + "Parameters": { + "qc": { + "fastqQcRapidSummary": { + "r1.$": "States.StringToJson($.get_qc_stats_from_db_step.Item.fastqc_rapid_stats_r1.S)", + "r2.$": "States.StringToJson($.get_qc_stats_from_db_step.Item.fastqc_rapid_stats_r2.S)" + }, + "sequaliRapidSummary.$": "States.StringToJson($.get_qc_stats_from_db_step.Item.sequali_rapid_summary.S)", + "demuxSummary": { + "readCount.$": "$.get_qc_stats_from_db_step.Item.read_count.N" + } + } + }, + "ResultPath": "$.get_qc_as_event_object_step" + }, + "Merge QC With Event Data": { + "Type": "Pass", + "Next": "Generate FastqListRow Added Event", + "Parameters": { + "fastq_list_row_event_data.$": "States.JsonMerge($.fastq_list_row_event_data, $.get_qc_as_event_object_step, false)" + } + }, "Generate FastqListRow Added Event": { "Type": "Task", "Resource": "arn:aws:states:::events:putEvents", diff --git a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/index.ts b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/index.ts index 8db1bf7aa..10fe9333c 100644 --- a/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/index.ts +++ 
b/lib/workload/stateless/stacks/stacky-mcstackface/glue-constructs/index.ts @@ -66,6 +66,8 @@ export class GlueConstruct extends Construct { eventBusObj: props.eventBusObj, /* Tables */ instrumentRunTableObj: props.instrumentRunTableObj, + /* Secrets */ + icav2AccessTokenSecretObj: props.icav2AccessTokenSecretObj, }); /*