diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cef7ac5..609ed74f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#313](https://github.com/nf-core/taxprofiler/pull/304) Fix pipeline not providing error when database sheet does not have a header (♥ to @noah472 for reporting, fix by @jfy133) - [#330](https://github.com/nf-core/taxprofiler/pull/330) Added better tagging to allow disambiguation of Kraken2 steps of Kraken2 vs Bracken (♥ to @MajoroMask for requesting, added by @jfy133) - [#334](https://github.com/nf-core/taxprofiler/pull/334) Increase the memory of the FALCO process to 4GB (fix by @LilyAnderssonLee) +- Improved meta map stability for more robust pipeline resuming (fix by @jfy133) ### `Dependencies` diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index de11bf47..9e0f0165 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -64,7 +64,6 @@ workflow PROFILING { if ( params.run_malt ) { - // MALT: We groupTuple to have all samples in one channel for MALT as database // loading takes a long time, so we only want to run it once per database ch_input_for_malt = ch_input_for_profiling.malt @@ -75,17 +74,11 @@ workflow PROFILING { // as we don't run run on a per-sample basis due to huge datbaases // so all samples are in one run and so sample-specific metadata // unnecessary. Set as database name to prevent `null` job ID and prefix. - def temp_meta = [ id: meta.db_name ] + def new_meta = db_meta + [ id: db_meta.db_name ] // Extend database parameters to specify whether to save alignments or not - def new_db_meta = db_meta.clone() def sam_format = params.malt_save_reads ? ' --alignments ./ -za false' : "" - new_db_meta.db_params = db_meta.db_params + sam_format - - // Combine reduced sample metadata with updated database parameters metadata, - // make sure id is db_name for publishing purposes. 
- def new_meta = temp_meta + new_db_meta - new_meta.id = new_meta.db_name + new_meta.db_params = db_meta.db_params + sam_format [ new_meta, reads, db ] @@ -106,9 +99,8 @@ workflow PROFILING { // re-extract meta from file names, use filename without rma to // ensure we keep paired-end information in downstream filenames // when no pair-merging - def meta_new = meta.clone() - meta_new['db_name'] = meta.id - meta_new['id'] = rma.baseName + def meta_new = meta + [db_name: meta.id, id: rma.baseName] + [ meta_new, rma ] } @@ -127,16 +119,17 @@ workflow PROFILING { ch_input_for_kraken2 = ch_input_for_profiling.kraken2 .map { meta, reads, db_meta, db -> - def db_meta_new = db_meta.clone() - // Only take second element if one exists - def parsed_params = db_meta_new['db_params'].split(";") + // Only take first element if one exists + def parsed_params = db_meta['db_params'].split(";") + // Declare locally: an undeclared assignment inside a closure writes to the shared script binding + def db_meta_new if ( parsed_params.size() == 2 ) { - db_meta_new['db_params'] = parsed_params[0] + db_meta_new = db_meta + [db_params: parsed_params[0]] } else if ( parsed_params.size() == 0 ) { - db_meta_new['db_params'] = "" + db_meta_new = db_meta + [db_params: ""] } else { - db_meta_new['db_params'] = parsed_params[0] + db_meta_new = db_meta + [db_params: parsed_params[0]] } [ meta, reads, db_meta_new, db ] @@ -186,18 +179,24 @@ workflow PROFILING { .map { key, meta, reads, db_meta, db -> - def db_meta_new = db_meta.clone() + + // Have to make a completely fresh copy here as otherwise + // was getting db_param loss due to upstream meta parsing at + // kraken2 input channel manipulation step + def db_meta_keys = db_meta.keySet() + def db_meta_new = db_meta.subMap(db_meta_keys) // Have to pick second element if using bracken, as first element // contains kraken parameters if ( db_meta.tool == 'bracken' ) { // Only take second element if one exists - def parsed_params = db_meta_new['db_params'].split(";") + def parsed_params = db_meta['db_params'].split(";") + if ( parsed_params.size() == 2 ) { - 
db_meta_new['db_params'] = parsed_params[1] + db_meta_new = db_meta + [ db_params: parsed_params[1] ] } else { - db_meta_new['db_params'] = "" + db_meta_new = db_meta + [ db_params: "" ] } } else { diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index cac5a27a..d92208a5 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -28,8 +28,7 @@ workflow SHORTREAD_FASTP { ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged .map { meta, reads -> - def meta_new = meta.clone() - meta_new['single_end'] = true + def meta_new = meta + [single_end: true] [ meta_new, [ reads ].flatten() ] } diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index f964e740..59ef61f9 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -209,8 +209,7 @@ workflow TAXPROFILER { .mix( ch_longreads_hostremoved ) .map { meta, reads -> - def meta_new = meta.clone() - meta_new.remove('run_accession') + def meta_new = meta - meta.subMap('run_accession') [ meta_new, reads ] } .groupTuple()