From c18174de1dfdb0fc100afd4a69bd74b4b2d7f5b0 Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 9 Aug 2024 10:59:55 +0200 Subject: [PATCH 1/3] add raw library merging + publishing --- conf/modules.config | 53 +++++++++++++++++++++++++++++++++++------ workflows/eager.nf | 58 +++++++++++++++++++++++++-------------------- 2 files changed, 78 insertions(+), 33 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 37c5d666..bc41e822 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -951,7 +951,7 @@ process { // LIBRARY MERGE // - withName: SAMTOOLS_MERGE_LIBRARIES { + withName: ".*MERGE_LIBRARIES:SAMTOOLS_MERGE_LIBRARIES" { tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}_unsorted" } publishDir = [ @@ -959,32 +959,71 @@ process { ] } - withName: SAMTOOLS_SORT_MERGED_LIBRARIES { + withName: ".*MERGE_LIBRARIES:SAMTOOLS_SORT_MERGED_LIBRARIES" { tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/final_bams/" }, + path: { "${params.outdir}/final_bams/raw/" }, mode: params.publish_dir_mode, pattern: '*.bam' ] } - withName: SAMTOOLS_INDEX_MERGED_LIBRARIES { + withName: ".*MERGE_LIBRARIES:SAMTOOLS_INDEX_MERGED_LIBRARIES" { tag = { "${meta.reference}|${meta.sample_id}" } ext.args = { params.fasta_largeref ? "-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/final_bams/" }, + path: { "${params.outdir}/final_bams/raw/" }, mode: params.publish_dir_mode, pattern: '*.{bai,csi}' ] } - withName: SAMTOOLS_FLAGSTAT_MERGED_LIBRARIES { + withName: ".*MERGE_LIBRARIES:SAMTOOLS_FLAGSTAT_MERGED_LIBRARIES" { tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/final_bams/" }, + path: { "${params.outdir}/final_bams/raw/" }, + mode: params.publish_dir_mode, + pattern: '*.flagstat' + ] + } + + withName: ".*MERGE_LIBRARIES:SAMTOOLS_MERGE_LIBRARIES" { + tag = { "${meta.reference}|${meta.sample_id}" } + ext.prefix = { "${meta.sample_id}_${meta.reference}_unsorted" } + publishDir = [ + enabled: false + ] + } + + withName: ".*MERGE_LIBRARIES_GENOTYPING:SAMTOOLS_SORT_MERGED_LIBRARIES" { + tag = { "${meta.reference}|${meta.sample_id}" } + ext.prefix = { "${meta.sample_id}_${meta.reference}" } + publishDir = [ + path: { "${params.outdir}/final_bams/for_genotyping/" }, + mode: params.publish_dir_mode, + pattern: '*.bam' + ] + } + + withName: ".*MERGE_LIBRARIES_GENOTYPING:SAMTOOLS_INDEX_MERGED_LIBRARIES" { + tag = { "${meta.reference}|${meta.sample_id}" } + ext.args = { params.fasta_largeref ? "-c" : "" } + ext.prefix = { "${meta.sample_id}_${meta.reference}" } + publishDir = [ + path: { "${params.outdir}/final_bams/for_genotyping/" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] + } + + withName: ".*MERGE_LIBRARIES_GENOTYPING:SAMTOOLS_FLAGSTAT_MERGED_LIBRARIES" { + tag = { "${meta.reference}|${meta.sample_id}" } + ext.prefix = { "${meta.sample_id}_${meta.reference}" } + publishDir = [ + path: { "${params.outdir}/final_bams/for_genotyping/" }, mode: params.publish_dir_mode, pattern: '*.flagstat' ] diff --git a/workflows/eager.nf b/workflows/eager.nf index fee97626..f7fc00f1 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -21,18 +21,19 @@ include { addNewMetaFromAttributes } from '../subworkflows/local/utils_nfcore_ea // // TODO rename to active: index_reference, filter_bam etc. -include { REFERENCE_INDEXING } from '../subworkflows/local/reference_indexing' -include { PREPROCESSING } from '../subworkflows/local/preprocessing' -include { MAP } from '../subworkflows/local/map' -include { FILTER_BAM } from '../subworkflows/local/bamfiltering.nf' -include { DEDUPLICATE } from '../subworkflows/local/deduplicate' -include { MANIPULATE_DAMAGE } from '../subworkflows/local/manipulate_damage' -include { METAGENOMICS_COMPLEXITYFILTER } from '../subworkflows/local/metagenomics_complexityfilter' -include { ESTIMATE_CONTAMINATION } from '../subworkflows/local/estimate_contamination' -include { CALCULATE_DAMAGE } from '../subworkflows/local/calculate_damage' -include { RUN_SEXDETERRMINE } from '../subworkflows/local/run_sex_determination' -include { MERGE_LIBRARIES } from '../subworkflows/local/merge_libraries' -include { GENOTYPE } from '../subworkflows/local/genotype' +include { REFERENCE_INDEXING } from '../subworkflows/local/reference_indexing' +include { PREPROCESSING } from '../subworkflows/local/preprocessing' +include { MAP } from '../subworkflows/local/map' +include { FILTER_BAM } from '../subworkflows/local/bamfiltering.nf' +include { DEDUPLICATE } from '../subworkflows/local/deduplicate' +include { MANIPULATE_DAMAGE } from '../subworkflows/local/manipulate_damage' +include { METAGENOMICS_COMPLEXITYFILTER } from '../subworkflows/local/metagenomics_complexityfilter' +include { ESTIMATE_CONTAMINATION } from '../subworkflows/local/estimate_contamination' +include { CALCULATE_DAMAGE } from '../subworkflows/local/calculate_damage' +include { RUN_SEXDETERRMINE } from '../subworkflows/local/run_sex_determination' +include { MERGE_LIBRARIES } from '../subworkflows/local/merge_libraries' +include { MERGE_LIBRARIES as MERGE_LIBRARIES_GENOTYPING } from '../subworkflows/local/merge_libraries' +include { GENOTYPE } from '../subworkflows/local/genotype' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -264,6 +265,15 @@ workflow EAGER { ch_dedupped_flagstat = Channel.empty() } + // + // SUBWORKFLOW: Merge libraries per sample + // + + MERGE_LIBRARIES ( ch_dedupped_bams ) + ch_versions = ch_versions.mix( MERGE_LIBRARIES.out.versions ) + ch_merged_dedup_bams = MERGE_LIBRARIES.out.bam_bai + ch_multiqc_files = ch_multiqc_files.mix( MERGE_LIBRARIES.out.mqc.collect{it[1]}.ifEmpty([]) ) + // // MODULE QUALIMAP // @@ -538,27 +548,23 @@ workflow EAGER { // // SUBWORKFLOW: aDNA Damage Manipulation + // if ( params.run_mapdamage_rescaling || params.run_pmd_filtering || params.run_trim_bam ) { MANIPULATE_DAMAGE( ch_dedupped_bams, ch_fasta_for_deduplication.fasta, REFERENCE_INDEXING.out.pmd_masking ) - ch_multiqc_files = ch_multiqc_files.mix( MANIPULATE_DAMAGE.out.flagstat.collect{it[1]}.ifEmpty([]) ) - ch_versions = ch_versions.mix( MANIPULATE_DAMAGE.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( MANIPULATE_DAMAGE.out.flagstat.collect{it[1]}.ifEmpty([]) ) + ch_versions = ch_versions.mix( MANIPULATE_DAMAGE.out.versions ) ch_bams_for_library_merge = params.genotyping_source == 'rescaled' ? MANIPULATE_DAMAGE.out.rescaled : params.genotyping_source == 'pmd' ? MANIPULATE_DAMAGE.out.filtered : params.genotyping_source == 'trimmed' ? MANIPULATE_DAMAGE.out.trimmed : ch_dedupped_bams + + // SUBWORKFLOW: merge libraries for genotyping + MERGE_LIBRARIES_GENOTYPING ( ch_bams_for_library_merge ) + ch_versions = ch_versions.mix( MERGE_LIBRARIES_GENOTYPING.out.versions ) + ch_bams_for_genotyping = MERGE_LIBRARIES_GENOTYPING.out.bam_bai + ch_multiqc_files = ch_multiqc_files.mix( MERGE_LIBRARIES_GENOTYPING.out.mqc.collect{it[1]}.ifEmpty([]) ) } else { - ch_bams_for_library_merge = ch_dedupped_bams + ch_bams_for_genotyping = ch_merged_dedup_bams } - // - // SUBWORKFLOW: MERGE LIBRARIES - // - - // The bams being merged are always the ones specified by params.genotyping_source, - // unless the user skipped damage manipulation, in which case it is the DEDUPLICATION output. - MERGE_LIBRARIES ( ch_bams_for_library_merge ) - ch_versions = ch_versions.mix( MERGE_LIBRARIES.out.versions ) - ch_bams_for_genotyping = MERGE_LIBRARIES.out.bam_bai - ch_multiqc_files = ch_multiqc_files.mix( MERGE_LIBRARIES.out.mqc.collect{it[1]}.ifEmpty([]) ) // Not sure if this is needed, or if it needs to be moved to line 564? - // // SUBWORKFLOW: Genotyping // From d2983efbffe247a62f62809600b3bb4fa325c343 Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 9 Aug 2024 11:09:52 +0200 Subject: [PATCH 2/3] merged input for bedtools, qualimap, sex det --- conf/modules.config | 20 ++++++++++++-------- workflows/eager.nf | 6 +++--- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index bc41e822..f91a741c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -668,16 +668,16 @@ process { // BEDTOOLS_COVERAGE // withName: SAMTOOLS_VIEW_GENOME { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}" } publishDir = [ enabled: false ] } withName: BEDTOOLS_COVERAGE_DEPTH { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}" } ext.args = '-mean -nonamecheck' - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_depth" } + ext.prefix = { "${meta.sample_id}_${meta.reference}_depth" } publishDir = [ path: { "${params.outdir}/mapstats/bedtools" }, mode: params.publish_dir_mode @@ -685,9 +685,9 @@ process { } withName: BEDTOOLS_COVERAGE_BREADTH { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}" } ext.args = '-nonamecheck' - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_breadth" } + ext.prefix = { "${meta.sample_id}_${meta.reference}_breadth" } publishDir = [ path: { "${params.outdir}/mapstats/bedtools" }, mode: params.publish_dir_mode @@ -880,8 +880,12 @@ process { ] } + // + // QUALIMAP + // + withName: 'QUALIMAP_BAMQC_WITHBED|QUALIMAP_BAMQC_NOBED' { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}" } publishDir = [ path: { "${params.outdir}/mapstats/qualimap/${meta.reference}/${meta.sample_id}/}" }, mode: params.publish_dir_mode, @@ -928,7 +932,7 @@ process { // RUN SEXDETERRMINE // withName: SAMTOOLS_DEPTH_SEXDETERRMINE { - tag = { "${meta1.reference}|${meta1.sample_id}_${meta1.library_id}" } + tag = { "${meta1.reference}|${meta1.sample_id}" } ext.prefix = { "${meta2.id}_samtoolsdepth" } ext.args = '-aa -q30 -Q30 -H' publishDir = [ @@ -937,7 +941,7 @@ process { } withName: SEXDETERRMINE { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.reference}_sexdeterrmine" } publishDir = [ path: { "${params.outdir}/sex_determination/" }, diff --git a/workflows/eager.nf b/workflows/eager.nf index f7fc00f1..a6db6da1 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -283,7 +283,7 @@ workflow EAGER { .map{ addNewMetaFromAttributes( it, "id" , "reference" , false ) } - ch_qualimap_input = ch_dedupped_bams + ch_qualimap_input = ch_merged_dedup_bams .map { meta, bam, bai -> [ meta, bam ] @@ -466,7 +466,7 @@ workflow EAGER { addNewMetaFromAttributes( it, "id" , "reference" , false ) } - ch_bedtools_prep = ch_dedupped_bams + ch_bedtools_prep = ch_merged_dedup_bams .map { addNewMetaFromAttributes( it, "reference" , "reference" , false ) } @@ -527,7 +527,7 @@ workflow EAGER { // if ( params.run_sexdeterrmine ) { - ch_sexdeterrmine_input = ch_dedupped_bams + ch_sexdeterrmine_input = ch_merged_dedup_bams RUN_SEXDETERRMINE(ch_sexdeterrmine_input, REFERENCE_INDEXING.out.sexdeterrmine_bed ) ch_versions = ch_versions.mix( RUN_SEXDETERRMINE.out.versions ) From 976c5cffd3c3e95b0c22fbf05a2bb4e17c894b56 Mon Sep 17 00:00:00 2001 From: Selina Carlhoff <73653549+scarlhoff@users.noreply.github.com> Date: Fri, 13 Sep 2024 14:50:48 +0200 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Thiseas C. Lamnidis --- conf/modules.config | 6 +++--- workflows/eager.nf | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f91a741c..aa4c9eba 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1006,7 +1006,7 @@ process { tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/final_bams/for_genotyping/" }, + path: { "${params.outdir}/final_bams/${params.genotyping_source}/" }, mode: params.publish_dir_mode, pattern: '*.bam' ] @@ -1017,7 +1017,7 @@ process { ext.args = { params.fasta_largeref ? "-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/final_bams/for_genotyping/" }, + path: { "${params.outdir}/final_bams/${params.genotyping_source}/" }, mode: params.publish_dir_mode, pattern: '*.{bai,csi}' ] @@ -1027,7 +1027,7 @@ process { tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/final_bams/for_genotyping/" }, + path: { "${params.outdir}/final_bams/${params.genotyping_source}/" }, mode: params.publish_dir_mode, pattern: '*.flagstat' ] diff --git a/workflows/eager.nf b/workflows/eager.nf index a6db6da1..65bf4c40 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -554,7 +554,7 @@ workflow EAGER { MANIPULATE_DAMAGE( ch_dedupped_bams, ch_fasta_for_deduplication.fasta, REFERENCE_INDEXING.out.pmd_masking ) ch_multiqc_files = ch_multiqc_files.mix( MANIPULATE_DAMAGE.out.flagstat.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( MANIPULATE_DAMAGE.out.versions ) - ch_bams_for_library_merge = params.genotyping_source == 'rescaled' ? MANIPULATE_DAMAGE.out.rescaled : params.genotyping_source == 'pmd' ? MANIPULATE_DAMAGE.out.filtered : params.genotyping_source == 'trimmed' ? MANIPULATE_DAMAGE.out.trimmed : ch_dedupped_bams + ch_bams_for_library_merge = params.genotyping_source == 'rescaled' ? MANIPULATE_DAMAGE.out.rescaled : params.genotyping_source == 'pmd' ? MANIPULATE_DAMAGE.out.filtered : params.genotyping_source == 'trimmed' ? MANIPULATE_DAMAGE.out.trimmed : ch_merged_dedup_bams // SUBWORKFLOW: merge libraries for genotyping MERGE_LIBRARIES_GENOTYPING ( ch_bams_for_library_merge )