diff --git a/conf/modules.config b/conf/modules.config index b1a51e9..21cb4af 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -657,61 +657,60 @@ process { // Scaffolding hap1/hap2 if (params.hifiasm_hic_on) { - // hap1 scaffolding - withName: '.*HIC_MAPPING_HAP1:SAMTOOLS_MARKDUP_HIC_MAPPING' { + withName: '.*HIC_MAPPING_HAP.*:SAMTOOLS_MARKDUP_HIC_MAPPING' { ext.prefix = { "${meta.id}_mkdup" } publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: '.*HIC_MAPPING_HAP1:BAMTOBED_SORT' { + withName: '.*HIC_MAPPING_HAP.*:BAMTOBED_SORT' { publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: '.*HIC_MAPPING_HAP1:CONVERT_STATS:SAMTOOLS_STATS' { + withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_STATS' { publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: '.*HIC_MAPPING_HAP1:CONVERT_STATS:SAMTOOLS_FLAGSTAT' { + withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_FLAGSTAT' { publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - withName: '.*HIC_MAPPING_HAP1:CONVERT_STATS:SAMTOOLS_IDXSTATS' { + withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_IDXSTATS' { publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: '.*SCAFFOLDING_HAP1:YAHS' { - ext.prefix = 'hap1' + withName: '.*SCAFFOLDING_HAP.*:YAHS' { + ext.prefix = { "${meta.hap_id}" } publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: '.*SCAFFOLDING_HAP1:COOLER_CLOAD' { + withName: '.*SCAFFOLDING_HAP.*:COOLER_CLOAD' { // Positions in the input file are zero-based; // chrom1 field number (one-based) is 2; // pos1 field number (one-based) is 3; @@ -719,142 +718,43 @@ process { // pos2 field number (one-based) is 7 ext.args = 'pairs -0 -c1 2 -p1 3 -c2 6 -p2 7' publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - withName: '.*SCAFFOLDING_HAP1:PRETEXTSNAPSHOT' { + withName: '.*SCAFFOLDING_HAP.*:PRETEXTSNAPSHOT' { // Make one plot containing all sequences ext.args = '--sequences \"=full\"' publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: '.*SCAFFOLDING_HAP1:JUICER_TOOLS_PRE' { + withName: '.*SCAFFOLDING_HAP.*:JUICER_TOOLS_PRE' { ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar' ext.juicer_jvm_params = '-Xms1g -Xmx6g' publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: '.*SCAFFOLDING_HAP1:JUICER_PRE' { + withName: '.*SCAFFOLDING_HAP.*:JUICER_PRE' { ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'" publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - - // End of hap1 scaffolding - - // hap2 scaffolding - - withName: '.*HIC_MAPPING_HAP2:SAMTOOLS_MARKDUP_HIC_MAPPING' { - ext.prefix = { "${meta.id}_mkdup" } - publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: '.*HIC_MAPPING_HAP2:BAMTOBED_SORT' { - publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - - withName: '.*HIC_MAPPING_HAP2:CONVERT_STATS:SAMTOOLS_STATS' { - publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*HIC_MAPPING_HAP2:CONVERT_STATS:SAMTOOLS_FLAGSTAT' { - publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - - withName: '.*HIC_MAPPING_HAP2:CONVERT_STATS:SAMTOOLS_IDXSTATS' { - publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] } - withName: '.*SCAFFOLDING_HAP2:YAHS' { - ext.prefix = 'hap2' - publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - - } - - withName: '.*SCAFFOLDING_HAP2:COOLER_CLOAD' { - // Positions in the input file are zero-based; - // chrom1 field number (one-based) is 2; - // pos1 field number (one-based) is 3; - // chrom2 field number (one-based) is 6; - // pos2 field number (one-based) is 7 - ext.args = 'pairs -0 -c1 2 -p1 3 -c2 6 -p2 7' - publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*SCAFFOLDING_HAP2:PRETEXTSNAPSHOT' { - // Make one plot containing all sequences - ext.args = '--sequences \"=full\"' - publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*SCAFFOLDING_HAP2:JUICER_TOOLS_PRE' { - ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar' - ext.juicer_jvm_params = '-Xms1g -Xmx6g' - publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*SCAFFOLDING_HAP2:JUICER_PRE' { - ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'" - publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - } - // End of hap2 scaffolding + // End of hap1/hap2 scaffolding withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:GFASTATS_PRI' { ext.prefix = { "${meta.id}_scaffolds_final" } @@ -911,7 +811,6 @@ process { // End of Scaffolding hap1/hap2 // End of Set up of the scaffolding pipeline - //Set up of assembly stats subworkflow withName: 'BUSCO' { ext.args = "--mode genome" diff --git a/subworkflows/local/hic_mapping.nf b/subworkflows/local/hic_mapping.nf index c8786ba..a4d340c 100644 --- a/subworkflows/local/hic_mapping.nf +++ b/subworkflows/local/hic_mapping.nf @@ -26,6 +26,7 @@ workflow HIC_MAPPING { reference_tuple // Channel [ val(meta), path(file) ] hic_reads_path // Channel [ val(meta), path(directory) ] hic_aligner_ch // Channel [ val(meta), val(hic_aligner)] + hap_id // val: haplotype label stored in meta.hap_id ('hap1', 'hap2', or '' for the primary assembly) main: ch_versions = Channel.empty() @@ -67,7 +68,8 @@ workflow HIC_MAPPING { bwamem2 : it[0].aligner == "bwamem2" } .set{ch_aligner} - + + // // SUBWORKFLOW: mapping hic reads using minimap2 // @@ -78,7 +80,7 @@ workflow HIC_MAPPING { ch_versions = ch_versions.mix( HIC_MINIMAP2.out.versions ) mappedbams = HIC_MINIMAP2.out.mappedbams - // + // // SUBWORKFLOW: mapping hic reads using bwamem2 // HIC_BWAMEM2 ( @@ -88,6 +90,9 @@ workflow HIC_MAPPING { ch_versions = ch_versions.mix( HIC_BWAMEM2.out.versions ) mappedbams = mappedbams.mix(HIC_BWAMEM2.out.mappedbams) + mappedbams.map{meta, bams -> [[id: meta.id, hap_id:hap_id], bams]} + .set { mappedbams } + // // LOGIC: GENERATE INDEX OF REFERENCE // diff --git a/subworkflows/local/scaffolding.nf b/subworkflows/local/scaffolding.nf index b15b443..10f3226 100644 --- a/subworkflows/local/scaffolding.nf +++ b/subworkflows/local/scaffolding.nf @@ -16,6 +16,7 @@ workflow SCAFFOLDING { bed_in // tuple(meta, bed) fasta_in // tuple(meta, fasta) cool_bin // val: cooler cload parameter + hap_id // val: hap1/hap2/empty main: ch_versions = Channel.empty() @@ -37,9 +38,14 @@ workflow SCAFFOLDING { .set{ scaf_ref_fai } // + //
LOGIC: MIX IN THE HAPLOTYPE ID TO CONTROL THE OUTPUT SUFFIX + // + bed_in.map{ meta, bed -> [[id:meta.id, hap_id:hap_id],bed] } + .set{ bed_in_hap } + // // MODULE: PERFORM SCAAFFOLDING WITH YAHS // - YAHS( bed_in, scaf_ref, scaf_ref_fai ) + YAHS( bed_in_hap , scaf_ref, scaf_ref_fai ) ch_versions = ch_versions.mix(YAHS.out.versions) // @@ -59,7 +65,7 @@ workflow SCAFFOLDING { YAHS.out.binary.join(YAHS.out.scaffolds_agp) .combine(scaf_ref) .combine(scaf_ref_fai) - .map{meta, binary, agp, fa, fai -> [meta, binary, agp, fai]} + .map{meta, binary, agp, fa, fai -> [[id:meta.id, hap_id:hap_id], binary, agp, fai]} .set{ch_merge} // @@ -71,7 +77,7 @@ workflow SCAFFOLDING { // // LOGIC: BIN CONTACT PAIRS // - JUICER_PRE.out.pairs.join(bed_in) + JUICER_PRE.out.pairs.join(bed_in_hap) .combine(Channel.of(cool_bin)) .set{ch_juicer} diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index 14fe3c7..2e5e389 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -340,13 +340,13 @@ workflow GENOMEASSEMBLY { // // SUBWORKFLOW: MAP HIC DATA TO THE PRIMARY ASSEMBLY // - HIC_MAPPING ( primary_contigs_ch,crams_ch,hic_aligner_ch ) + HIC_MAPPING ( primary_contigs_ch,crams_ch,hic_aligner_ch, "") ch_versions = ch_versions.mix(HIC_MAPPING.out.versions) // // SUBWORKFLOW: SCAFFOLD THE PRIMARY ASSEMBLY // - SCAFFOLDING( HIC_MAPPING.out.bed, primary_contigs_ch, cool_bin ) + SCAFFOLDING( HIC_MAPPING.out.bed, primary_contigs_ch, cool_bin, "") ch_versions = ch_versions.mix(SCAFFOLDING.out.versions) // @@ -367,37 +367,36 @@ workflow GENOMEASSEMBLY { unset_busco_alts ) - if ( hifiasm_hic_on ) { // // SUBWORKFLOW: MAP HIC DATA TO THE HAP1 CONTIGS // - HIC_MAPPING_HAP1 ( RAW_ASSEMBLY.out.hap1_hic_contigs, crams_ch, hic_aligner_ch ) + HIC_MAPPING_HAP1 ( RAW_ASSEMBLY.out.hap1_hic_contigs, crams_ch, hic_aligner_ch, 'hap1' ) ch_versions = ch_versions.mix(HIC_MAPPING_HAP1.out.versions) // // SUBWORKFLOW: SCAFFOLD HAP1 // - SCAFFOLDING_HAP1( 
HIC_MAPPING_HAP1.out.bed, RAW_ASSEMBLY.out.hap1_hic_contigs, cool_bin ) + SCAFFOLDING_HAP1( HIC_MAPPING_HAP1.out.bed, RAW_ASSEMBLY.out.hap1_hic_contigs, cool_bin, 'hap1' ) ch_versions = ch_versions.mix(SCAFFOLDING_HAP1.out.versions) // // SUBWORKFLOW: MAP HIC DATA TO THE HAP2 CONTIGS // - HIC_MAPPING_HAP2 ( RAW_ASSEMBLY.out.hap2_hic_contigs, crams_ch, hic_aligner_ch ) + HIC_MAPPING_HAP2 ( RAW_ASSEMBLY.out.hap2_hic_contigs, crams_ch, hic_aligner_ch, 'hap2' ) ch_versions = ch_versions.mix(HIC_MAPPING_HAP2.out.versions) // // SUBWORKFLOW: SCAFFOLD HAP2 // - SCAFFOLDING_HAP2( HIC_MAPPING_HAP2.out.bed, RAW_ASSEMBLY.out.hap2_hic_contigs, cool_bin ) + SCAFFOLDING_HAP2( HIC_MAPPING_HAP2.out.bed, RAW_ASSEMBLY.out.hap2_hic_contigs, cool_bin, 'hap2' ) ch_versions = ch_versions.mix(SCAFFOLDING_HAP2.out.versions) // // LOGIC: CREATE A CHANNEL FOR THE FULL HAP1/HAP2 ASSEMBLY // SCAFFOLDING_HAP1.out.fasta.combine(SCAFFOLDING_HAP2.out.fasta) - .map{meta_s, fasta_s, meta_h, fasta_h -> [ meta_h, fasta_s, fasta_h ]} + .map{meta_s, fasta_s, meta_h, fasta_h -> [ [id:meta_h.id], fasta_s, fasta_h ]} .set{ stats_haps_input_ch } //