Skip to content

Commit

Permalink
Parametrize hap1/hap2 within meta for scaffolding
Browse files Browse the repository at this point in the history
  • Loading branch information
Ksenia Krasheninnikova committed Aug 16, 2024
1 parent b0a9671 commit fc3c7f9
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 136 deletions.
145 changes: 22 additions & 123 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -657,204 +657,104 @@ process {

// Scaffolding hap1/hap2
if (params.hifiasm_hic_on) {
// hap1 scaffolding

withName: '.*HIC_MAPPING_HAP1:SAMTOOLS_MARKDUP_HIC_MAPPING' {
withName: '.*HIC_MAPPING_HAP.*:SAMTOOLS_MARKDUP_HIC_MAPPING' {
ext.prefix = { "${meta.id}_mkdup" }
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1" },
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*HIC_MAPPING_HAP1:BAMTOBED_SORT' {
withName: '.*HIC_MAPPING_HAP.*:BAMTOBED_SORT' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1" },
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}


withName: '.*HIC_MAPPING_HAP1:CONVERT_STATS:SAMTOOLS_STATS' {
withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_STATS' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1" },
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*HIC_MAPPING_HAP1:CONVERT_STATS:SAMTOOLS_FLAGSTAT' {
withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_FLAGSTAT' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1" },
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*HIC_MAPPING_HAP1:CONVERT_STATS:SAMTOOLS_IDXSTATS' {
withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_IDXSTATS' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1" },
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*SCAFFOLDING_HAP1:YAHS' {
ext.prefix = 'hap1'
withName: '.*SCAFFOLDING_HAP.*:YAHS' {
ext.prefix = { "${meta.hap_id}" }
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs" },
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]

}

withName: '.*SCAFFOLDING_HAP1:COOLER_CLOAD' {
withName: '.*SCAFFOLDING_HAP.*:COOLER_CLOAD' {
// Positions in the input file are zero-based;
// chrom1 field number (one-based) is 2;
// pos1 field number (one-based) is 3;
// chrom2 field number (one-based) is 6;
// pos2 field number (one-based) is 7
ext.args = 'pairs -0 -c1 2 -p1 3 -c2 6 -p2 7'
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs" },
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*SCAFFOLDING_HAP1:PRETEXTSNAPSHOT' {
withName: '.*SCAFFOLDING_HAP.*:PRETEXTSNAPSHOT' {
// Make one plot containing all sequences
ext.args = '--sequences \"=full\"'
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs" },
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*SCAFFOLDING_HAP1:JUICER_TOOLS_PRE' {
withName: '.*SCAFFOLDING_HAP.*:JUICER_TOOLS_PRE' {
ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar'
ext.juicer_jvm_params = '-Xms1g -Xmx6g'
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs" },
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*SCAFFOLDING_HAP1:JUICER_PRE' {
withName: '.*SCAFFOLDING_HAP.*:JUICER_PRE' {
ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'"
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}


// End of hap1 scaffolding

// hap2 scaffolding

withName: '.*HIC_MAPPING_HAP2:SAMTOOLS_MARKDUP_HIC_MAPPING' {
ext.prefix = { "${meta.id}_mkdup" }
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*HIC_MAPPING_HAP2:BAMTOBED_SORT' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}


withName: '.*HIC_MAPPING_HAP2:CONVERT_STATS:SAMTOOLS_STATS' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*HIC_MAPPING_HAP2:CONVERT_STATS:SAMTOOLS_FLAGSTAT' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2" },
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*HIC_MAPPING_HAP2:CONVERT_STATS:SAMTOOLS_IDXSTATS' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*SCAFFOLDING_HAP2:YAHS' {
ext.prefix = 'hap2'
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]

}

withName: '.*SCAFFOLDING_HAP2:COOLER_CLOAD' {
// Positions in the input file are zero-based;
// chrom1 field number (one-based) is 2;
// pos1 field number (one-based) is 3;
// chrom2 field number (one-based) is 6;
// pos2 field number (one-based) is 7
ext.args = 'pairs -0 -c1 2 -p1 3 -c2 6 -p2 7'
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*SCAFFOLDING_HAP2:PRETEXTSNAPSHOT' {
// Make one plot containing all sequences
ext.args = '--sequences \"=full\"'
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*SCAFFOLDING_HAP2:JUICER_TOOLS_PRE' {
ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar'
ext.juicer_jvm_params = '-Xms1g -Xmx6g'
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*SCAFFOLDING_HAP2:JUICER_PRE' {
ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'"
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
}
// End of hap2 scaffolding
// End of hap1/hap2 scaffolding

withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:GFASTATS_PRI' {
ext.prefix = { "${meta.id}_scaffolds_final" }
Expand Down Expand Up @@ -911,7 +811,6 @@ process {
// End of Scaffolding hap1/hap2
// End of Set up of the scaffolding pipeline


//Set up of assembly stats subworkflow
withName: 'BUSCO' {
ext.args = "--mode genome"
Expand Down
9 changes: 7 additions & 2 deletions subworkflows/local/hic_mapping.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ workflow HIC_MAPPING {
reference_tuple // Channel [ val(meta), path(file) ]
hic_reads_path // Channel [ val(meta), path(directory) ]
hic_aligner_ch // Channel [ val(meta), val(hic_aligner)]
hap_id // val: haplotype label ('hap1', 'hap2', or '' for the primary assembly)

main:
ch_versions = Channel.empty()
Expand Down Expand Up @@ -67,7 +68,8 @@ workflow HIC_MAPPING {
bwamem2 : it[0].aligner == "bwamem2"
}
.set{ch_aligner}



//
// SUBWORKFLOW: mapping hic reads using minimap2
//
Expand All @@ -78,7 +80,7 @@ workflow HIC_MAPPING {
ch_versions = ch_versions.mix( HIC_MINIMAP2.out.versions )
mappedbams = HIC_MINIMAP2.out.mappedbams

//
//
// SUBWORKFLOW: mapping hic reads using bwamem2
//
HIC_BWAMEM2 (
Expand All @@ -88,6 +90,9 @@ workflow HIC_MAPPING {
ch_versions = ch_versions.mix( HIC_BWAMEM2.out.versions )
mappedbams = mappedbams.mix(HIC_BWAMEM2.out.mappedbams)

mappedbams.map{meta, bams -> [[id: meta.id, hap_id:hap_id], bams]}
.set { mappedbams }

//
// LOGIC: GENERATE INDEX OF REFERENCE
//
Expand Down
12 changes: 9 additions & 3 deletions subworkflows/local/scaffolding.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ workflow SCAFFOLDING {
bed_in // tuple(meta, bed)
fasta_in // tuple(meta, fasta)
cool_bin // val: cooler cload parameter
hap_id // val: hap1/hap2/empty

main:
ch_versions = Channel.empty()
Expand All @@ -37,9 +38,14 @@ workflow SCAFFOLDING {
.set{ scaf_ref_fai }

//
// LOGIC: MIX IN THE HAPLOTYPE ID TO CONTROL THE OUTPUT SUFFIX
//
bed_in.map{ meta, bed -> [[id:meta.id, hap_id:hap_id],bed] }
.set{ bed_in_hap }
//
// MODULE: PERFORM SCAFFOLDING WITH YAHS
//
YAHS( bed_in, scaf_ref, scaf_ref_fai )
YAHS( bed_in_hap , scaf_ref, scaf_ref_fai )
ch_versions = ch_versions.mix(YAHS.out.versions)

//
Expand All @@ -59,7 +65,7 @@ workflow SCAFFOLDING {
YAHS.out.binary.join(YAHS.out.scaffolds_agp)
.combine(scaf_ref)
.combine(scaf_ref_fai)
.map{meta, binary, agp, fa, fai -> [meta, binary, agp, fai]}
.map{meta, binary, agp, fa, fai -> [[id:meta.id, hap_id:hap_id], binary, agp, fai]}
.set{ch_merge}

//
Expand All @@ -71,7 +77,7 @@ workflow SCAFFOLDING {
//
// LOGIC: BIN CONTACT PAIRS
//
JUICER_PRE.out.pairs.join(bed_in)
JUICER_PRE.out.pairs.join(bed_in_hap)
.combine(Channel.of(cool_bin))
.set{ch_juicer}

Expand Down
15 changes: 7 additions & 8 deletions workflows/genomeassembly.nf
Original file line number Diff line number Diff line change
Expand Up @@ -340,13 +340,13 @@ workflow GENOMEASSEMBLY {
//
// SUBWORKFLOW: MAP HIC DATA TO THE PRIMARY ASSEMBLY
//
HIC_MAPPING ( primary_contigs_ch,crams_ch,hic_aligner_ch )
HIC_MAPPING ( primary_contigs_ch,crams_ch,hic_aligner_ch, "")
ch_versions = ch_versions.mix(HIC_MAPPING.out.versions)

//
// SUBWORKFLOW: SCAFFOLD THE PRIMARY ASSEMBLY
//
SCAFFOLDING( HIC_MAPPING.out.bed, primary_contigs_ch, cool_bin )
SCAFFOLDING( HIC_MAPPING.out.bed, primary_contigs_ch, cool_bin, "")
ch_versions = ch_versions.mix(SCAFFOLDING.out.versions)

//
Expand All @@ -367,37 +367,36 @@ workflow GENOMEASSEMBLY {
unset_busco_alts
)


if ( hifiasm_hic_on ) {
//
// SUBWORKFLOW: MAP HIC DATA TO THE HAP1 CONTIGS
//
HIC_MAPPING_HAP1 ( RAW_ASSEMBLY.out.hap1_hic_contigs, crams_ch, hic_aligner_ch )
HIC_MAPPING_HAP1 ( RAW_ASSEMBLY.out.hap1_hic_contigs, crams_ch, hic_aligner_ch, 'hap1' )
ch_versions = ch_versions.mix(HIC_MAPPING_HAP1.out.versions)

//
// SUBWORKFLOW: SCAFFOLD HAP1
//
SCAFFOLDING_HAP1( HIC_MAPPING_HAP1.out.bed, RAW_ASSEMBLY.out.hap1_hic_contigs, cool_bin )
SCAFFOLDING_HAP1( HIC_MAPPING_HAP1.out.bed, RAW_ASSEMBLY.out.hap1_hic_contigs, cool_bin, 'hap1' )
ch_versions = ch_versions.mix(SCAFFOLDING_HAP1.out.versions)

//
// SUBWORKFLOW: MAP HIC DATA TO THE HAP2 CONTIGS
//
HIC_MAPPING_HAP2 ( RAW_ASSEMBLY.out.hap2_hic_contigs, crams_ch, hic_aligner_ch )
HIC_MAPPING_HAP2 ( RAW_ASSEMBLY.out.hap2_hic_contigs, crams_ch, hic_aligner_ch, 'hap2' )
ch_versions = ch_versions.mix(HIC_MAPPING_HAP2.out.versions)

//
// SUBWORKFLOW: SCAFFOLD HAP2
//
SCAFFOLDING_HAP2( HIC_MAPPING_HAP2.out.bed, RAW_ASSEMBLY.out.hap2_hic_contigs, cool_bin )
SCAFFOLDING_HAP2( HIC_MAPPING_HAP2.out.bed, RAW_ASSEMBLY.out.hap2_hic_contigs, cool_bin, 'hap2' )
ch_versions = ch_versions.mix(SCAFFOLDING_HAP2.out.versions)

//
// LOGIC: CREATE A CHANNEL FOR THE FULL HAP1/HAP2 ASSEMBLY
//
SCAFFOLDING_HAP1.out.fasta.combine(SCAFFOLDING_HAP2.out.fasta)
.map{meta_s, fasta_s, meta_h, fasta_h -> [ meta_h, fasta_s, fasta_h ]}
.map{meta_s, fasta_s, meta_h, fasta_h -> [ [id:meta_h.id], fasta_s, fasta_h ]}
.set{ stats_haps_input_ch }

//
Expand Down

0 comments on commit fc3c7f9

Please sign in to comment.