From 9ff5a1ee2feb10a875c02db29bfa7b4f49a3d5aa Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Fri, 14 Jul 2023 11:59:04 +0200 Subject: [PATCH 01/12] Adding new qualimap --- CITATIONS.md | 4 + conf/modules.config | 8 ++ docs/output.md | 27 ++++++ modules.json | 5 + modules/nf-core/qualimap/bamqc/main.nf | 123 ++++++++++++++++++++++++ modules/nf-core/qualimap/bamqc/meta.yml | 47 +++++++++ nextflow.config | 4 + nextflow_schema.json | 8 ++ workflows/eager.nf | 21 ++++ 9 files changed, 247 insertions(+) create mode 100644 modules/nf-core/qualimap/bamqc/main.nf create mode 100644 modules/nf-core/qualimap/bamqc/meta.yml diff --git a/CITATIONS.md b/CITATIONS.md index 11f540b91..5f6a4c981 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -62,6 +62,10 @@ > Daley, T., & Smith, A. D. (2013). Predicting the molecular complexity of sequencing libraries. Nature Methods, 10(4), 325–327. doi: [10.1038/nmeth.2375](https://doi.org/10.1038/nmeth.2375) +- [QualiMap](https://doi.org/10.1093/bioinformatics/btv566) + + > QualiMap Okonechnikov, K., Conesa, A., & García-Alcalde, F. (2016). Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics , 32(2), 292–294. Download: http://qualimap.bioinfo.cipf.es/ + - [DamageProfiler](https://doi.org/10.1093/bioinformatics/btab190) > DamageProfiler Neukamm, J., Peltzer, A., & Nieselt, K. (2020). DamageProfiler: Fast damage pattern calculation for ancient DNA. In Bioinformatics (btab190). doi: [10.1093/bioinformatics/btab190](https://doi.org/10.1093/bioinformatics/btab190). Download: https://github.com/Integrative-Transcriptomics/DamageProfiler diff --git a/conf/modules.config b/conf/modules.config index f8131987b..57da3be20 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -499,6 +499,14 @@ process { ] } + withName: "QUALIMAP_BAMQC" { + publishDir = [ + path: { "${params.outdir}/mapstats/qualimap" }, + mode: params.publish_dir_mode, + enabled: true + ] + } + withName: "DAMAGEPROFILER" { ext.args = [ "-l ${params.damageprofiler_length}", diff --git a/docs/output.md b/docs/output.md index 13520a4c4..950715db7 100644 --- a/docs/output.md +++ b/docs/output.md @@ -266,6 +266,33 @@ The resulting histogram file will contain estimated deduplication statistics at These curves will be displayed in the pipeline run's MultiQC report, however you can also use this file for plotting yourself for further exploration e.g. in R to find your sequencing target depth. +#### QualiMap + +
+Output files + +- `qualimap/` + + - `/` + - `*.html`: in-depth report including percent coverage, depth coverage, GC content, etc. of mapped reads + - `genome_results.txt` + - `css/`: HTML CSS styling used for the report + - `images_qualimapReport/`: PNG version of images from the HTML report. + - `raw_data_qualimapReport/`: The raw data used to render the HTML report as TXT files. + +
+ +[QualiMap](http://qualimap.bioinfo.cipf.es/) +is a tool which provides statistics on the quality of the mapping of your reads to your reference genome. It allows you to assess how well covered your reference genome is by your data, both in 'fold' depth (average number of times a given base on the reference is covered by a read) and 'percentage' (the percentage of all bases on the reference genome that is covered at a given fold depth). These outputs allow you to make decision if you have enough quality data for downstream applications like genotyping, and how to adjust the parameters for those tools accordingly. + +> NB: Neither fold coverage nor percent coverage on there own is sufficient to assess whether you have a high quality mapping. Abnormally high fold coverages of a smaller region such as highly conserved genes or un-removed-adapter-containing reference genomes can artificially inflate the mean coverage, yet a high percent coverage is not useful if all bases of the genome are covered at just 1x coverage. + +**Note that many of the statistics from this module are displayed in the General Stats table, as they represent single values that are not plottable.** + +You will receive output for each sample. This means you will statistics of deduplicated values of all types of libraries combined in a single value (i.e. non-UDG treated, full-UDG, paired-end, single-end all together). + +> ⚠️ Warning: If your library has no reads mapping to the reference, this will result in an empty BAM file. Qualimap will therefore not produce any output even if a BAM exists! + #### DamageProfiler
diff --git a/modules.json b/modules.json index e96f3ee4f..a566f1cd2 100644 --- a/modules.json +++ b/modules.json @@ -125,6 +125,11 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "qualimap/bamqc": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, "samtools/faidx": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", diff --git a/modules/nf-core/qualimap/bamqc/main.nf b/modules/nf-core/qualimap/bamqc/main.nf new file mode 100644 index 000000000..fef7307a0 --- /dev/null +++ b/modules/nf-core/qualimap/bamqc/main.nf @@ -0,0 +1,123 @@ +process QUALIMAP_BAMQC { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::qualimap=2.2.2d" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/qualimap:2.2.2d--1' : + 'biocontainers/qualimap:2.2.2d--1' }" + + input: + tuple val(meta), path(bam) + path gff + + output: + tuple val(meta), path("${prefix}"), emit: results + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def collect_pairs = meta.single_end ? '' : '--collect-overlap-pairs' + def memory = (task.memory.mega*0.8).intValue() + 'M' + def regions = gff ? "--gff $gff" : '' + + def strandedness = 'non-strand-specific' + if (meta.strandedness == 'forward') { + strandedness = 'strand-specific-forward' + } else if (meta.strandedness == 'reverse') { + strandedness = 'strand-specific-reverse' + } + """ + unset DISPLAY + mkdir -p tmp + export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp + qualimap \\ + --java-mem-size=$memory \\ + bamqc \\ + $args \\ + -bam $bam \\ + $regions \\ + -p $strandedness \\ + $collect_pairs \\ + -outdir $prefix \\ + -nt $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + """ + mkdir -p $prefix/css + mkdir $prefix/images_qualimapReport + mkdir $prefix/raw_data_qualimapReport + cd $prefix/css + touch agogo.css + touch basic.css + touch bgtop.png + touch comment-close.png + touch doctools.js + touch down-pressed.png + touch jquery.js + touch plus.png + touch qualimap_logo_small.png + touch searchtools.js + touch up.png + touch websupport.js + touch ajax-loader.gif + touch bgfooter.png + touch comment-bright.png + touch comment.png + touch down.png + touch file.png + touch minus.png + touch pygments.css + touch report.css + touch underscore.js + touch up-pressed.png + cd ../images_qualimapReport/ + touch genome_coverage_0to50_histogram.png + touch genome_coverage_quotes.png + touch genome_insert_size_across_reference.png + touch genome_mapping_quality_histogram.png + touch genome_uniq_read_starts_histogram.png + touch genome_coverage_across_reference.png + touch genome_gc_content_per_window.png + touch genome_insert_size_histogram.png + touch genome_reads_clipping_profile.png + touch genome_coverage_histogram.png + touch genome_homopolymer_indels.png + touch genome_mapping_quality_across_reference.png + touch genome_reads_content_per_read_position.png + cd ../raw_data_qualimapReport + touch coverage_across_reference.txt + touch genome_fraction_coverage.txt + touch insert_size_histogram.txt + touch mapped_reads_nucleotide_content.txt + touch coverage_histogram.txt + touch homopolymer_indels.txt + touch mapped_reads_clipping_profile.txt + touch mapping_quality_across_reference.txt + touch duplication_rate_histogram.txt + touch insert_size_across_reference.txt + touch mapped_reads_gc-content_distribution.txt + touch mapping_quality_histogram.txt + cd ../ + touch genome_results.txt + touch qualimapReport.html + cd ../ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/qualimap/bamqc/meta.yml b/modules/nf-core/qualimap/bamqc/meta.yml new file mode 100644 index 000000000..303532eb7 --- /dev/null +++ b/modules/nf-core/qualimap/bamqc/meta.yml @@ -0,0 +1,47 @@ +name: qualimap_bamqc +description: Evaluate alignment data +keywords: + - quality control + - qc + - bam +tools: + - qualimap: + description: | + Qualimap 2 is a platform-independent application written in + Java and R that provides both a Graphical User Interface and + a command-line interface to facilitate the quality control of + alignment sequencing data and its derivatives like feature counts. + homepage: http://qualimap.bioinfo.cipf.es/ + documentation: http://qualimap.conesalab.org/doc_html/index.html + doi: 10.1093/bioinformatics/bts503 + licence: ["GPL-2.0-only"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - gff: + type: file + description: Feature file with regions of interest + pattern: "*.{gff,gtf,bed}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - results: + type: dir + description: Qualimap results dir + pattern: "*/*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@phue" diff --git a/nextflow.config b/nextflow.config index e918e9d8a..d6ddfb611 100644 --- a/nextflow.config +++ b/nextflow.config @@ -129,6 +129,10 @@ params { skip_deduplication = false deduplication_tool = 'markduplicates' + // Qualimap + skip_qualimap = false + snpcapture_bed = null + // MtNucRatio run_mtnucratio = false mitochondrion_header = 'MT' diff --git a/nextflow_schema.json b/nextflow_schema.json index 19838b4e7..b28033c22 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -789,6 +789,14 @@ "description": "Turns on defects mode to extrapolate without testing for defects (lc_extrap mode only).", "help_text": "Activates defects mode of `lc_extrap`, which does the extrapolation without testing for defects.\n\n> Modifies preseq lc_extrap parameter: `-D`", "fa_icon": "fab fa-creative-commons-sampling-plus" + }, + "skip_qualimap": { + "type": "boolean", + "default": "false" + }, + "snpcapture_bed": { + "type": "string", + "description": "Path to snp capture BED" } }, "fa_icon": "fas fa-search" diff --git a/workflows/eager.nf b/workflows/eager.nf index 08d1f19fb..af99e92a6 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -86,6 +86,7 @@ include { PRESEQ_CCURVE } from '../modules/nf-core/preseq/ccurve/m include { PRESEQ_LCEXTRAP } from '../modules/nf-core/preseq/lcextrap/main' include { FALCO } from '../modules/nf-core/falco/main' include { MTNUCRATIO } from '../modules/nf-core/mtnucratio/main' +include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' /* @@ -123,6 +124,13 @@ workflow EAGER { if ( params.preprocessing_tool == 'fastp' && !adapterlist.extension.matches(".*(fa|fasta|fna|fas)") ) error "[nf-core/eager] ERROR: fastp adapter list requires a `.fasta` format and extension (or fa, fas, fna). Check input: --preprocessing_adapterlist ${params.preprocessing_adapterlist}" } + // QualiMap + if ( params.snpcapture_bed ) { + ch_snpcapture_bed = Channel.fromPath(params.snpcapture_bed, checkIfExists: true).collect() + } else { + ch_snpcapture_bed = [] + } + // // SUBWORKFLOW: Read in samplesheet, validate and stage input files // @@ -233,6 +241,19 @@ workflow EAGER { ch_dedupped_flagstat = Channel.empty() } + // + // MODULE QUALIMAP + // + + if ( !params.skip_qualimap ) { + ch_qualimap_input = ch_dedupped_bams + .map { + meta, bam, bai -> + [ meta, bam ] + } + QUALIMAP_BAMQC(ch_qualimap_input, ch_snpcapture_bed) + } + // // Section: Metagenomics screening // From 3596bcdc3c2ec0fc879e0d342804d02eb6c93d77 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 21 Jul 2023 12:09:09 +0200 Subject: [PATCH 02/12] Apply suggestions from code review --- docs/output.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index a6531b7c9..33b939987 100644 --- a/docs/output.md +++ b/docs/output.md @@ -406,7 +406,6 @@ You will receive output for each sample. This means you will statistics of dedup > ⚠️ Warning: If your library has no reads mapping to the reference, this will result in an empty BAM file. Qualimap will therefore not produce any output even if a BAM exists! - #### Bedtools
From 169726b376a819110a09abdb1eede93f22fe5426 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 21 Jul 2023 12:10:02 +0200 Subject: [PATCH 03/12] Apply suggestions from code review --- workflows/eager.nf | 9 --------- 1 file changed, 9 deletions(-) diff --git a/workflows/eager.nf b/workflows/eager.nf index ed03b42ac..224699916 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -87,15 +87,6 @@ include { CALCULATE_DAMAGE } from '../subworkflows/local/calculate_ // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' -include { PRESEQ_CCURVE } from '../modules/nf-core/preseq/ccurve/main' -include { PRESEQ_LCEXTRAP } from '../modules/nf-core/preseq/lcextrap/main' -include { FALCO } from '../modules/nf-core/falco/main' -include { MTNUCRATIO } from '../modules/nf-core/mtnucratio/main' - include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' From 31008f271df428278d9b394b2631fbbc63990946 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Fri, 21 Jul 2023 14:48:17 +0200 Subject: [PATCH 04/12] adding versions --- workflows/eager.nf | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/workflows/eager.nf b/workflows/eager.nf index 224699916..82c7545cc 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -98,8 +98,8 @@ include { MTNUCRATIO } from '../modules/n include { HOST_REMOVAL } from '../modules/local/host_removal' include { ENDORSPY } from '../modules/nf-core/endorspy/main' include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTATS_BAM_INPUT } from '../modules/nf-core/samtools/flagstat/main' -include { BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_DEPTH ; BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_BREADTH } from '../modules/nf-core/bedtools/coverage/main' -include { SAMTOOLS_VIEW_GENOME } from '../modules/local/samtools_view_genome.nf' +include { BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_DEPTH ; BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_BREADTH } from '../modules/nf-core/bedtools/coverage/main' +include { SAMTOOLS_VIEW_GENOME } from '../modules/local/samtools_view_genome.nf' include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' @@ -157,11 +157,11 @@ workflow EAGER { file(params.input) ) ch_versions = ch_versions.mix( INPUT_CHECK.out.versions ) - + // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ // ! There is currently no tooling to help you write a sample sheet schema - + // // SUBWORKFLOW: Indexing of reference files // @@ -291,6 +291,7 @@ workflow EAGER { [ meta, bam ] } QUALIMAP_BAMQC(ch_qualimap_input, ch_snpcapture_bed) + ch_versions = ch_versions.mix( QUALIMAP_BAMQC.out.versions ) } // @@ -418,9 +419,9 @@ workflow EAGER { // - // MODULE: Bedtools coverage + // MODULE: Bedtools coverage // - + if ( params.run_bedtools_coverage ) { ch_anno_for_bedtools = Channel.fromPath(params.mapstats_bedtools_featurefile, checkIfExists: true).collect() @@ -435,7 +436,7 @@ workflow EAGER { SAMTOOLS_VIEW_GENOME(ch_dedupped_bams) ch_genome_for_bedtools = SAMTOOLS_VIEW_GENOME.out.genome - + BEDTOOLS_COVERAGE_BREADTH(ch_dedupped_for_bedtools, ch_genome_for_bedtools) BEDTOOLS_COVERAGE_DEPTH(ch_dedupped_for_bedtools, ch_genome_for_bedtools) @@ -443,7 +444,7 @@ workflow EAGER { ch_versions = ch_versions.mix( BEDTOOLS_COVERAGE_BREADTH.out.versions ) ch_versions = ch_versions.mix( BEDTOOLS_COVERAGE_DEPTH.out.versions ) } - + // // SUBWORKFLOW: Calculate Damage From 1c101b120f883476e7c17d4c51c3063fcfe46d67 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Sat, 22 Jul 2023 11:04:58 +0200 Subject: [PATCH 05/12] Adding multiqc --- workflows/eager.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflows/eager.nf b/workflows/eager.nf index 82c7545cc..de8aad794 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -291,7 +291,8 @@ workflow EAGER { [ meta, bam ] } QUALIMAP_BAMQC(ch_qualimap_input, ch_snpcapture_bed) - ch_versions = ch_versions.mix( QUALIMAP_BAMQC.out.versions ) + ch_versions = ch_versions.mix( QUALIMAP_BAMQC.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( QUALIMAP_BAMQC.out.results.collect{it[1]}.ifEmpty([]) ) } // From 7dcb5738f85eab7c1bb46f3dac636161a7e11d16 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Fri, 28 Jul 2023 10:36:37 +0200 Subject: [PATCH 06/12] moving multiqc_qualimap --- workflows/eager.nf | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/workflows/eager.nf b/workflows/eager.nf index de8aad794..439e79934 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -292,7 +292,6 @@ workflow EAGER { } QUALIMAP_BAMQC(ch_qualimap_input, ch_snpcapture_bed) ch_versions = ch_versions.mix( QUALIMAP_BAMQC.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( QUALIMAP_BAMQC.out.results.collect{it[1]}.ifEmpty([]) ) } // @@ -516,6 +515,10 @@ workflow EAGER { ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) //ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) // Replaced with custom mixing + if ( !params.skip_qualimap ) { + ch_multiqc_files = ch_multiqc_files.mix( QUALIMAP_BAMQC.out.results.collect{it[1]}.ifEmpty([]) ) + } + MULTIQC ( ch_multiqc_files.collect(), ch_multiqc_config.toList(), From 3c0877102ff5e102c1ec96c769eff8b7b7bae6a3 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Fri, 28 Jul 2023 10:43:59 +0200 Subject: [PATCH 07/12] Apply suggestions from code review Co-authored-by: James A. Fellows Yates --- nextflow.config | 1 - workflows/eager.nf | 10 +++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/nextflow.config b/nextflow.config index af7ac4034..a20ae5a5e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -154,7 +154,6 @@ params { contamination_estimation_angsd_minq = 30 contamination_estimation_angsd_hapmap = "${baseDir}/assets/angsd_resources/HapMapChrX.gz" - // MtNucRatio run_mtnucratio = false mitochondrion_header = 'MT' diff --git a/workflows/eager.nf b/workflows/eager.nf index de8aad794..96c13e942 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -100,8 +100,7 @@ include { ENDORSPY } from '../modules/n include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTATS_BAM_INPUT } from '../modules/nf-core/samtools/flagstat/main' include { BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_DEPTH ; BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_BREADTH } from '../modules/nf-core/bedtools/coverage/main' include { SAMTOOLS_VIEW_GENOME } from '../modules/local/samtools_view_genome.nf' -include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' - +include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -140,15 +139,14 @@ workflow EAGER { // QualiMap if ( params.snpcapture_bed ) { - ch_snpcapture_bed = Channel.fromPath(params.snpcapture_bed, checkIfExists: true).collect() + ch_snpcapture_bed = Channel.fromPath(params.snpcapture_bed, checkIfExists: true).collect() } else { - ch_snpcapture_bed = [] + ch_snpcapture_bed = [] } // Contamination estimation hapmap_file = file(params.contamination_estimation_angsd_hapmap, checkIfExists:true) - // // SUBWORKFLOW: Read in samplesheet, validate and stage input files // @@ -157,7 +155,6 @@ workflow EAGER { file(params.input) ) ch_versions = ch_versions.mix( INPUT_CHECK.out.versions ) - // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ // ! There is currently no tooling to help you write a sample sheet schema @@ -446,7 +443,6 @@ workflow EAGER { ch_versions = ch_versions.mix( BEDTOOLS_COVERAGE_DEPTH.out.versions ) } - // // SUBWORKFLOW: Calculate Damage // From 324fd278222bbaf8fa563935997a17fb38db1fda Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Fri, 28 Jul 2023 12:22:02 +0200 Subject: [PATCH 08/12] Apply suggestions from code review Co-authored-by: James A. Fellows Yates Co-authored-by: Thiseas C. Lamnidis --- conf/modules.config | 2 +- docs/output.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 4bbd59519..904e63981 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -822,7 +822,7 @@ process { withName: "QUALIMAP_BAMQC" { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } publishDir = [ - path: { "${params.outdir}/mapstats/qualimap" }, + path: { "${params.outdir}/mapstats/qualimap/${meta.reference}/${meta.sample_id}/}" }, mode: params.publish_dir_mode, enabled: true ] diff --git a/docs/output.md b/docs/output.md index 33b939987..d84c9ec02 100644 --- a/docs/output.md +++ b/docs/output.md @@ -398,7 +398,7 @@ These curves will be displayed in the pipeline run's MultiQC report, however you [QualiMap](http://qualimap.bioinfo.cipf.es/) is a tool which provides statistics on the quality of the mapping of your reads to your reference genome. It allows you to assess how well covered your reference genome is by your data, both in 'fold' depth (average number of times a given base on the reference is covered by a read) and 'percentage' (the percentage of all bases on the reference genome that is covered at a given fold depth). These outputs allow you to make decision if you have enough quality data for downstream applications like genotyping, and how to adjust the parameters for those tools accordingly. -> NB: Neither fold coverage nor percent coverage on there own is sufficient to assess whether you have a high quality mapping. Abnormally high fold coverages of a smaller region such as highly conserved genes or un-removed-adapter-containing reference genomes can artificially inflate the mean coverage, yet a high percent coverage is not useful if all bases of the genome are covered at just 1x coverage. +> NB: Neither fold coverage nor percent coverage on its own is sufficient to assess whether you have a high quality mapping. Abnormally high fold coverages of a smaller region such as highly conserved genes or un-removed-adapter-containing reference genomes can artificially inflate the mean coverage, yet a high percent coverage is not useful if all bases of the genome are covered at just 1x coverage. **Note that many of the statistics from this module are displayed in the General Stats table, as they represent single values that are not plottable.** From bf50313dd74e9e8d5fef6592044e1335069696c2 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Fri, 4 Aug 2023 12:04:46 +0200 Subject: [PATCH 09/12] Adding the snpcapure to CI testing --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4df3b5a51..5437003e0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: - "-profile test,docker --mapping_tool bwamem --run_mapdamage_rescaling --run_pmd_filtering --run_trim_bam" - "-profile test,docker --mapping_tool bowtie2" - "-profile test,docker --skip_preprocessing" - - "-profile test_humanbam,docker --run_mtnucratio --run_contamination_estimation_angsd" + - "-profile test_humanbam,docker --run_mtnucratio --run_contamination_estimation_angsd --snpcapture_bed 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz'" - "-profile test_multiref,docker" ## TODO add damage manipulation here instead once it goes multiref steps: - name: Check out pipeline code From d46a98c03664daa851addaa612acf80b0f30bf57 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Fri, 18 Aug 2023 11:56:27 +0200 Subject: [PATCH 10/12] Fixing testing gzip bed --- nextflow_schema.json | 2 +- workflows/eager.nf | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index b08654252..0596e2332 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -965,7 +965,7 @@ }, "snpcapture_bed": { "type": "string", - "description": "Path to snp capture BED" + "description": "Path to snp capture in BED format." } }, "fa_icon": "fas fa-search" diff --git a/workflows/eager.nf b/workflows/eager.nf index cc7bb3e89..21816c255 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -101,6 +101,7 @@ include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTATS_BAM_INPUT } from '../modules/n include { BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_DEPTH ; BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_BREADTH } from '../modules/nf-core/bedtools/coverage/main' include { SAMTOOLS_VIEW_GENOME } from '../modules/local/samtools_view_genome.nf' include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' +include { GUNZIP as GUNZIP_SNPBED } from '../modules/nf-core/gunzip/main.nf' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -139,7 +140,17 @@ workflow EAGER { // QualiMap if ( params.snpcapture_bed ) { - ch_snpcapture_bed = Channel.fromPath(params.snpcapture_bed, checkIfExists: true).collect() + ch_snpcapture_bed_gunzip = Channel.fromPath(params.snpcapture_bed, checkIfExists: true).collect().map{ + file -> + meta = file.simpleName + [meta,file] + }.branch{ + meta, bed -> + forgunzip: bed[0].extension == "gz" + skip: true + } + ch_snpcapture_bed = GUNZIP_SNPBED(ch_snpcapture_bed_gunzip.forgunzip).gunzip.mix(ch_snpcapture_bed_gunzip).map{it[1]} + } else { ch_snpcapture_bed = [] } From 7017c50796ed1722c5b78d408251351c6907d1a8 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Fri, 25 Aug 2023 11:37:46 +0200 Subject: [PATCH 11/12] Adding Thiseas suggestion --- workflows/eager.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/eager.nf b/workflows/eager.nf index 21816c255..506758587 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -149,7 +149,7 @@ workflow EAGER { forgunzip: bed[0].extension == "gz" skip: true } - ch_snpcapture_bed = GUNZIP_SNPBED(ch_snpcapture_bed_gunzip.forgunzip).gunzip.mix(ch_snpcapture_bed_gunzip).map{it[1]} + ch_snpcapture_bed = GUNZIP_SNPBED(ch_snpcapture_bed_gunzip.forgunzip).gunzip.mix(ch_snpcapture_bed_gunzip.skip).map{it[1]} } else { ch_snpcapture_bed = [] From b99321c372d9fc8333699ba9988871adee73b5ba Mon Sep 17 00:00:00 2001 From: Judith Ballesteros Date: Fri, 25 Aug 2023 12:50:08 +0200 Subject: [PATCH 12/12] Apply suggestions from code review Co-authored-by: Thiseas C. Lamnidis --- nextflow_schema.json | 2 +- workflows/eager.nf | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 0596e2332..904c7ef61 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -965,7 +965,7 @@ }, "snpcapture_bed": { "type": "string", - "description": "Path to snp capture in BED format." + "description": "Path to snp capture in BED format. Provided file can also be gzipped." } }, "fa_icon": "fas fa-search" diff --git a/workflows/eager.nf b/workflows/eager.nf index 506758587..8024f354b 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -140,15 +140,18 @@ workflow EAGER { // QualiMap if ( params.snpcapture_bed ) { - ch_snpcapture_bed_gunzip = Channel.fromPath(params.snpcapture_bed, checkIfExists: true).collect().map{ - file -> - meta = file.simpleName - [meta,file] - }.branch{ - meta, bed -> + ch_snpcapture_bed_gunzip = Channel.fromPath( params.snpcapture_bed, checkIfExists: true ) + .collect() + .map { + file -> + meta = file.simpleName + [meta,file] + } + .branch { + meta, bed -> forgunzip: bed[0].extension == "gz" skip: true - } + } ch_snpcapture_bed = GUNZIP_SNPBED(ch_snpcapture_bed_gunzip.forgunzip).gunzip.mix(ch_snpcapture_bed_gunzip.skip).map{it[1]} } else {