Skip to content

Commit

Permalink
update bouncy-basenji branch
Browse files Browse the repository at this point in the history
  • Loading branch information
LilyAnderssonLee committed Apr 11, 2024
2 parents 5e8cf10 + c4faa69 commit ece7d5a
Show file tree
Hide file tree
Showing 49 changed files with 1,301 additions and 160 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,17 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v1.2dev - Bouncy Basenji [unreleased]

### `Added`

- [#417](https://github.com/nf-core/taxprofiler/pull/417) - Added reference-free metagenome estimation with Nonpareil (added by @jfy133)

## v1.1.6dev - [unreleased]

### `Added`

- [#417](https://github.com/nf-core/taxprofiler/pull/417) - Added reference-free metagenome estimation with Nonpareil (added by @jfy133)
- [#454](https://github.com/nf-core/taxprofiler/pull/454) Updated to nf-core pipeline template v2.13.1 (added by @LilyAnderssonLee & @sofstam)
- [#461](https://github.com/nf-core/taxprofiler/pull/461) Turned on 'strict' Nextflow evaluation runs (added by @jfy133)
- [#461](https://github.com/nf-core/taxprofiler/pull/461) Optimised database compression so each compressed input database is untarred once, and shared amongst each run with different parameters (added by @jfy133)
Expand Down
4 changes: 4 additions & 0 deletions CITATIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@

> Schubert, M., Lindgreen, S., & Orlando, L. (2016). AdapterRemoval v2: rapid adapter trimming, identification, and read merging. BMC Research Notes, 9, 88. https://doi.org/10.1186/s13104-016-1900-2
- [Nonpareil](https://doi.org/10.1128/mSystems.00039-18)

> Rodriguez-R, L. M., Gunturu, S., Tiedje, J. M., Cole, J. R., & Konstantinidis, K. T. (2018). Nonpareil 3: Fast Estimation of Metagenomic Coverage and Sequence Diversity. mSystems, 3(3). https://doi.org/10.1128/mSystems.00039-18

- [Porechop](https://github.com/rrwick/Porechop)

> Wick, R. R., Judd, L. M., Gorrie, C. L., & Holt, K. E. (2017). Completing bacterial genome assemblies with multiplex MinION sequencing. Microbial Genomics, 3(10), e000132. https://doi.org/10.1099/mgen.0.000132
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))
- Host-read removal (short-read: [BowTie2](http://bowtie-bio.sourceforge.net/bowtie2/); long-read: [Minimap2](https://github.com/lh3/minimap2))
- Run merging
3. Supports statistics for host-read removal ([Samtools](http://www.htslib.org/))
3. Supports statistics for metagenome coverage estimation ([Nonpareil](https://nonpareil.readthedocs.io/en/latest/)) and for host-read removal ([Samtools](http://www.htslib.org/))
4. Performs taxonomic classification and/or profiling using one or more of:
- [Kraken2](https://ccb.jhu.edu/software/kraken2/)
- [MetaPhlAn](https://huttenhower.sph.harvard.edu/metaphlan/)
Expand Down
57 changes: 48 additions & 9 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,44 @@ report_section_order:
order: -1001
"nf-core-taxprofiler-summary":
order: -1002
# Section-order entry for the general statistics section.
# NOTE(review): the key must be the bare section ID; a stray trailing quote
# becomes part of the key and the entry then matches nothing.
general_stats:
  order: 1000
fastqc:
order: 900
fastqc-1:
order: 800
fastp:
order: 700
adapterRemoval:
order: 600
nonpareil_all_samples:
order: 500
porechop:
order: 400
bbduk:
order: 300
prinseqplusplus:
order: 200
filtlong:
order: 100
bowtie2:
order: 90
samtools:
order: 80
kraken:
order: 70
bracken:
order: 60
centrifuge:
order: 50
malt:
order: 40
diamond:
order: 30
kaiju:
order: 20
motus:
order: 10

export_plots: true

Expand All @@ -22,6 +60,7 @@ run_modules:
- fastqc
- adapterRemoval
- fastp
- custom_content
- bbduk
- prinseqplusplus
- porechop
Expand All @@ -34,7 +73,6 @@ run_modules:
- diamond
- malt
- motus
- custom_content

sp:
diamond:
Expand All @@ -43,6 +81,13 @@ sp:
fn_re: ".*(fastqc|falco)_data.txt$"
fastqc/zip:
fn: "*_fastqc.zip"
nonpareil_all_samples_mqc:
fn: "nonpareil_all_samples_mqc.png"

custom_data:
nonpareil_all_samples:
section_name: "Nonpareil"
description: "Nonpareil uses the redundancy of the reads in metagenomic datasets to estimate the average coverage and predict the amount of sequences that will be required to achieve “nearly complete coverage”. Plots here are not interactive - being exported directly from the tool. If you have difficulty reading the plot, please see the individual PNG files in the results directory. DOI: https://doi.org/10.1128/mSystems.00039-18"

top_modules:
- "fastqc":
Expand All @@ -59,13 +104,8 @@ top_modules:
path_filters_exclude:
- "*raw*"
extra: "If used in this run, Falco is a drop-in replacement for FastQC producing the same output, written by Guilherme de Sena Brandine and Andrew D. Smith."
- "fastp"
- "adapterRemoval"
- "porechop":
extra: "ℹ️: if you get the error message 'Error - was not able to plot data.' this means that porechop did not detect any adapters and therefore no statistics generated."
- "bbduk"
- "prinseqplusplus"
- "filtlong"
- "bowtie2":
name: "bowtie2"
- "samtools":
Expand Down Expand Up @@ -94,12 +134,11 @@ top_modules:
- "*.centrifuge.txt"
- "malt":
name: "MALT"
- "diamond"
- "kaiju":
name: "Kaiju"
- "motus"

#It is not possible to set placement for custom kraken and centrifuge columns.
# It is not possible to set placement for custom kraken
# and centrifuge columns.

table_columns_placement:
FastQC / Falco (pre-Trimming):
Expand Down
28 changes: 28 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,34 @@ process {
]
}

// Redundancy estimation with nonpareil
// ext.prefix is built per sample/run from meta.id and meta.run_accession.
// Only files matching *.npa, *.npc, *.npl and *.npo are published, into
// <params.outdir>/nonpareil/, using the pipeline-wide publish mode.
withName: NONPAREIL_NONPAREIL {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/nonpareil/" },
mode: params.publish_dir_mode,
pattern: '*.np{a,c,l,o}'
]
}

// Per-sample/run nonpareil curve step: same prefix scheme as NONPAREIL_NONPAREIL
// (meta.id + meta.run_accession); only PNG files are published to
// <params.outdir>/nonpareil/.
withName: 'NONPAREIL_CURVE' {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/nonpareil/" },
mode: params.publish_dir_mode,
pattern: '*.png'
]
}

// Uses a fixed prefix rather than a per-sample one; only PNG files are
// published to <params.outdir>/nonpareil/.
// NOTE(review): the prefix appears to be what the MultiQC search pattern
// "nonpareil_all_samples_mqc.png" in assets/multiqc_config.yml relies on —
// keep the two names in sync.
withName: 'NONPAREIL_SET' {
ext.prefix = { "nonpareil_all_samples_mqc" }
publishDir = [
path: { "${params.outdir}/nonpareil/" },
mode: params.publish_dir_mode,
pattern: '*.png'
]
}

// AdapterRemoval separate output merging
withName: CAT_FASTQ {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
Expand Down
35 changes: 35 additions & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ params {
max_time = '6.h'

// Input data
<<<<<<< HEAD
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv'
perform_shortread_qc = true
Expand Down Expand Up @@ -48,6 +49,37 @@ params {
kraken2_save_reads = true
centrifuge_save_reads = true
run_profile_standardisation = true
=======
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_mergepairs = true
perform_shortread_redundancyestimation = true
perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = true
run_malt = false
run_metaphlan = true
run_centrifuge = true
run_diamond = true
run_krakenuniq = true
run_motus = false
run_ganon = true
run_krona = true
run_kmcp = true
kmcp_mode = 0
krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
malt_save_reads = true
kraken2_save_reads = true
centrifuge_save_reads = true
run_profile_standardisation = true
>>>>>>> bouncy-basenji
}

process {
Expand All @@ -61,4 +93,7 @@ process {
withName: MEGAN_RMA2INFO_KRONA {
maxForks = 1
}
withName: NONPAREIL_NONPAREIL {
ext.args = { "-k 5" }
}
}
29 changes: 29 additions & 0 deletions conf/test_adapterremoval.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ params {
max_time = '6.h'

// Input data
<<<<<<< HEAD
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv'
perform_shortread_qc = true
Expand All @@ -42,11 +43,39 @@ params {
run_ganon = false
run_kmcp = false
kmcp_mode = 0
=======
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_tool = 'adapterremoval'
perform_shortread_redundancyestimation = true
perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = false
run_malt = false
run_metaphlan = false
run_centrifuge = false
run_diamond = false
run_krakenuniq = false
run_motus = false
run_ganon = false
run_kmcp = false
kmcp_mode = 0
>>>>>>> bouncy-basenji
}

// Process-level overrides for this test profile.
process {
// Limit MALT_RUN to one concurrent task (maxForks = 1) and pass the configured
// MALT mode plus "-J-Xmx12G".
// NOTE(review): "-J-Xmx12G" presumably caps the JVM heap at 12 GB — confirm.
withName: MALT_RUN {
maxForks = 1
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
}
// Override the nonpareil arguments for the test run.
// NOTE(review): "-k 5" presumably sets a small k-mer size suited to the tiny
// test dataset — confirm against the nonpareil documentation.
withName: NONPAREIL_NONPAREIL {
ext.args = { "-k 5" }
}
}
3 changes: 3 additions & 0 deletions conf/test_bbduk.config
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,7 @@ process {
maxForks = 1
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
}
withName: NONPAREIL_NONPAREIL {
ext.args = { "-k 5" }
}
}
3 changes: 3 additions & 0 deletions conf/test_falco.config
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,7 @@ process {
maxForks = 1
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
}
withName: NONPAREIL_NONPAREIL {
ext.args = { "-k 5" }
}
}
30 changes: 30 additions & 0 deletions conf/test_fastp.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ params {
max_time = '6.h'

// Input data
<<<<<<< HEAD
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.2.csv'
perform_shortread_qc = true
Expand All @@ -43,11 +44,40 @@ params {
run_ganon = false
run_kmcp = false
kmcp_mode = 0
=======
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_tool = 'fastp'
perform_shortread_redundancyestimation = true
perform_shortread_complexityfilter = true
shortread_complexityfilter_tool = 'fastp'
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = false
run_malt = false
run_metaphlan = false
run_centrifuge = false
run_diamond = false
run_krakenuniq = false
run_motus = false
run_ganon = false
run_kmcp = false
kmcp_mode = 0
>>>>>>> bouncy-basenji
}

// Process-level overrides for this test profile.
process {
// Limit MALT_RUN to one concurrent task (maxForks = 1) and pass the configured
// MALT mode plus "-J-Xmx12G".
// NOTE(review): "-J-Xmx12G" presumably caps the JVM heap at 12 GB — confirm.
withName: MALT_RUN {
maxForks = 1
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
}
// Override the nonpareil arguments for the test run.
// NOTE(review): "-k 5" presumably sets a small k-mer size suited to the tiny
// test dataset — confirm against the nonpareil documentation.
withName: NONPAREIL_NONPAREIL {
ext.args = { "-k 5" }
}
}
Loading

0 comments on commit ece7d5a

Please sign in to comment.