From c5fdfbd6cb385ef9fc72cbcc676a2fdbcce72664 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida
Date: Wed, 8 Feb 2023 14:32:56 +0100
Subject: [PATCH 01/50] Add pre-formatted database (#82)

* add pre-formatted database info

* add information about pre-formatted database
---
 README.md          | 17 +++++++++++++----
 docs/quickstart.md | 10 ++++++++++
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 8bf0a2d7..551b7cdf 100644
--- a/README.md
+++ b/README.md
@@ -107,15 +107,24 @@ These images have been kept separate to not create massive Docker image and to a

     nextflow run fmalmeida/bacannot -profile docker --help

-🔥 To run the pipeline now users **need** to pass the `-profile docker` or `-profile singularity` parameter explicitely. The pipeline does not load it automatically anymore.
+🔥 To run the pipeline now users **need** to pass the `-profile docker` or `-profile singularity` parameter explicitly. The pipeline does not load it automatically anymore.

🔥 Users can keep the pipeline always updated with: `nextflow pull fmalmeida/bacannot`

### Downloading and updating databases

-Bacannot databases are not inside the docker images anymore to avoid huge images and problems with conexions and limit rates with dockerhub.
+Bacannot databases are not inside the docker images anymore to avoid huge images and problems with connections and rate limits with dockerhub.
+
+#### Pre-formatted
+
+Users can directly download pre-formatted databases from Zenodo: https://doi.org/10.5281/zenodo.7615811
+
+This is useful for standardization and also for overcoming known issues that may arise when formatting databases with the `singularity` profile.
+
+#### I want to generate a new formatted database
+
+To download and format a copy of the required bacannot databases, users can execute the following:
-To get a copy of required bacannot databases users must:
 ```bash
 # Download pipeline databases
 nextflow run fmalmeida/bacannot --get_dbs --output bacannot_dbs -profile <docker/singularity>
@@ -143,7 +152,7 @@ bacannot_dbs
 └── victors_db
 ```

-To update databases you can either download a new one to a new directory. Remove the database you want to get a new one from the root bacannot dir and use the same command above to save in the same directory (the pipeline will only try to download missing databases). Or, you can use the parameter `--force_update` to download everything again.
+> To update databases you can either download a fresh copy to a new directory, or remove the databases you want to update from the root bacannot databases directory and re-run the same command above on that directory (the pipeline will only try to download missing databases). Alternatively, you can use the parameter `--force_update` to download everything again.

 ## Quickstart

diff --git a/docs/quickstart.md b/docs/quickstart.md
index 46e72dba..69954a62 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -33,6 +33,16 @@ samplesheet: # this header is required

 ### Bacannot databases

+Bacannot databases are not inside the docker images anymore to avoid huge images and problems with connections and rate limits with dockerhub.
+
+#### Pre-formatted
+
+Users can directly download pre-formatted databases from Zenodo: https://doi.org/10.5281/zenodo.7615811
+
+This is useful for standardization and also for overcoming known issues that may arise when formatting databases with the `singularity` profile.
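For illustration, a minimal sketch of how fetching one of the pre-formatted archives could look, assuming a hypothetical archive name and the `--bacannot_db` parameter (check the Zenodo record and the pipeline help for the exact names):

```bash
# hypothetical sketch: download a pre-formatted bacannot database from Zenodo
# (the archive file name is an assumption; check the record at
# https://doi.org/10.5281/zenodo.7615811 for the actual name)
wget https://zenodo.org/record/7615811/files/bacannot_dbs.tar.gz
tar -xzf bacannot_dbs.tar.gz

# then point the pipeline at the unpacked directory
# (parameter name assumed)
nextflow run fmalmeida/bacannot \
    --bacannot_db ./bacannot_dbs \
    -profile docker
```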
+ +#### I want to generate a new formatted database + ```{bash .annotate hl_lines="5"} # Download pipeline databases nextflow run fmalmeida/bacannot \ From 548147a0c51488d1d24347bee270e3c17e4f945d Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Tue, 28 Feb 2023 15:13:27 -0300 Subject: [PATCH 02/50] 50 add tool integron finder 20 (#87) * update version * Add pre-formatted database (#83) * add pre-formatted database info * add information about pre-formatted database * update falmeida-py package * change version * change main tools to public containers * use biocontainer * aggregate other non-biocontainer tools and diminish the size of docker images * update module labels * re-arranged docker images * add integron_finder module * update amrfinder version * trying to addintegron finder to gff * update docker * fixed image install * fixed integron finder 2 gff * remove unnecessary grouptuple * fix image and emboss module * fix organization * add docker image to module * fix indentation * fix indentation * added integron finder results to final GFF and JBROWSE * integron finder results added to HTML report * fix docker image * properly added to json summary * update changelog * update readme * update list of tools * update default config in docs * backscape tildes * update installation docs * fix indentation * update outputs docs * fix wrong pipeline name * fix typo * update quickstart * fixed mlst execution in singularity * fix indentation --- .zenodo.json | 2 +- README.md | 11 +- bin/mlst-make_blast_db.sh | 23 ++++ bin/run_jbrowse.sh | 120 ++++++++++++--------- conf/docker.config | 44 +------- conf/singularity.config | 50 +-------- docker/misc/Dockerfile | 92 ++++++++-------- docker/perlenv/Dockerfile | 38 ------- docker/perlenv/build.sh | 1 - docker/pyenv/Dockerfile | 26 ----- docker/pyenv/build.sh | 1 - docker/renv/reports/no_integronfinder.Rmd | 1 + docker/renv/reports/report_MGEs.Rmd | 21 ++++ docker/renv/reports/yes_digis.Rmd | 2 +- docker/renv/reports/yes_integronfinder.Rmd | 12 +++ docker/set_version.sh | 2 +- docs/config.md | 24 ++++- docs/custom-db.md | 2 +- docs/index.md | 3 +- docs/installation.md | 18 ++-- docs/outputs.md | 1 + docs/profiles.md | 4 +- docs/quickstart.md | 2 +- markdown/CHANGELOG.md | 6 ++ markdown/list_of_tools.md | 2 + modules/KOs/kofamscan.nf | 7 +- modules/MGEs/integron_finder.nf | 47 ++++++++ modules/MGEs/integron_finder_2gff.nf | 24 +++++ modules/MGEs/islandpath.nf | 9 +- modules/MGEs/plasmidfinder.nf | 7 +- modules/MGEs/platon.nf | 7 +- modules/assembly/flye.nf | 7 +- modules/assembly/unicycler.nf | 7 +- modules/bacannot_dbs/amrfinder.nf | 9 +- modules/bacannot_dbs/card.nf | 4 +- modules/bacannot_dbs/mlst.nf | 2 +- modules/bacannot_dbs/phigaro.nf | 2 +- modules/generic/bakta.nf | 13 ++- modules/generic/barrnap.nf | 7 +- modules/generic/circos.nf | 2 +- modules/generic/gc_skew.nf | 2 +- modules/generic/gff2gbk.nf | 3 - modules/generic/jbrowse.nf | 5 +- modules/generic/mash.nf | 7 +- modules/generic/merge_annotations.nf | 8 +- modules/generic/mlst.nf | 15 +-- modules/generic/prokka.nf | 7 +- modules/generic/reports.nf | 97 +++++++++-------- modules/generic/summary.nf | 6 +- modules/prophages/phigaro.nf | 12 ++- modules/prophages/phispy.nf | 7 +- modules/resistance/amrfinder.nf | 7 +- modules/resistance/rgi_annotation.nf | 12 ++- nextflow.config | 4 +- workflows/bacannot.nf | 56 ++++++---- 55 files changed, 523 insertions(+), 387 deletions(-) create mode 100755 bin/mlst-make_blast_db.sh delete mode 100644 docker/perlenv/Dockerfile delete mode 
100644 docker/perlenv/build.sh delete mode 100644 docker/pyenv/Dockerfile delete mode 100644 docker/pyenv/build.sh create mode 100644 docker/renv/reports/no_integronfinder.Rmd create mode 100644 docker/renv/reports/yes_integronfinder.Rmd create mode 100644 modules/MGEs/integron_finder.nf create mode 100644 modules/MGEs/integron_finder_2gff.nf diff --git a/.zenodo.json b/.zenodo.json index 64902e93..3865b55a 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -2,7 +2,7 @@ "description": "

The pipeline

\n\n

bacannot is a customisable, easy-to-use pipeline that uses state-of-the-art software for comprehensively annotating prokaryotic genomes having only Docker and Nextflow as dependencies. It is able to annotate and detect virulence and resistance genes, plasmids, secondary metabolites, genomic islands, prophages, ICEs, KO, and more, while providing nice and beautiful interactive documents for results exploration.

", "license": "other-open", "title": "fmalmeida/bacannot: A generic but comprehensive bacterial annotation pipeline", - "version": "v3.2", + "version": "v3.3", "upload_type": "software", "creators": [ { diff --git a/README.md b/README.md index 551b7cdf..35dbaf58 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,7 @@ Its main steps are: | Annotation of virulence genes | [Victors](http://www.phidias.us/victors/) and [VFDB](http://www.mgc.ac.cn/VFs/main.htm) | | Prophage sequences and genes annotation | [PHASTER](http://phast.wishartlab.com/), [Phigaro](https://github.com/bobeobibo/phigaro) and [PhySpy](https://github.com/linsalrob/PhiSpy) | | Annotation of integrative and conjugative elements | [ICEberg](http://db-mml.sjtu.edu.cn/ICEberg/) | +| Annotation of bacterial integrons | [Integron Finder](https://github.com/gem-pasteur/Integron_Finder) | | Focused detection of insertion sequences | [digIS](https://github.com/janka2012/digIS) | | _In silico_ detection of plasmids | [Plasmidfinder](https://cge.cbs.dtu.dk/services/PlasmidFinder/) and [Platon](https://github.com/oschwengers/platon) | | Prediction and visualization of genomic islands | [IslandPath-DIMOB](https://github.com/brinkmanlab/islandpath) and [gff-toolbox](https://github.com/fmalmeida/gff-toolbox) | @@ -90,14 +91,12 @@ These images have been kept separate to not create massive Docker image and to a * After installed, you need to download the required Docker images ```bash - docker pull fmalmeida/bacannot:v3.2_misc ; - docker pull fmalmeida/bacannot:v3.2_perlenv ; - docker pull fmalmeida/bacannot:v3.2_pyenv ; - docker pull fmalmeida/bacannot:v3.2_renv ; - docker pull fmalmeida/bacannot:jbrowse ; + docker pull fmalmeida/bacannot:v3.3_misc ; + docker pull fmalmeida/bacannot:v3.3_renv ; + docker pull fmalmeida/bacannot:jbrowse ; ``` -🔥 Nextflow can also automatically handle images download on the fly when executed. If docker has exceeded its download limit rates, please try again in a few hours. +🔥 Nextflow can also automatically handle images download on the fly when executed. All the other docker images from biocontainers are downloaded automatically. If docker has exceeded its download limit rates, please try again in a few hours. 2. 
Install Nextflow (version 20.10 or higher):
diff --git a/bin/mlst-make_blast_db.sh b/bin/mlst-make_blast_db.sh
new file mode 100755
index 00000000..4cc370f8
--- /dev/null
+++ b/bin/mlst-make_blast_db.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+MLSTDIR="$1"
+BLASTDIR="$DIR/../db/blast"
+BLASTFILE="$BLASTDIR/mlst.fa"
+
+mkdir -p "$BLASTDIR"
+rm -f "$BLASTFILE"
+
+#for N in $(find $MLSTDIR -maxdepth 1 | grep -v '_2$'); do
+for N in $(find $MLSTDIR -mindepth 1 -maxdepth 1 -type d); do
+  SCHEME=$(basename $N)
+  echo "Adding: $SCHEME"
+  cat "$MLSTDIR"/$SCHEME/*.tfa \
+    | grep -v 'not a locus' \
+    | sed -e "s/^>/>$SCHEME./" \
+    >> "$BLASTFILE"
+done
+
+makeblastdb -hash_index -in "$BLASTFILE" -dbtype nucl -title "PubMLST" -parse_seqids
+
+echo "Created BLAST database for $BLASTFILE"
diff --git a/bin/run_jbrowse.sh b/bin/run_jbrowse.sh
index 75eb3283..6753ca15 100755
--- a/bin/run_jbrowse.sh
+++ b/bin/run_jbrowse.sh
@@ -14,7 +14,7 @@ Help()
    echo "Simple help message for the utilization of this script"
    echo "It takes the jbrowse data path and all the files that shall be plotted from bacannot"
    echo
-   echo "Syntax: run_jbrowse.sh [-h|p|g|b|s|f|r|B|P|G|m|S|R|d|A]"
+   echo "Syntax: run_jbrowse.sh [-h|p|g|b|s|f|r|B|P|G|m|S|R|d|A|i]"
    echo "options:"
    echo
    echo "h     Print this help"
@@ -32,59 +32,63 @@ Help()
    echo "R     Path to Resfinder custom GFF"
    echo "d     Path to digIS custom GFF"
    echo "A     Path to antismash custom GFF"
+   echo "i     Path to Integron Finder custom GFF"
    echo ""
    echo
 }
 
 # Get the options
-while getopts "hp:g:b:s:f:r:B:P:G:m:S:R:d:A:" option; do
-   case $option in
-      h) # display Help
-         Help
-         exit;;
-p) # get genome prefix
-  PREFIX="$OPTARG"
-  ;;
-g) # get genome FASTA
-  GENOME="$OPTARG"
-  ;;
-b) # get GC bedgraph
-  BEDGRAPH="$OPTARG"
-  ;;
-s) # get chr sizes
-  CHRSIZES="$OPTARG"
-  ;;
-f) # get prokka gff
-  PROKKAGFF="$OPTARG"
-  ;;
-r) # get barrnap gff
-  rRNAGFF="$OPTARG"
-  ;;
-B) # get phigaro bed
-  PHIGAROBED="$OPTARG"
-  ;;
-P) # get phispy bed
-  PHISPYBED="$OPTARG"
-  ;;
-G) # get GIs bed
-  GIBED="$OPTARG"
-  ;;
-m) # get nanopolish methylation
-  NANOMETHYL="$OPTARG"
-  ;;
-S) # get nanopolish chr sizes
-  NANOSIZES="$OPTARG"
-  ;;
-R) # get resfinder GFF
-  RESFINDERGFF="$OPTARG"
-  ;;
-d) # get digIS GFF
-  DIGISGFF="$OPTARG"
-  ;;
-A) # get antismash GFF
-  ANTISMASHGFF="$OPTARG"
-  ;;
-   esac
+while getopts "hp:g:b:s:f:r:B:P:G:m:S:R:d:A:i:" option; do
+   case $option in
+      h) # display Help
+         Help
+         exit;;
+      p) # get genome prefix
+         PREFIX="$OPTARG"
+         ;;
+      g) # get genome FASTA
+         GENOME="$OPTARG"
+         ;;
+      b) # get GC bedgraph
+         BEDGRAPH="$OPTARG"
+         ;;
+      s) # get chr sizes
+         CHRSIZES="$OPTARG"
+         ;;
+      f) # get prokka gff
+         PROKKAGFF="$OPTARG"
+         ;;
+      r) # get barrnap gff
+         rRNAGFF="$OPTARG"
+         ;;
+      B) # get phigaro bed
+         PHIGAROBED="$OPTARG"
+         ;;
+      P) # get phispy bed
+         PHISPYBED="$OPTARG"
+         ;;
+      G) # get GIs bed
+         GIBED="$OPTARG"
+         ;;
+      m) # get nanopolish methylation
+         NANOMETHYL="$OPTARG"
+         ;;
+      S) # get nanopolish chr sizes
+         NANOSIZES="$OPTARG"
+         ;;
+      R) # get resfinder GFF
+         RESFINDERGFF="$OPTARG"
+         ;;
+      d) # get digIS GFF
+         DIGISGFF="$OPTARG"
+         ;;
+      A) # get antismash GFF
+         ANTISMASHGFF="$OPTARG"
+         ;;
+      i) # get integron finder GFF
+         INTEGRONFINDERGFF="$OPTARG"
+         ;;
+   esac
 done
 
 # Main
@@ -313,7 +317,7 @@ remove-track.pl --trackLabel "${PREFIX} CARD-RGI resistance features" --dir data
 --trackLabel "${PREFIX} Resfinder resistance features" --out "data" --nameAttributes "Resfinder_gene,ID,Resfinder_phenotype" ;
 remove-track.pl --trackLabel "${PREFIX} Resfinder resistance features" --dir data &> /tmp/error
 [ ! -s $RESFINDERGFF ] || echo -E " { \"compress\" : 0, \
-  \"displayMode\" : \"compact\", \
+   \"displayMode\" : \"compact\", \
 \"key\" : \"${PREFIX} Resfinder resistance features\", \
 \"category\" : \"Resistance annotation\", \
 \"label\" : \"${PREFIX} Resfinder resistance features\", \
@@ -343,6 +347,22 @@ remove-track.pl --trackLabel "${PREFIX} ICE genes from ICEberg database" --dir d
 \"urlTemplate\" : \"tracks/${PREFIX} ICE genes from ICEberg database/{refseq}/trackData.json\" } " | add-track-json.pl data/trackList.json
 [ $(grep "ICEberg" $PROKKAGFF | wc -l) -eq 0 ] || rm -f iceberg ices ;
 
+## Integron Finder
+[ $(wc -l < $INTEGRONFINDERGFF) -eq 0 ] || flatfile-to-json.pl --gff $INTEGRONFINDERGFF --key "${PREFIX} Annotated Integrons - Integron Finder" --trackType CanvasFeatures \
+--trackLabel "${PREFIX} Annotated Integrons - Integron Finder" --out "data" --nameAttributes "ID,integron_type" ;
+remove-track.pl --trackLabel "${PREFIX} Annotated Integrons - Integron Finder" --dir data &> /tmp/error
+[ $(wc -l < $INTEGRONFINDERGFF) -eq 0 ] || echo -E " { \"compress\" : 0, \
+\"displayMode\" : \"compact\", \
+ \"key\" : \"${PREFIX} Annotated Integrons - Integron Finder\", \
+ \"category\" : \"MGEs annotation\", \
+ \"label\" : \"${PREFIX} Annotated Integrons - Integron Finder\", \
+ \"storeClass\" : \"JBrowse/Store/SeqFeature/NCList\", \
+ \"style\" : { \"className\" : \"feature\", \"color\": \"#6db6d9\" }, \
+ \"trackType\" : \"CanvasFeatures\", \
+ \"type\" : \"CanvasFeatures\", \
+ \"nameAttributes\" : \"ID,integron_type\", \
+ \"urlTemplate\" : \"tracks/${PREFIX} Annotated Integrons - Integron Finder/{refseq}/trackData.json\" } " | add-track-json.pl data/trackList.json
+
 ## PROPHAGES
 ### PHAST
 [ $(grep "PHAST" $PROKKAGFF | wc -l) -eq 0 ] || grep "PHAST" $PROKKAGFF > prophage ;
diff --git a/conf/docker.config b/conf/docker.config
index 84710c67..a78d142f 100644
--- a/conf/docker.config
+++ b/conf/docker.config
@@ -16,22 +16,12 @@ process {
 
     // container with various tools for general purposes
     withLabel: 'db_download|db_tools|misc' {
-        container = 'fmalmeida/bacannot:v3.2_misc'
-    }
-
-    // container for perl tools
-    withLabel: 'perl' {
-        container = 'fmalmeida/bacannot:v3.2_perlenv'
-    }
-
-    // container for python tools
-    withLabel: 'python' {
-        container = 'fmalmeida/bacannot:v3.2_pyenv'
+        container = 'fmalmeida/bacannot:v3.3_misc'
     }
 
     // container for R tools
     withLabel: 'renv' {
-        container = 'fmalmeida/bacannot:v3.2_renv'
+        container = 'fmalmeida/bacannot:v3.3_renv'
     }
 
     // container for bacannot server
@@ -42,35 +32,5 @@ process {
     withLabel: 'jbrowse' {
         container = 'fmalmeida/bacannot:jbrowse'
     }
-
-    withName: UNICYCLER {
-        container = "quay.io/biocontainers/unicycler:${params.unicycler_version}"
-    }
-
-    withName: FLYE {
-        container = "quay.io/biocontainers/flye:${params.flye_version}"
-    }
-
-    withName: BAKTA {
-        container = "quay.io/biocontainers/bakta:${params.bakta_version}"
-    }
-
-    /*
-     * Other (non-image) customization
-     */
-
-    // islandPath dimob container
-    withName: 'ISLANDPATH' {
-        // it generally fails without any reason on the first time
-        errorStrategy = 'retry'
-        maxRetries = 5
-    }
-
-    // kofamscan container
-    withName: 'KOFAMSCAN' {
-        // it generally fails without any reason on the first time
-        errorStrategy = 'retry'
-        maxRetries = 2
-    }
 }
 
diff --git a/conf/singularity.config b/conf/singularity.config
index a7ca1839..eb8c0040 100644
--- a/conf/singularity.config
+++ b/conf/singularity.config
@@ -1,7 +1,7 @@
 // Container usage and
permission -docker.enabled = false -singularity.enabled = true -singularity.runOptions = '--writable-tmpfs' +docker.enabled = false +singularity.enabled = true +singularity.runOptions = '--writable-tmpfs -B $PWD' /* @@ -14,22 +14,12 @@ process { // container with various tools for general purposes withLabel: 'db_download|db_tools|misc' { - container = 'docker://fmalmeida/bacannot:v3.2_misc' - } - - // container for perl tools - withLabel: 'perl' { - container = 'docker://fmalmeida/bacannot:v3.2_perlenv' - } - - // container for python tools - withLabel: 'python' { - container = 'docker://fmalmeida/bacannot:v3.2_pyenv' + container = 'docker://fmalmeida/bacannot:v3.3_misc' } // container for R tools withLabel: 'renv' { - container = 'docker://fmalmeida/bacannot:v3.2_renv' + container = 'docker://fmalmeida/bacannot:v3.3_renv' } // container for bacannot server @@ -40,35 +30,5 @@ process { withLabel: 'jbrowse' { container = 'docker://fmalmeida/bacannot:jbrowse' } - - withName: UNICYCLER { - container = "https://depot.galaxyproject.org/singularity/unicycler:${params.unicycler_version}" - } - - withName: FLYE { - container = "https://depot.galaxyproject.org/singularity/flye:${params.flye_version}" - } - - withName: BAKTA { - container = "https://depot.galaxyproject.org/singularity/bakta:${params.bakta_version}" - } - - /* - * Other (non-image) customization - */ - - // islandPath dimob container - withName: 'ISLANDPATH' { - // it generally fails without any reason on the first time - errorStrategy = 'retry' - maxRetries = 5 - } - - // kofamscan container - withName: 'KOFAMSCAN' { - // it generally fails without any reason on the first time - errorStrategy = 'retry' - maxRetries = 2 - } } diff --git a/docker/misc/Dockerfile b/docker/misc/Dockerfile index 3853a59f..486eff08 100644 --- a/docker/misc/Dockerfile +++ b/docker/misc/Dockerfile @@ -2,42 +2,49 @@ FROM nfcore/base LABEL authors="Felipe Almeida" \ description="Docker image containing any-based bacannot tools" +# install mamba +RUN conda update -n base -c defaults conda && conda install -c conda-forge -y 'mamba>=1.0' + # Install the conda environment -RUN conda install -c bioconda -c defaults -c conda-forge -c anaconda -c falmeida --force-reinstall --update-deps --no-channel-priority \ - curl \ - git \ - 'python=3.7' \ - 'blast=2.12.0' \ - 'diamond=2.0.15' \ - 'bedtools=2.30' \ - 'samtools=1.14' \ - 'kma' \ - 'kofamscan' \ - 'ncbi-amrfinderplus' \ - 'nanopolish' \ - 'biopython==1.78' \ - gff-toolbox \ - seqkit && \ - conda clean -afy +RUN mamba install -y \ + -c bioconda -c defaults -c conda-forge -c anaconda -c falmeida \ + --no-channel-priority \ + 'conda-forge::python>=3.10' \ + 'blast>=2.12' \ + 'diamond>=2.0.15' \ + 'bedtools>=2.30' \ + 'samtools' \ + 'kma' \ + 'nanopolish' \ + 'biopython==1.78' \ + gff-toolbox \ + seqkit \ + bioawk \ + 'easy_circos>=0.3' \ + 'falmeida-py>=1.1' \ + 'conda-forge::openssl>=1.1.1' \ + emboss \ + libtiff \ + jq && \ + mamba clean -afy # Create env for digIS -RUN conda create -y -n digIS -c bioconda -c defaults -c conda-forge -c anaconda -c falmeida --no-channel-priority 'hmmer==3.1b2' 'biopython==1.77' nomkl && \ - conda clean -afy - -# Create env for antismash -RUN conda create -y -n antismash \ - -c bioconda -c defaults -c conda-forge -c anaconda -c falmeida --no-channel-priority \ - 'antismash>=6' 'anaconda::jinja2' 'anaconda::markupsafe' emboss nomkl && \ - rm -r /opt/conda/envs/antismash/lib/*/site-packages/antismash/databases && \ - conda clean -afy +RUN mamba create -y \ + -c bioconda -c defaults -c 
conda-forge -c anaconda -c falmeida \ + -n digIS \ + --no-channel-priority \ + 'hmmer==3.1b2' 'biopython==1.77' nomkl && \ + mamba clean -afy # Install pip packages -RUN pip install docopt pandas tabulate numpy bcbio-gff cgecore gitpython setuptools python-dateutil 'biopython==1.78' +# RUN pip install docopt pandas tabulate numpy bcbio-gff cgecore gitpython setuptools python-dateutil 'biopython==1.78' # Install KEGGDecoder -RUN conda create -n KEGGDecoder python=3.6 && \ +RUN mamba create \ + -n KEGGDecoder \ + python=3.6 && \ conda run -n KEGGDecoder python3 -m pip install KEGGDecoder && \ - conda clean -afy + mamba clean -afy # set CONDA_PREFIX ENV CONDA_PREFIX=/opt/conda @@ -48,29 +55,30 @@ COPY argminer_bkp/argminer.fasta /work/argminer.fasta COPY victors_bkp/victors_06-2022.fasta /work/victors.fasta # get a copy of resfinder -RUN conda create -y -n resfinder \ - -c bioconda -c anaconda -c conda-forge -c defaults \ +RUN mamba create -y -n resfinder \ + -c bioconda -c defaults -c conda-forge -c anaconda -c falmeida \ 'resfinder>=4.1' docopt pandas && \ - conda clean -afy + mamba clean -afy # get a copy of digis RUN git clone -b master https://github.com/janka2012/digIS.git COPY custom_fix_grange_digis.py /work/digIS/src/common/grange.py ENV PATH=/work/digIS:$PATH -# install jq -RUN apt-get update && apt-get install -y jq +# Create env for antismash +RUN conda create -y -n antismash -c bioconda -c conda-forge \ + 'antismash>=6' 'anaconda::jinja2' 'anaconda::markupsafe' nomkl && \ + rm -r /opt/conda/envs/antismash/lib/*/site-packages/antismash/databases && \ + mamba clean -afy -# install bioawk -RUN conda create -y -n bioawk \ - -c bioconda -c anaconda -c conda-forge -c defaults \ - bioawk && \ - conda clean -afy +# fix bioperl +RUN mamba create -n perl -y -c bioconda -c conda-forge perl-bioperl perl-app-cpanminus perl-yaml +RUN conda run -n perl PERL5LIB= PERL_LOCAL_LIB_ROOT= cpanm Bio::Root::RootI + +# fix python +RUN python3 -m pip install cryptography==38.0.4 # fix permissions RUN chmod 777 -R /work RUN chmod 777 -R /opt/conda/envs/antismash/lib/*/site-packages/antismash -RUN chmod 777 -R /opt/conda/envs/resfinder - -# fix antismash download script -# RUN sed -i 's|ftp://|http://|g' /opt/conda/envs/antismash/lib/*/site-packages/antismash/download_databases.py \ No newline at end of file +RUN chmod 777 -R /opt/conda/envs/resfinder \ No newline at end of file diff --git a/docker/perlenv/Dockerfile b/docker/perlenv/Dockerfile deleted file mode 100644 index 2e8b1d24..00000000 --- a/docker/perlenv/Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -FROM nfcore/base -LABEL authors="Felipe Almeida" -LABEL description="Docker image containing perl-based bacannot tools" -ENV IMAGE=perl - -# Install the conda environment -RUN conda install \ - -c bioconda -c defaults -c conda-forge -c anaconda -c r -c falmeida \ - --force-reinstall --update-deps --no-channel-priority \ - 'prokka>=1.14' \ - 'hmmer=3.1b2' \ - barrnap \ - mlst \ - 'islandpath>=1.0.6' \ - 'python>3' \ - 'perl>=5.26' \ - 'perl-bioperl>=1.7.8' \ - perl-digest-sha1 \ - perl-app-cpanminus \ - perl-local-lib \ - 'easy_circos>=0.3' -ENV PERL5LIB="/opt/conda/lib/perl5/site_perl" - -# Fix perl -- prokka -RUN apt-get update -y && apt-get install -y build-essential libexpat1-dev -RUN cpanm Test::Needs --force --reinstall -RUN cpanm Test::RequiresInternet Test::NoWarnings --force --reinstall -RUN conda install -c conda-forge -y 'perl-xml-parser>2.44' -RUN cpanm XML::Twig Bio::Perl --force --reinstall || true -RUN apt-get install -y libtiff5 - -# set 
CONDA_PREFIX
-ENV CONDA_PREFIX=/opt/conda
-
-WORKDIR /work
-
-# fix permissions for singularity
-RUN chmod -R 777 /work /opt/conda/db
\ No newline at end of file
diff --git a/docker/perlenv/build.sh b/docker/perlenv/build.sh
deleted file mode 100644
index 51153a5c..00000000
--- a/docker/perlenv/build.sh
+++ /dev/null
@@ -1 +0,0 @@
-../../bin/build_image.sh $1
diff --git a/docker/pyenv/Dockerfile b/docker/pyenv/Dockerfile
deleted file mode 100644
index 3a4fe8bc..00000000
--- a/docker/pyenv/Dockerfile
+++ /dev/null
@@ -1,26 +0,0 @@
-FROM nfcore/base
-LABEL authors="Felipe Almeida" \
-      description="Docker image containing python-based bacannot tools"
-
-# Install the conda environment
-# RUN conda install -c conda-forge -y 'mamba>=0.21'
-RUN conda install -y \
-    -c bioconda -c defaults -c conda-forge -c anaconda \
-    'openssl=1.1.1' \
-    'platon>=1.6' \
-    phispy \
-    plasmidfinder \
-    'python>=3.7' \
-    refseq_masher \
-    'gsl==2.7'
-RUN conda install -c conda-forge -y 'mamba>=0.21'
-RUN mamba create -n rgi -c bioconda -c defaults -c conda-forge -c anaconda 'rgi>=5.2.1'
-RUN mamba create -n phigaro -c bioconda -c defaults -c conda-forge -c anaconda phigaro
-RUN mamba create -n falmeida-py -c falmeida -c bioconda -c defaults -c conda-forge -c anaconda 'falmeida-py>=0.9'
-
-# set CONDA_PREFIX
-ENV CONDA_PREFIX=/opt/conda
-
-# Fix permissions
-WORKDIR /work
-RUN chmod -R 777 /work /opt/conda/envs/rgi/lib/python*/site-packages/app
\ No newline at end of file
diff --git a/docker/pyenv/build.sh b/docker/pyenv/build.sh
deleted file mode 100644
index 51153a5c..00000000
--- a/docker/pyenv/build.sh
+++ /dev/null
@@ -1 +0,0 @@
-../../bin/build_image.sh $1
diff --git a/docker/renv/reports/no_integronfinder.Rmd b/docker/renv/reports/no_integronfinder.Rmd
new file mode 100644
index 00000000..175d25ce
--- /dev/null
+++ b/docker/renv/reports/no_integronfinder.Rmd
@@ -0,0 +1 @@
+No integrons have been predicted with [Integron Finder](https://github.com/gem-pasteur/Integron_Finder). This might have happened either because your genome really does not have integron sequences or due to misassemblies. You can always try to run the online version of the tool: https://integronfinder.readthedocs.io/en/latest/user_guide/webserver.html
diff --git a/docker/renv/reports/report_MGEs.Rmd b/docker/renv/reports/report_MGEs.Rmd
index f17cabfe..cd3657a2 100644
--- a/docker/renv/reports/report_MGEs.Rmd
+++ b/docker/renv/reports/report_MGEs.Rmd
@@ -15,6 +15,7 @@ params:
   platon_tsv:
   gi_image:
   digis:
+  integronfinder:
   query:
   gff:
   output:
@@ -72,6 +73,16 @@ if (class(digis_gff) != 'try-error' & check_lines(digis_gff) > 0) {
   digis_null <- TRUE
 }
 
+## Read Integron Finder GFF
+integronfinder_gff <- try(gffRead(params$integronfinder), silent = TRUE)
+if (class(integronfinder_gff) != 'try-error' & check_lines(integronfinder_gff) > 0) {
+  integronfinder_not_null <- TRUE
+  integronfinder_null <- FALSE
+} else {
+  integronfinder_not_null <- FALSE
+  integronfinder_null <- TRUE
+}
+
 ## Read PHAST documents
 phast_prot_blast <- try(read.delim(params$phast_prot_blast, header = TRUE), silent = TRUE)
 phast_genome_blast <- try(read.delim(params$phast_genome_blast, header = TRUE), silent = TRUE)
@@ -177,6 +188,8 @@ In this context, this pipeline is capable of automatically annotating some mobil
     + IslandPath-DIMOB is a standalone software to predict genomic islands in bacterial and archaeal genomes based on the presence of dinucleotide biases and mobility genes.
 8. [digIS](https://github.com/janka2012/digIS).
     + digIS is a command-line tool for detection of insertion sequences (IS) in prokaryotic genomes.
+9. [Integron Finder](https://github.com/gem-pasteur/Integron_Finder).
+    + a command-line tool to identify integrons in DNA sequences
 
 ### Prediction thresholds
 
@@ -231,4 +244,12 @@ knitr::include_graphics(gi_image)
 ```
 
 ```{r, digis_conditional_block_2, echo=FALSE, results='asis', eval=digis_null, child='no_digis.Rmd'}
+```
+
+## Integron detection
+
+```{r, integronfinder_conditional_block, echo=FALSE, results='asis', eval=integronfinder_not_null, child='yes_integronfinder.Rmd'}
+```
+
+```{r, integronfinder_conditional_block_2, echo=FALSE, results='asis', eval=integronfinder_null, child='no_integronfinder.Rmd'}
 ```
\ No newline at end of file
diff --git a/docker/renv/reports/yes_digis.Rmd b/docker/renv/reports/yes_digis.Rmd
index c3aedaa3..b52eccc0 100644
--- a/docker/renv/reports/yes_digis.Rmd
+++ b/docker/renv/reports/yes_digis.Rmd
@@ -11,7 +11,7 @@ Insertions sequences have been predicted with [digIS](https://github.com/janka20
 
 > The program is executed **with** the GenBank annotation
 
-(#tab:write-table-ices-full) Insertions sequences predicted by digIS in GFF format.
+(#tab:write-table-digis-full) Insertion sequences predicted by digIS in GFF format.
 
 ```{r}
 datatable(digis_gff,
           escape = FALSE,
diff --git a/docker/renv/reports/yes_integronfinder.Rmd b/docker/renv/reports/yes_integronfinder.Rmd
new file mode 100644
index 00000000..164e7137
--- /dev/null
+++ b/docker/renv/reports/yes_integronfinder.Rmd
@@ -0,0 +1,12 @@
+Integrons have been predicted with [Integron Finder](https://github.com/gem-pasteur/Integron_Finder). More information on how the software operates can be found in its [paper](https://www.mdpi.com/2076-2607/10/4/700).
+
+(#tab:write-table-integronfinder-full) Integrons predicted by Integron Finder in GFF format. +```{r} +datatable(integronfinder_gff, + escape = FALSE, + filter = 'top', + options = dt_opt_lst, + extensions = 'Buttons', + rownames = F) +``` \ No newline at end of file diff --git a/docker/set_version.sh b/docker/set_version.sh index b082ce7f..f02bd27e 100644 --- a/docker/set_version.sh +++ b/docker/set_version.sh @@ -1 +1 @@ -export NEW_VERSION=v3.2 +export NEW_VERSION=v3.3 diff --git a/docs/config.md b/docs/config.md index aea28c7e..00664232 100644 --- a/docs/config.md +++ b/docs/config.md @@ -68,12 +68,17 @@ params { // By default (if Blank), this process is not executed. For execution the user needs to provide a value bedtools_merge_distance = null + /* + * Bakta optional + */ +// If user set path to an existing bakta database, the pipeline will use bakta instead of prokka + bakta_db = null + /* * Prokka optional parameters */ -// Do not use PGAP (NCBI) database? +// Include comprehensive PGAP hmm database in prokka annotation instead of TIGRFAM. // PGAP is big and using it may have higher running times but better results -// To do not use it, set the following to true prokka_use_pgap = false // Annotation mode: Archaea|Bacteria|Mitochondria|Viruses (default 'Bacteria') @@ -106,8 +111,7 @@ params { * * Which means: false will allow its execution * while true will create a barrier and skip a process. - -*/ + */ // (NOT RUN?) Plasmids annotation (controls PlasmidFinder execution) skip_plasmid_search = false @@ -179,8 +183,18 @@ params { // User's custom database coverage threshold blast_custom_mincov = 65 + /* + * Resources allocation configuration + * Defaults only, expecting to be overwritten + */ +// Select versions of bioconda quay.io additional tools +// Tools that are not part of the core of the pipeline, +// but can eventually be used by users + unicycler_version = '0.4.8--py38h8162308_3' + flye_version = '2.9--py39h39abbe0_0' + bakta_version = '1.6.1--pyhdfd78af_0' + // Max resource options -// Defaults only, expecting to be overwritten max_memory = '20.GB' max_cpus = 16 max_time = '40.h' diff --git a/docs/custom-db.md b/docs/custom-db.md index 490b6baf..cd580978 100644 --- a/docs/custom-db.md +++ b/docs/custom-db.md @@ -6,7 +6,7 @@ Although simple, the files must follow some formatting rules in order to make it ## Files in FASTA (`--custom_db`) -Sequence headers must follow a 5-field rule separated by "~~~" and spaces. The first 4 fields must be separated by "~~~" and the last one by one space, as exemplified below: +Sequence headers must follow a 5-field rule separated by "\~\~\~" and spaces. 
The first 4 fields must be separated by "\~\~\~" and the last one by one space, as exemplified below: ```bash # Sequence header diff --git a/docs/index.md b/docs/index.md index 036adba5..3770d11a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -23,7 +23,7 @@ The pipeline's main steps are: | :------------- | :------------------------- | | Genome assembly (if raw reads are given) | [Flye](https://github.com/fenderglass/Flye) and [Unicycler](https://github.com/rrwick/Unicycler) | | Identification of closest 10 NCBI Refseq genomes | [RefSeq Masher](https://github.com/phac-nml/refseq_masher) | -| Generic annotation and gene prediction | [Prokka](https://github.com/tseemann/prokka) | +| Generic annotation and gene prediction | [Prokka](https://github.com/tseemann/prokka) or [Bakta](https://github.com/oschwengers/bakta) | | rRNA prediction | [barrnap](https://github.com/tseemann/barrnap) | | Classification within multi-locus sequence types (STs) | [mlst](https://github.com/tseemann/mlst) | | KEGG KO annotation and visualization | [KofamScan](https://github.com/takaram/kofam_scan) and [KEGGDecoder](https://github.com/bjtully/BioData/tree/master/KEGGDecoder) | @@ -33,6 +33,7 @@ The pipeline's main steps are: | Annotation of virulence genes | [Victors](http://www.phidias.us/victors/) and [VFDB](http://www.mgc.ac.cn/VFs/main.htm) | | Prophage sequences and genes annotation | [PHASTER](http://phast.wishartlab.com/), [Phigaro](https://github.com/bobeobibo/phigaro) and [PhySpy](https://github.com/linsalrob/PhiSpy) | | Annotation of integrative and conjugative elements | [ICEberg](http://db-mml.sjtu.edu.cn/ICEberg/) | +| Annotation of bacterial integrons | [Integron Finder](https://github.com/gem-pasteur/Integron_Finder) | | Focused detection of insertion sequences | [digIS](https://github.com/janka2012/digIS) | | _In silico_ detection of plasmids | [Plasmidfinder](https://cge.cbs.dtu.dk/services/PlasmidFinder/) and [Platon](https://github.com/oschwengers/platon) | | Prediction and visualization of genomic islands | [IslandPath-DIMOB](https://github.com/brinkmanlab/islandpath) and [gff-toolbox](https://github.com/fmalmeida/gff-toolbox) | diff --git a/docs/installation.md b/docs/installation.md index 5fd627ef..87c83c43 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -19,27 +19,27 @@ nextflow pull fmalmeida/bacannot ## Downloading docker images -The docker images used by the pipeline are: +The custom docker images used by the pipeline are: ```bash -docker pull fmalmeida/bacannot:v3.2_misc ; -docker pull fmalmeida/bacannot:v3.2_perlenv ; -docker pull fmalmeida/bacannot:v3.2_pyenv ; -docker pull fmalmeida/bacannot:v3.2_renv ; -docker pull fmalmeida/bacannot:jbrowse ; +docker pull fmalmeida/bacannot:v3.3_misc ; +docker pull fmalmeida/bacannot:v3.3_renv ; +docker pull fmalmeida/bacannot:jbrowse ; ``` +> The pipeline also uses other public images available in biocontainers + !!! info "Using singularity" - Docker and singularity images are downloaded on the fly. Be sure to properly set `NXF_SINGULARITY_LIBRARYDIR` env variable to a writable directory if using Singularity. This will make that the downloaded images are resuable through different executions. Read more at: https://www.nextflow.io/docs/latest/singularity.html#singularity-docker-hub + **Docker and singularity images are downloaded on the fly**. Be sure to properly set `NXF_SINGULARITY_LIBRARYDIR` env variable to a writable directory if using Singularity. 
This ensures that the downloaded images are reusable across different executions. Read more at: https://www.nextflow.io/docs/latest/singularity.html#singularity-docker-hub

     For example, to download the images for docker you may:

     ```bash
     # apply this command to each image
     # just change the "/" and ":" for "-".
-    # E.g. Image fmalmeida/bacannot:v3.2_misc becomes fmalmeida-bacannot-v3.2_misc.img
-    singularity pull --dir $NXF_SINGULARITY_LIBRARYDIR fmalmeida-bacannot-v3.2_misc.img docker://fmalmeida/bacannot:v3.2_misc
+    # E.g. Image fmalmeida/bacannot:v3.3_misc becomes fmalmeida-bacannot-v3.3_misc.img
+    singularity pull --dir $NXF_SINGULARITY_LIBRARYDIR fmalmeida-bacannot-v3.3_misc.img docker://fmalmeida/bacannot:v3.3_misc
     ```

 ## Testing your installation
diff --git a/docs/outputs.md b/docs/outputs.md
index 1f8fe8d3..c072586e 100644
--- a/docs/outputs.md
+++ b/docs/outputs.md
@@ -26,6 +26,7 @@ After a successful execution, you will have something like this:
 |   ├── gffs                 # A copy of the main GFF files produced during the annotation
 |   ├── genomic_islands      # Genomic Islands predicted with IslandPath-DIMOB
 |   ├── ICEs                 # Results from ICEberg database annotation
+|   ├── integron_finder      # Results from Integron Finder tool annotation
 |   ├── jbrowse              # The files that set up the JBrowse genome browser
 |   ├── KOfamscan            # Results from annotation with KEGG database
 |   ├── methylations         # Methylated sites predicted with Nanopolish (if fast5 is given)
diff --git a/docs/profiles.md b/docs/profiles.md
index 409dc5ea..951c9f70 100644
--- a/docs/profiles.md
+++ b/docs/profiles.md
@@ -11,7 +11,7 @@ Note that some form of configuration will be needed so that Nextflow knows how t
 
 The pipeline have "standard profiles" set to run the workflows with either **docker** or **singularity** using the [local executor](https://www.nextflow.io/docs/latest/executor.html), which is nextflow's default executor and basically runs the pipeline processes in the computer where Nextflow is launched.
 
-If you need to run the pipeline using another executor such as sge, lsf, slurm, etc. you can take a look at [nextflow's manual page](https://www.nextflow.io/docs/latest/executor.html) to proper configure one in a new custom profile set in your personal copy of [MpGAP config file](https://github.com/fmalmeida/phylogram/blob/master/nextflow.config) and take advantage that nextflow allows multiple profiles to be used at once, e.g. `-profile docker,sge`.
+If you need to run the pipeline using another executor such as sge, lsf, slurm, etc. you can take a look at [nextflow's manual page](https://www.nextflow.io/docs/latest/executor.html) to properly configure one in a new custom profile set in your personal copy of the [bacannot config file](https://github.com/fmalmeida/bacannot/blob/master/nextflow.config) and take advantage that nextflow allows multiple profiles to be used at once, e.g. `-profile docker,sge`.
 
 !!! note
@@ -27,5 +27,5 @@ If you are using `singularity` and are persistently observing issues downloading
 
 ```bash
 # run
-nextflow run fmalmeida/phylogram -profile singularity [OPTIONS]
+nextflow run fmalmeida/bacannot -profile singularity [OPTIONS]
 ```
\ No newline at end of file
diff --git a/docs/quickstart.md b/docs/quickstart.md
index 69954a62..3f68c7f0 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -53,7 +53,7 @@ nextflow run fmalmeida/bacannot \
 
 !!! important "About profiles"
 
-    Users **must** select one of the available profiles: docker or singularity. Conda may come in near future. Please read more about how to [proper select NF profiles](profiles.md#)
+    Users **must** select one of the available profiles: docker or singularity. Conda may come in future. Please read more about how to [properly select NF profiles](profiles.md#)
 
 ## Run the pipeline
 
diff --git a/markdown/CHANGELOG.md b/markdown/CHANGELOG.md
index b47caeed..fcd33376 100644
--- a/markdown/CHANGELOG.md
+++ b/markdown/CHANGELOG.md
@@ -2,6 +2,12 @@
 
 The tracking for changes started in v2.1
 
+## v3.3 [TBD]
+
+* [[#50](https://github.com/fmalmeida/bacannot/issues/50)] -- Add `Integron Finder` tool to the pipeline
+* [#69](https://github.com/fmalmeida/bacannot/issues/69) -- Change how tools use docker images in order to:
+    * make tools use public bioconda images whenever possible, to allow tools to be added easily and to avoid conflicts in the docker images
+    * diminish the size of, and the number of tools inside, the docker images; the docker images are now only built to contain the tools and requirements of modules that cannot simply use bioconda docker images.
 ## v3.2 [19-December-2022]
 
 * Fixes https://github.com/fmalmeida/bacannot/issues/68 reported by @lam-c
diff --git a/markdown/list_of_tools.md b/markdown/list_of_tools.md
index 68d4bf21..7151f7d1 100644
--- a/markdown/list_of_tools.md
+++ b/markdown/list_of_tools.md
@@ -16,10 +16,12 @@ These are the tools that wrapped inside bacannot. **Cite** the tools whenever yo
 | Annotation of virulence genes | [Victors](http://www.phidias.us/victors/) and [VFDB](http://www.mgc.ac.cn/VFs/main.htm) |
 | Prophage sequences and genes annotation | [PHASTER](http://phast.wishartlab.com/), [Phigaro](https://github.com/bobeobibo/phigaro) and [PhySpy](https://github.com/linsalrob/PhiSpy) |
 | Annotation of integrative and conjugative elements | [ICEberg](http://db-mml.sjtu.edu.cn/ICEberg/) |
+| Annotation of bacterial integrons | [Integron Finder](https://github.com/gem-pasteur/Integron_Finder) |
 | Focused detection of insertion sequences | [digIS](https://github.com/janka2012/digIS) |
 | _In silico_ detection of plasmids | [Plasmidfinder](https://cge.cbs.dtu.dk/services/PlasmidFinder/) and [Platon](https://github.com/oschwengers/platon) |
 | Prediction and visualization of genomic islands | [IslandPath-DIMOB](https://github.com/brinkmanlab/islandpath) and [gff-toolbox](https://github.com/fmalmeida/gff-toolbox) |
 | Custom annotation from formatted FASTA or NCBI protein IDs | [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs) |
 | Merge of annotation results | [bedtools](https://bedtools.readthedocs.io/en/latest/) |
 | Genome Browser renderization | [JBrowse](http://jbrowse.org/) |
+| Circos plot generation | [easy_circos](https://easy_circos.readthedocs.io/en/latest/index.html) |
 | Renderization of automatic reports and shiny app for results interrogation | [R Markdown](https://rmarkdown.rstudio.com/), [Shiny](https://shiny.rstudio.com/) and [SequenceServer](https://sequenceserver.com/) |
diff --git a/modules/KOs/kofamscan.nf b/modules/KOs/kofamscan.nf
index 87d25450..35aff276 100644
--- a/modules/KOs/kofamscan.nf
+++ b/modules/KOs/kofamscan.nf
@@ -4,7 +4,12 @@ process KOFAMSCAN {
     else "$filename"
   }
   tag "${prefix}"
-  label = [ 'misc', 'process_high' ]
+  label = [ 'process_high', 'error_retry' ]
+
+  conda "bioconda::kofamscan=1.3.0"
+  container "${ workflow.containerEngine == 'singularity' ?
+      'https://depot.galaxyproject.org/singularity/kofamscan:1.3.0--hdfd78af_2' :
+      'quay.io/biocontainers/kofamscan:1.3.0--hdfd78af_2' }"
 
   input:
   tuple val(prefix), file('proteins.faa')
diff --git a/modules/MGEs/integron_finder.nf b/modules/MGEs/integron_finder.nf
new file mode 100644
index 00000000..910f503d
--- /dev/null
+++ b/modules/MGEs/integron_finder.nf
@@ -0,0 +1,47 @@
+process INTEGRON_FINDER {
+  publishDir "${params.output}", mode: 'copy', saveAs: { filename ->
+    if (filename.indexOf("_version.txt") > 0) "tools_versioning/$filename"
+    else "${prefix}/integron_finder/$filename"
+  }
+  tag "${prefix}"
+  label = [ 'process_medium' ]
+
+  conda "bioconda::integron_finder=2.0.1"
+  container "${ workflow.containerEngine == 'singularity' ?
+      'https://depot.galaxyproject.org/singularity/integron_finder:2.0.1--pyhdfd78af_0' :
+      'quay.io/biocontainers/integron_finder:2.0.1--pyhdfd78af_0' }"
+
+  input:
+  tuple val(prefix), file(genome)
+
+  output:
+  tuple val(prefix), path("*")                      , emit: all
+  tuple val(prefix), path("${prefix}_integrons.gbk"), emit: gbk, optional: true
+  path("integronfinder_version.txt")
+
+  script:
+  def args = task.ext.args ?: ''
+  """
+  # Get version
+  integron_finder --version > integronfinder_version.txt ;
+
+  # run tool
+  integron_finder \\
+      --local-max \\
+      --func-annot \\
+      --pdf \\
+      --gbk \\
+      --cpu $task.cpus \\
+      $args \\
+      $genome
+
+  # move results
+  mv Results_Integron_Finder_${prefix}/* . ;
+  rm -rf Results_Integron_Finder_${prefix} ;
+
+  # merge the per-replicon GenBank outputs into a single file, if any were produced
+  for gbk in \$(ls *.gbk) ; do
+      cat \$gbk >> ${prefix}_integrons.gbk ;
+  done
+  """
+}
diff --git a/modules/MGEs/integron_finder_2gff.nf b/modules/MGEs/integron_finder_2gff.nf
new file mode 100644
index 00000000..6abaeab3
--- /dev/null
+++ b/modules/MGEs/integron_finder_2gff.nf
@@ -0,0 +1,24 @@
+process INTEGRON_FINDER_2GFF {
+  publishDir "${params.output}/${prefix}/integron_finder", mode: 'copy'
+  tag "${prefix}"
+  label = [ 'misc', 'process_low' ]
+
+  input:
+  tuple val(prefix), file(gbk)
+
+  output:
+  tuple val(prefix), path("${prefix}_integrons.gff"), emit: gff
+
+  script:
+  def args = task.ext.args ?: ''
+  """
+  # convert the merged GenBank file to GFF, keeping only integron features
+  touch ${prefix}_integrons.gff ;
+  for gbk in \$(ls *.gbk) ; do
+      conda run -n perl bp_genbank2gff3 \$gbk -o - | \
+          grep 'integron_id' | \
+          sed 's|ID=.*integron_id=|ID=|g' | \
+          sed 's/GenBank/Integron_Finder/g' >> ${prefix}_integrons.gff
+  done
+  """
+}
diff --git a/modules/MGEs/islandpath.nf b/modules/MGEs/islandpath.nf
index d7ded993..d2f15a1b 100644
--- a/modules/MGEs/islandpath.nf
+++ b/modules/MGEs/islandpath.nf
@@ -1,7 +1,14 @@
 process ISLANDPATH {
   publishDir "${params.output}/${prefix}/genomic_islands", mode: 'copy'
   tag "${prefix}"
-  label = [ 'perl', 'process_low' ]
+  label = [ 'process_low' ]
+  errorStrategy = 'retry'
+  maxRetries = 5
+
+  conda "bioconda::islandpath=1.0.6"
+  container "${ workflow.containerEngine == 'singularity' ?
+      'https://depot.galaxyproject.org/singularity/islandpath:1.0.6--hdfd78af_0' :
+      'quay.io/biocontainers/islandpath:1.0.6--hdfd78af_0' }"
 
   input:
   tuple val(prefix), file("annotation.gbk")
diff --git a/modules/MGEs/plasmidfinder.nf b/modules/MGEs/plasmidfinder.nf
index 318e93ea..44f344b1 100644
--- a/modules/MGEs/plasmidfinder.nf
+++ b/modules/MGEs/plasmidfinder.nf
@@ -4,7 +4,12 @@ process PLASMIDFINDER {
     else null
   }
   tag "${prefix}"
-  label = [ 'python', 'process_low' ]
+  label = [ 'process_low' ]
+
+  conda "bioconda::plasmidfinder=2.1.6"
+  container "${ workflow.containerEngine == 'singularity' ?
+ 'https://depot.galaxyproject.org/singularity/plasmidfinder:2.1.6--py310hdfd78af_1' : + 'quay.io/biocontainers/plasmidfinder:2.1.6--py310hdfd78af_1' }" input: tuple val(prefix), file(genome) diff --git a/modules/MGEs/platon.nf b/modules/MGEs/platon.nf index 086ab4d3..84d9cfcf 100644 --- a/modules/MGEs/platon.nf +++ b/modules/MGEs/platon.nf @@ -5,7 +5,12 @@ process PLATON { else null } tag "${prefix}" - label = [ 'python', 'process_medium' ] + label = [ 'process_medium' ] + + conda "bioconda::platon=1.6" + container "${ workflow.containerEngine == 'singularity' ? + 'https://depot.galaxyproject.org/singularity/platon:1.6--pyhdfd78af_1' : + 'quay.io/biocontainers/platon:1.6--pyhdfd78af_1' }" input: tuple val(prefix), file(genome) diff --git a/modules/assembly/flye.nf b/modules/assembly/flye.nf index 05e73580..29976210 100644 --- a/modules/assembly/flye.nf +++ b/modules/assembly/flye.nf @@ -4,9 +4,14 @@ process FLYE { else if (filename == "flye_${prefix}") "assembly" else null } - label 'process_high' + label = [ 'process_high', 'error_retry' ] tag "${prefix}" + conda "bioconda::flye=2.9" + container "${ workflow.containerEngine == 'singularity' ? + 'https://depot.galaxyproject.org/singularity/flye:2.9--py39h6935b12_1' : + 'quay.io/biocontainers/flye:2.9--py39h6935b12_1' }" + input: tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), val(assembly), val(resfinder_species) diff --git a/modules/assembly/unicycler.nf b/modules/assembly/unicycler.nf index 75883bf5..5b9fa642 100644 --- a/modules/assembly/unicycler.nf +++ b/modules/assembly/unicycler.nf @@ -4,9 +4,14 @@ process UNICYCLER { else if (filename == "unicycler_${prefix}") "assembly" else null } - label 'process_high' + label = [ 'process_high', 'error_retry' ] tag "${prefix}" + conda "bioconda::unicycler=0.4.8" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/unicycler:0.4.8--py38h8162308_3' : + 'quay.io/biocontainers/unicycler:0.4.8--py38h8162308_3' }" + input: tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), val(assembly), val(resfinder_species) diff --git a/modules/bacannot_dbs/amrfinder.nf b/modules/bacannot_dbs/amrfinder.nf index 0d3b3955..7f4ffbaf 100644 --- a/modules/bacannot_dbs/amrfinder.nf +++ b/modules/bacannot_dbs/amrfinder.nf @@ -1,7 +1,12 @@ process AMRFINDER_DB { publishDir "${params.output}/amrfinder_db", mode: 'copy', overwrite: "$params.force_update" - label = [ 'db_download', 'process_ultralow' ] - + label 'process_ultralow' + + conda "bioconda::ncbi-amrfinderplus=3.11.2" + container "${ workflow.containerEngine == 'singularity' ? 
+ 'docker://ncbi/amr:3.11.2-2022-12-19.1' : + 'ncbi/amr:3.11.2-2022-12-19.1' }" + output: file("*") diff --git a/modules/bacannot_dbs/card.nf b/modules/bacannot_dbs/card.nf index d28feacb..411802a0 100644 --- a/modules/bacannot_dbs/card.nf +++ b/modules/bacannot_dbs/card.nf @@ -1,7 +1,7 @@ process CARD_DB { publishDir "${params.output}/card_db", mode: 'copy', overwrite: "$params.force_update" label = [ 'db_download', 'process_ultralow' ] - + output: file("*") @@ -11,5 +11,5 @@ process CARD_DB { wget --tries=10 https://card.mcmaster.ca/latest/data tar -xvf data ./card.json rm data - """ + """ } diff --git a/modules/bacannot_dbs/mlst.nf b/modules/bacannot_dbs/mlst.nf index 5c6401de..11de39df 100644 --- a/modules/bacannot_dbs/mlst.nf +++ b/modules/bacannot_dbs/mlst.nf @@ -1,7 +1,7 @@ process MLST_DB { publishDir "${params.output}/mlst_db", mode: 'copy', overwrite: "$params.force_update" label = [ 'db_download', 'process_ultralow' ] - + output: file("*") diff --git a/modules/bacannot_dbs/phigaro.nf b/modules/bacannot_dbs/phigaro.nf index 7c9c2bb5..03f9c5a5 100644 --- a/modules/bacannot_dbs/phigaro.nf +++ b/modules/bacannot_dbs/phigaro.nf @@ -1,7 +1,7 @@ process PHIGARO_DB { publishDir "${params.output}/phigaro_db", mode: 'copy', overwrite: "$params.force_update" label = [ 'db_download', 'process_medium' ] - + output: file("*") diff --git a/modules/generic/bakta.nf b/modules/generic/bakta.nf index 80253324..16cd793a 100644 --- a/modules/generic/bakta.nf +++ b/modules/generic/bakta.nf @@ -1,11 +1,16 @@ process BAKTA { publishDir "${params.output}/${prefix}", mode: 'copy', saveAs: { filename -> - if (filename.indexOf("_version.txt") > 0) "tools_versioning/$filename" - else if (filename == "annotation") "$filename" - else null + if (filename.indexOf("_version.txt") > 0) "tools_versioning/$filename" + else if (filename == "annotation") "$filename" + else null } tag "${prefix}" - label = [ 'misc', 'process_medium', 'error_retry' ] + label = [ 'process_medium', 'error_retry' ] + + conda "bioconda::bakta=1.6.1" + container "${ workflow.containerEngine == 'singularity' ? + 'https://depot.galaxyproject.org/singularity/bakta:1.6.1--pyhdfd78af_0' : + 'quay.io/biocontainers/bakta:1.6.1--pyhdfd78af_0' }" input: tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), file(assembly), val(resfinder_species) diff --git a/modules/generic/barrnap.nf b/modules/generic/barrnap.nf index cea24619..147a2ac0 100644 --- a/modules/generic/barrnap.nf +++ b/modules/generic/barrnap.nf @@ -4,7 +4,12 @@ process BARRNAP { else "rRNA/$filename" } tag "${prefix}" - label = [ 'perl', 'process_low' ] + label = [ 'process_low' ] + + conda "bioconda::barrnap=0.9" + container "${ workflow.containerEngine == 'singularity' ? 
+      'https://depot.galaxyproject.org/singularity/barrnap:0.9--hdfd78af_4' :
+      'quay.io/biocontainers/barrnap:0.9--hdfd78af_4' }"
 
   input:
   tuple val(prefix), file(genome)
diff --git a/modules/generic/circos.nf b/modules/generic/circos.nf
index 4fe3d0bf..e36db10e 100644
--- a/modules/generic/circos.nf
+++ b/modules/generic/circos.nf
@@ -5,7 +5,7 @@ process CIRCOS {
   }
 
   tag "$prefix"
-  label = [ 'perl', 'process_low' ]
+  label = [ 'misc', 'process_low' ]
 
   input:
   tuple val(prefix), path(inputs, stageAs: 'results*')
diff --git a/modules/generic/gc_skew.nf b/modules/generic/gc_skew.nf
index e4827c78..f6db9dda 100644
--- a/modules/generic/gc_skew.nf
+++ b/modules/generic/gc_skew.nf
@@ -1,7 +1,7 @@
 process GC_SKEW {
 
   tag "$prefix"
-  label = [ 'python', 'process_low' ]
+  label = [ 'misc', 'process_low' ]
 
   input:
   tuple val(prefix), path(inputs)
diff --git a/modules/generic/gff2gbk.nf b/modules/generic/gff2gbk.nf
index df73cb82..c1e0ff88 100644
--- a/modules/generic/gff2gbk.nf
+++ b/modules/generic/gff2gbk.nf
@@ -10,9 +10,6 @@ process GFF2GBK {
   path "*.genbank", emit: results
 
   """
-  # Activate env
-  export PATH=/opt/conda/envs/antismash/bin:\$PATH
-
   # Run emboss seqret
   seqret \\
       -sequence $input \\
diff --git a/modules/generic/jbrowse.nf b/modules/generic/jbrowse.nf
index ea2cc183..d0b80a62 100644
--- a/modules/generic/jbrowse.nf
+++ b/modules/generic/jbrowse.nf
@@ -4,7 +4,7 @@ process JBROWSE {
   tag "${prefix}"
 
   input:
-  tuple val(prefix), file(merged_gff), file(draft), file("prokka_gff"), file(barrnap), file(gc_bedGraph), file(gc_chrSizes), file(resfinder_gff), file(phigaro), file(genomic_islands), file("methylation"), file("chr.sizes"), file(phispy_tsv), file(digIS_gff), file(antiSMASH), file(custom_annotations)
+  tuple val(prefix), file(merged_gff), file(draft), file("prokka_gff"), file(barrnap), file(gc_bedGraph), file(gc_chrSizes), file(resfinder_gff), file(phigaro), file(genomic_islands), file("methylation"), file("chr.sizes"), file(phispy_tsv), file(digIS_gff), file(antiSMASH), file(custom_annotations), file(integron_finder)
 
   output:
   path "*", emit: results
@@ -29,6 +29,7 @@
       -S chr.sizes \\
       -R $resfinder_gff \\
       -d $digIS_gff \\
-      -A $antiSMASH
+      -A $antiSMASH \\
+      -i $integron_finder
   """
 }
diff --git a/modules/generic/mash.nf b/modules/generic/mash.nf
index 6264fdb6..0273d2bc 100644
--- a/modules/generic/mash.nf
+++ b/modules/generic/mash.nf
@@ -4,7 +4,12 @@ process REFSEQ_MASHER {
     else "refseq_masher/$filename"
   }
   tag "${prefix}"
-  label = [ 'python', 'process_low' ]
+  label = [ 'process_low' ]
+
+  conda "bioconda::refseq_masher=0.1.2"
+  container "${ workflow.containerEngine == 'singularity' ?
+ 'https://depot.galaxyproject.org/singularity/refseq_masher:0.1.2--py_0' : + 'quay.io/biocontainers/refseq_masher:0.1.2--py_0' }" input: tuple val(prefix), path(genome) diff --git a/modules/generic/merge_annotations.nf b/modules/generic/merge_annotations.nf index 96d589b7..7343bd8a 100644 --- a/modules/generic/merge_annotations.nf +++ b/modules/generic/merge_annotations.nf @@ -4,7 +4,7 @@ process MERGE_ANNOTATIONS { tag "${prefix}" input: - tuple val(prefix), file('prokka_gff'), file(kofamscan), file(vfdb), file(victors), file(amrfinder), file(resfinder), file(rgi), file(iceberg), file(phast), file('digis_gff'), file(custom_databases) + tuple val(prefix), file('prokka_gff'), file(kofamscan), file(vfdb), file(victors), file(amrfinder), file(resfinder), file(rgi), file(iceberg), file(phast), file('digis_gff'), file(custom_databases), file(integron_finder) output: tuple val(prefix), path("${prefix}.gff") , emit: gff @@ -108,5 +108,11 @@ process MERGE_ANNOTATIONS { cat ${prefix}.gff transposable_elements_digis.gff | bedtools sort > tmp.out.gff ; ( cat tmp.out.gff > ${prefix}.gff && rm tmp.out.gff ); fi + + ### integron_finder results + ### integrons are unique / complete elements and should not be intersected + cat ${prefix}.gff $integron_finder | bedtools sort > tmp.gff ; + cat tmp.gff > ${prefix}.gff + rm tmp.gff """ } diff --git a/modules/generic/mlst.nf b/modules/generic/mlst.nf index 488327c8..9b67121a 100644 --- a/modules/generic/mlst.nf +++ b/modules/generic/mlst.nf @@ -1,10 +1,15 @@ process MLST { publishDir "${params.output}/${prefix}", mode: 'copy', saveAs: { filename -> - if (filename.indexOf("_version.txt") > 0) "tools_versioning/$filename" - else "MLST/$filename" + if (filename.indexOf("_version.txt") > 0) "tools_versioning/$filename" + else "MLST/$filename" } tag "${prefix}" - label = [ 'perl', 'process_ultralow' ] + label = [ 'process_ultralow' ] + + conda "bioconda::mlst=2.19.0" + container "${ workflow.containerEngine == 'singularity' ? + 'https://depot.galaxyproject.org/singularity/mlst:2.19.0--hdfd78af_1' : + 'quay.io/biocontainers/mlst:2.19.0--hdfd78af_1' }" input: tuple val(prefix), file(genome) @@ -19,9 +24,7 @@ process MLST { script: """ # update tool database - mlst_dir=\$(which mlst | sed 's/bin\\/mlst//g') - cp ${bacannot_db}/mlst_db/* -r \${mlst_dir}/db/pubmlst/ - ( cd \$mlst_dir/scripts && ./mlst-make_blast_db ) + mlst-make_blast_db.sh ${bacannot_db}/mlst_db # Save mlst tool version mlst --version > mlst_version.txt ; diff --git a/modules/generic/prokka.nf b/modules/generic/prokka.nf index 61ae6d7f..458a8346 100644 --- a/modules/generic/prokka.nf +++ b/modules/generic/prokka.nf @@ -5,7 +5,12 @@ process PROKKA { else null } tag "${prefix}" - label = [ 'perl', 'process_medium' ] + label = [ 'process_medium' ] + + conda "bioconda::prokka=1.14.6" + container "${ workflow.containerEngine == 'singularity' ? 
diff --git a/modules/generic/prokka.nf b/modules/generic/prokka.nf
index 61ae6d7f..458a8346 100644
--- a/modules/generic/prokka.nf
+++ b/modules/generic/prokka.nf
@@ -5,7 +5,12 @@ process PROKKA {
         else null
     }
     tag "${prefix}"
-    label = [ 'perl', 'process_medium' ]
+    label = [ 'process_medium' ]
+
+    conda "bioconda::prokka=1.14.6"
+    container "${ workflow.containerEngine == 'singularity' ?
+        'https://depot.galaxyproject.org/singularity/prokka:1.14.6--pl5321hdfd78af_4' :
+        'quay.io/biocontainers/prokka:1.14.6--pl5321hdfd78af_4' }"
 
     input:
     tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), file(assembly), val(resfinder_species)
diff --git a/modules/generic/reports.nf b/modules/generic/reports.nf
index 29e2d418..b450093f 100644
--- a/modules/generic/reports.nf
+++ b/modules/generic/reports.nf
@@ -4,7 +4,7 @@ process REPORT {
     tag "${prefix}"
 
     input:
-    tuple val(prefix), file('annotation_stats.tsv'), file(gff), file(barrnap), file(mlst), file(keggsvg), file(refseq_masher_txt), file(amrfinder), file(rgi), file(rgi_parsed), file(rgi_heatmap), file(argminer_out), file(resfinder_tab), file(resfinder_point), file(resfinder_phenotable), file(vfdb_blastn), file(victors_blastp), file(phigaro_txt), file(phispy_tsv), file(iceberg_blastp), file(iceberg_blastn), file(plasmids_tsv), file(platon_tsv), file(gi_image), file(phast_blastp), file(digIS)
+    tuple val(prefix), file('annotation_stats.tsv'), file(gff), file(barrnap), file(mlst), file(keggsvg), file(refseq_masher_txt), file(amrfinder), file(rgi), file(rgi_parsed), file(rgi_heatmap), file(argminer_out), file(resfinder_tab), file(resfinder_point), file(resfinder_phenotable), file(vfdb_blastn), file(victors_blastp), file(phigaro_txt), file(phispy_tsv), file(iceberg_blastp), file(iceberg_blastn), file(plasmids_tsv), file(platon_tsv), file(gi_image), file(phast_blastp), file(digIS), file(integronfinder)
 
     output:
     path '*.html', emit: results
@@ -23,54 +23,67 @@ process REPORT {
 
     ## Generate generic Report
     rmarkdown::render("report_general.Rmd" , \
-                      params = list( generic_annotation = "annotation_stats.tsv", \
-                                     generic_annotator = "${generic_annotator}", \
-                                     kegg = "$keggsvg", \
-                                     barrnap = "$barrnap", \
-                                     mlst = "$mlst", \
-                                     refseq_masher = "$refseq_masher_txt", \
-                                     query = "${prefix}")) ;
+                      params = list(
+                        generic_annotation = "annotation_stats.tsv", \
+                        generic_annotator = "${generic_annotator}", \
+                        kegg = "$keggsvg", \
+                        barrnap = "$barrnap", \
+                        mlst = "$mlst", \
+                        refseq_masher = "$refseq_masher_txt", \
+                        query = "${prefix}"
+                      )
+    ) ;
 
     ## Generate Resistance Report
-    rmarkdown::render("report_resistance.Rmd", params = list(\
-                      blast_id = ${params.blast_resistance_minid} , \
-                      blast_cov = ${params.blast_resistance_mincov}, \
-                      amrfinder = "$amrfinder", \
-                      query = "${prefix}", \
-                      rgitool = "$rgi", \
-                      rgiparsed = "$rgi_parsed", \
-                      rgi_heatmap = "$rgi_heatmap", \
-                      argminer_blastp = "$argminer_out", \
-                      resfinder_tab = "$resfinder_tab", \
-                      resfinder_pointfinder = "$resfinder_point", \
-                      resfinder_phenotype = "$resfinder_phenotable", \
-                      generic_annotator = "${generic_annotator}", \
-                      gff = "$gff")) ;
+    rmarkdown::render("report_resistance.Rmd", \
+                      params = list(\
+                        blast_id = ${params.blast_resistance_minid} , \
+                        blast_cov = ${params.blast_resistance_mincov}, \
+                        amrfinder = "$amrfinder", \
+                        query = "${prefix}", \
+                        rgitool = "$rgi", \
+                        rgiparsed = "$rgi_parsed", \
+                        rgi_heatmap = "$rgi_heatmap", \
+                        argminer_blastp = "$argminer_out", \
+                        resfinder_tab = "$resfinder_tab", \
+                        resfinder_pointfinder = "$resfinder_point", \
+                        resfinder_phenotype = "$resfinder_phenotable", \
+                        generic_annotator = "${generic_annotator}", \
+                        gff = "$gff"
+                      )
+    ) ;
 
     ## Generate Virulence Report
     rmarkdown::render("report_virulence.Rmd" , \
-                      params = list( blast_id = ${params.blast_virulence_minid} , \
-                                     blast_cov = ${params.blast_virulence_mincov}, \
-                                     vfdb_blast = "$vfdb_blastn", \
-                                     gff = "$gff", \
-                                     victors_blast = "$victors_blastp", \
-                                     query = "${prefix}")) ;
+                      params = list(
+                        blast_id = ${params.blast_virulence_minid} , \
+                        blast_cov = ${params.blast_virulence_mincov}, \
+                        vfdb_blast = "$vfdb_blastn", \
+                        gff = "$gff", \
+                        victors_blast = "$victors_blastp", \
+                        query = "${prefix}"
+                      )
+    ) ;
 
     ## Generate MGEs report
     rmarkdown::render("report_MGEs.Rmd", \
-                      params = list( blast_id = ${params.blast_MGEs_minid}, \
-                                     blast_cov = ${params.blast_MGEs_mincov}, \
-                                     phigaro_dir = "${params.output}/prophages/phigaro", \
-                                     phigaro_txt = "$phigaro_txt", \
-                                     phispy_tsv = "$phispy_tsv", \
-                                     ice_prot_blast = "$iceberg_blastp", \
-                                     ice_genome_blast = "$iceberg_blastn", \
-                                     plasmid_finder_tab = "$plasmids_tsv", \
-                                     platon_tsv = "$platon_tsv", \
-                                     query = "${prefix}", \
-                                     gi_image = "$gi_image", \
-                                     digis = "$digIS", \
-                                     gff = "$gff", \
-                                     phast_prot_blast = "$phast_blastp" )) ;
+                      params = list(
+                        blast_id = ${params.blast_MGEs_minid}, \
+                        blast_cov = ${params.blast_MGEs_mincov}, \
+                        phigaro_dir = "${params.output}/prophages/phigaro", \
+                        phigaro_txt = "$phigaro_txt", \
+                        phispy_tsv = "$phispy_tsv", \
+                        ice_prot_blast = "$iceberg_blastp", \
+                        ice_genome_blast = "$iceberg_blastn", \
+                        plasmid_finder_tab = "$plasmids_tsv", \
+                        platon_tsv = "$platon_tsv", \
+                        query = "${prefix}", \
+                        gi_image = "$gi_image", \
+                        digis = "$digIS", \
+                        integronfinder = "$integronfinder", \
+                        gff = "$gff", \
+                        phast_prot_blast = "$phast_blastp"
+                      )
+    ) ;
     """
 }
"$gff", \ - victors_blast = "$victors_blastp", \ - query = "${prefix}")) ; + params = list( + blast_id = ${params.blast_virulence_minid} , \ + blast_cov = ${params.blast_virulence_mincov}, \ + vfdb_blast = "$vfdb_blastn", \ + gff = "$gff", \ + victors_blast = "$victors_blastp", \ + query = "${prefix}" + ) + ) ; ## Generate MGEs report rmarkdown::render("report_MGEs.Rmd", \ - params = list( blast_id = ${params.blast_MGEs_minid}, \ - blast_cov = ${params.blast_MGEs_mincov}, \ - phigaro_dir = "${params.output}/prophages/phigaro", \ - phigaro_txt = "$phigaro_txt", \ - phispy_tsv = "$phispy_tsv", \ - ice_prot_blast = "$iceberg_blastp", \ - ice_genome_blast = "$iceberg_blastn", \ - plasmid_finder_tab = "$plasmids_tsv", \ - platon_tsv = "$platon_tsv", \ - query = "${prefix}", \ - gi_image = "$gi_image", \ - digis = "$digIS", \ - gff = "$gff", \ - phast_prot_blast = "$phast_blastp" )) ; + params = list( + blast_id = ${params.blast_MGEs_minid}, \ + blast_cov = ${params.blast_MGEs_mincov}, \ + phigaro_dir = "${params.output}/prophages/phigaro", \ + phigaro_txt = "$phigaro_txt", \ + phispy_tsv = "$phispy_tsv", \ + ice_prot_blast = "$iceberg_blastp", \ + ice_genome_blast = "$iceberg_blastn", \ + plasmid_finder_tab = "$plasmids_tsv", \ + platon_tsv = "$platon_tsv", \ + query = "${prefix}", \ + gi_image = "$gi_image", \ + digis = "$digIS", \ + integronfinder = "$integronfinder", \ + gff = "$gff", \ + phast_prot_blast = "$phast_blastp" + ) + ) ; """ } diff --git a/modules/generic/summary.nf b/modules/generic/summary.nf index eda601ad..4983ea97 100644 --- a/modules/generic/summary.nf +++ b/modules/generic/summary.nf @@ -1,7 +1,7 @@ process SUMMARY { publishDir "${params.output}/${prefix}", mode: 'copy' tag "${prefix}" - label = [ 'python', 'process_low' ] + label = [ 'misc', 'process_low' ] input: @@ -16,7 +16,8 @@ process SUMMARY { file(stageAs: "results/${prefix}/resistance/RGI/*"), file(stageAs: "results/${prefix}/resistance/ARGMiner/*"), file(stageAs: "results/${prefix}/resistance/*"), file(stageAs: "results/${prefix}/methylations/*"), file(stageAs: "results/${prefix}/refseq_masher/*"), file(stageAs: "results/${prefix}/*"), - file(stageAs: "results/${prefix}/*"), file(stageAs: "results/${prefix}/gffs/*") + file(stageAs: "results/${prefix}/*"), file(stageAs: "results/${prefix}/gffs/*"), + file(stageAs: "results/${prefix}/integron_finder/*") output: tuple val(prefix), path("${prefix}_summary.json"), emit: summaries @@ -25,7 +26,6 @@ process SUMMARY { """ mkdir -p results/${prefix}/annotation ln -rs annotation/* results/${prefix}/annotation - source activate falmeida-py falmeida-py bacannot2json -i results -o ${prefix}_summary.json """ } diff --git a/modules/prophages/phigaro.nf b/modules/prophages/phigaro.nf index f172cb86..4b7cca21 100644 --- a/modules/prophages/phigaro.nf +++ b/modules/prophages/phigaro.nf @@ -4,7 +4,12 @@ process PHIGARO { else "prophages/phigaro/$filename" } tag "${prefix}" - label = [ 'python', 'process_medium' ] + label = [ 'process_medium' ] + + conda "bioconda::phigaro=2.3.0" + container "${ workflow.containerEngine == 'singularity' ? 
diff --git a/modules/prophages/phigaro.nf b/modules/prophages/phigaro.nf
index f172cb86..4b7cca21 100644
--- a/modules/prophages/phigaro.nf
+++ b/modules/prophages/phigaro.nf
@@ -4,7 +4,12 @@ process PHIGARO {
         else "prophages/phigaro/$filename"
     }
     tag "${prefix}"
-    label = [ 'python', 'process_medium' ]
+    label = [ 'process_medium' ]
+
+    conda "bioconda::phigaro=2.3.0"
+    container "${ workflow.containerEngine == 'singularity' ?
+        'https://depot.galaxyproject.org/singularity/phigaro:2.3.0--pyh7b7c402_0' :
+        'quay.io/biocontainers/phigaro:2.3.0--pyh7b7c402_0' }"
 
     input:
     tuple val(prefix), file("assembly.fasta")
@@ -18,10 +23,7 @@ process PHIGARO {
     path('phigaro_version.txt')            , emit: version
 
     script:
-    """
-    # activate env
-    source activate phigaro
-
+    """
     # get tool version
     phigaro -V > phigaro_version.txt ;
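For orientation, a standalone Phigaro run along the lines of what this module wraps looks roughly like the sketch below. The flag names follow Phigaro's 2.x command line, but the module's full command is not shown in the hunk, so treat all values as illustrative and verify against `phigaro --help`:

```bash
# Illustrative standalone phigaro call (values are placeholders)
phigaro \
    --fasta-file assembly.fasta \
    --config config.yml \
    --threads 4 \
    --extension html txt \
    --output ecoli_phigaro \
    --delete-shorts \
    --not-open
```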
diff --git a/modules/prophages/phispy.nf b/modules/prophages/phispy.nf
index 84b8cab4..357e4838 100644
--- a/modules/prophages/phispy.nf
+++ b/modules/prophages/phispy.nf
@@ -5,7 +5,12 @@ process PHISPY {
         else null
     }
     tag "${prefix}"
-    label = [ 'python', 'process_medium' ]
+    label = [ 'process_medium' ]
+
+    conda "bioconda::phispy=4.2.21"
+    container "${ workflow.containerEngine == 'singularity' ?
+        'https://depot.galaxyproject.org/singularity/phispy:4.2.21--py39h7cff6ad_0' :
+        'quay.io/biocontainers/phispy:4.2.21--py39h7cff6ad_0' }"
 
     input:
     tuple val(prefix), file(input)
diff --git a/modules/resistance/amrfinder.nf b/modules/resistance/amrfinder.nf
index 9aa7c3cc..70e9aac4 100644
--- a/modules/resistance/amrfinder.nf
+++ b/modules/resistance/amrfinder.nf
@@ -4,7 +4,12 @@ process AMRFINDER {
         else "resistance/AMRFinderPlus/$filename"
     }
     tag "${prefix}"
-    label = [ 'misc', 'process_medium' ]
+    label = [ 'process_medium' ]
+
+    conda "bioconda::ncbi-amrfinderplus=3.11.2"
+    container "${ workflow.containerEngine == 'singularity' ?
+        'docker://ncbi/amr:3.11.2-2022-12-19.1' :
+        'ncbi/amr:3.11.2-2022-12-19.1' }"
 
     input:
     tuple val(prefix), file(proteins)
diff --git a/modules/resistance/rgi_annotation.nf b/modules/resistance/rgi_annotation.nf
index cd83eed7..11e85c39 100644
--- a/modules/resistance/rgi_annotation.nf
+++ b/modules/resistance/rgi_annotation.nf
@@ -5,7 +5,12 @@ process CARD_RGI {
         else "resistance/RGI/$filename"
     }
     tag "${prefix}"
-    label = [ 'python', 'process_medium' ]
+    label = [ 'process_medium' ]
+
+    conda "bioconda::rgi=5.2.1"
+    container "${ workflow.containerEngine == 'singularity' ?
+        'https://depot.galaxyproject.org/singularity/rgi:5.2.1--pyhdfd78af_1' :
+        'quay.io/biocontainers/rgi:5.2.1--pyhdfd78af_1' }"
 
     input:
     tuple val(prefix), path(input)
@@ -20,10 +25,7 @@ process CARD_RGI {
     path("*_version.txt")     , emit: version
 
     script:
-    """
-    # activate env
-    source activate rgi
-
+    """
     # load database
     rgi load --card_json ${bacannot_db}/card_db/card.json --local
diff --git a/nextflow.config b/nextflow.config
index 45c8494d..12dddf36 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -106,8 +106,8 @@ manifest {
   description = "Nextflow pipeline for bacterial genome annotation"
   homePage = "https://github.com/fmalmeida/bacannot"
   mainScript = "main.nf"
-  nextflowVersion = ">=20.10.0"
-  version = '3.2'
+  nextflowVersion = "!>=22.10.1"
+  version = '3.3'
 }
 
 // Function to ensure that resource requirements don't go beyond
diff --git a/workflows/bacannot.nf b/workflows/bacannot.nf
index 57abf5d9..f5a64d31 100644
--- a/workflows/bacannot.nf
+++ b/workflows/bacannot.nf
@@ -20,6 +20,8 @@ include { PHAST } from '../modules/prophages/phast'
 include { PHIGARO } from '../modules/prophages/phigaro'
 include { PHISPY } from '../modules/prophages/phispy'
 include { ICEBERG } from '../modules/MGEs/iceberg'
+include { INTEGRON_FINDER } from '../modules/MGEs/integron_finder'
+include { INTEGRON_FINDER_2GFF } from '../modules/MGEs/integron_finder_2gff'
 include { ISLANDPATH } from '../modules/MGEs/islandpath'
 include { DRAW_GIS } from '../modules/MGEs/draw_gis'
 include { DIGIS } from '../modules/MGEs/digIS'
@@ -127,9 +129,15 @@ workflow BACANNOT {
         platon_all_ch = Channel.empty()
     }
 
+    // TODO: Maybe make the MGE annotation modules optional?
+
     // IslandPath software
     ISLANDPATH( annotation_out_ch.gbk )
 
+    // Integron Finder software
+    INTEGRON_FINDER( annotation_out_ch.genome )
+    INTEGRON_FINDER_2GFF( INTEGRON_FINDER.out.gbk )
+
     // Virulence search
     if (params.skip_virulence_search == false) {
         // VFDB
@@ -286,7 +294,8 @@ workflow BACANNOT {
             .join(iceberg_output_blastp_ch, remainder: true)
             .join(phast_output_ch, remainder: true)
             .join(DIGIS.out.gff, remainder: true)
-            .join(ch_custom_annotations, remainder: true)
+            .join(ch_custom_annotations, remainder: true)
+            .join(INTEGRON_FINDER_2GFF.out.gff, remainder: true)
     )
 
     /*
@@ -326,6 +335,7 @@ workflow BACANNOT {
             .join( MERGE_ANNOTATIONS.out.digis_gff )
             .join( antismash_output_ch, remainder: true )
            .join( MERGE_ANNOTATIONS.out.customdb_gff.groupTuple(), remainder: true )
+            .join( INTEGRON_FINDER_2GFF.out.gff, remainder: true )
     )
 
     // Render reports
@@ -360,6 +370,7 @@ workflow BACANNOT {
             .join( DRAW_GIS.out.example, remainder: true )
             .join( phast_output_ch, remainder: true )
             .join( MERGE_ANNOTATIONS.out.digis_gff )
+            .join( INTEGRON_FINDER_2GFF.out.gff, remainder: true )
     )
 
     //
     //
     SUMMARY(
         annotation_out_ch.all
-            .join( MLST.out.all , remainder: true )
-            .join( BARRNAP.out.all , remainder: true )
-            .join( kofamscan_all_ch , remainder: true )
-            .join( plasmidfinder_all_ch , remainder: true )
-            .join( platon_all_ch , remainder: true )
-            .join( ISLANDPATH.out.results , remainder: true )
-            .join( vfdb_all_ch , remainder: true )
-            .join( victors_all_ch , remainder: true )
-            .join( phast_all_ch , remainder: true )
-            .join( phigaro_all_ch , remainder: true )
-            .join( phispy_all_ch , remainder: true )
-            .join( iceberg_all_ch , remainder: true )
-            .join( amrfinder_all_ch , remainder: true )
-            .join( rgi_all_ch , remainder: true )
-            .join( argminer_all_ch , remainder: true )
-            .join( resfinder_all_ch , remainder: true )
-            .join( CALL_METHYLATION.out.all , remainder: true )
-            .join( REFSEQ_MASHER.out.results, remainder: true )
-            .join( DIGIS.out.all , remainder: true )
-            .join( antismash_all_ch , remainder: true )
-            .join( MERGE_ANNOTATIONS.out.all, remainder: true )
+            .join( MLST.out.all               , remainder: true )
+            .join( BARRNAP.out.all            , remainder: true )
+            .join( kofamscan_all_ch           , remainder: true )
+            .join( plasmidfinder_all_ch       , remainder: true )
+            .join( platon_all_ch              , remainder: true )
+            .join( ISLANDPATH.out.results     , remainder: true )
+            .join( vfdb_all_ch                , remainder: true )
+            .join( victors_all_ch             , remainder: true )
+            .join( phast_all_ch               , remainder: true )
+            .join( phigaro_all_ch             , remainder: true )
+            .join( phispy_all_ch              , remainder: true )
+            .join( iceberg_all_ch             , remainder: true )
+            .join( amrfinder_all_ch           , remainder: true )
+            .join( rgi_all_ch                 , remainder: true )
+            .join( argminer_all_ch            , remainder: true )
+            .join( resfinder_all_ch           , remainder: true )
+            .join( CALL_METHYLATION.out.all   , remainder: true )
+            .join( REFSEQ_MASHER.out.results  , remainder: true )
+            .join( DIGIS.out.all              , remainder: true )
+            .join( antismash_all_ch           , remainder: true )
+            .join( MERGE_ANNOTATIONS.out.all  , remainder: true )
+            .join( INTEGRON_FINDER_2GFF.out.gff, remainder: true )
     )
 
     MERGE_SUMMARIES(
         SUMMARY.out.summaries.map{ it[1] }.collect()

From 6fff59aba8d8608b081f84934e7cadeee502b437 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida
Date: Fri, 3 Mar 2023 08:39:34 -0300
Subject: [PATCH 03/50] 85 prokka module can get after modules stuck if the
 header file longer than 20 and not separated by tab or space (#89)

* add awk command to clean big fasta headers

* add awk statement to clean big fasta headers

* update bakta version

---
 conf/defaults.config      |  2 +-
 docs/config.md            |  2 +-
 docs/manual.md            |  2 +-
 modules/generic/bakta.nf  | 11 +++++++----
 modules/generic/prokka.nf | 21 +++++++++++++--------
 nextflow_schema.json      |  2 +-
 6 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/conf/defaults.config b/conf/defaults.config
index 3a795347..9702f5d5 100644
--- a/conf/defaults.config
+++ b/conf/defaults.config
@@ -177,7 +177,7 @@ params {
     // but can eventually be used by users
     unicycler_version = '0.4.8--py38h8162308_3'
     flye_version = '2.9--py39h39abbe0_0'
-    bakta_version = '1.6.1--pyhdfd78af_0'
+    bakta_version = '1.7.0--pyhdfd78af_1'
 
     // Max resource options
     max_memory = '20.GB'
diff --git a/docs/config.md b/docs/config.md
index 00664232..9962abef 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -192,7 +192,7 @@ params {
     // but can eventually be used by users
     unicycler_version = '0.4.8--py38h8162308_3'
     flye_version = '2.9--py39h39abbe0_0'
-    bakta_version = '1.6.1--pyhdfd78af_0'
+    bakta_version = '1.7.0--pyhdfd78af_1'
 
     // Max resource options
     max_memory = '20.GB'
diff --git a/docs/manual.md b/docs/manual.md
index e9791743..e7f2e64f 100644
--- a/docs/manual.md
+++ b/docs/manual.md
@@ -131,7 +131,7 @@ Users can now select the version of the non-core tools Bakta, Unicycler and Flye.
 | Parameter | Default | Description |
 | :-------- | :------ | :---------- |
-| `--bakta_version` | 1.6.1--pyhdfd78af_0 | Bakta tool version |
+| `--bakta_version` | 1.7.0--pyhdfd78af_1 | Bakta tool version |
 | `--flye_version` | 2.9--py39h39abbe0_0 | Flye tool version |
 | `--unicycler_version` | 0.4.8--py38h8162308_3 | Unicycler tool version |
diff --git a/modules/generic/bakta.nf b/modules/generic/bakta.nf
index 16cd793a..67db3569 100644
--- a/modules/generic/bakta.nf
+++ b/modules/generic/bakta.nf
@@ -7,10 +7,10 @@ process BAKTA {
     tag "${prefix}"
     label = [ 'process_medium', 'error_retry' ]
 
-    conda "bioconda::bakta=1.6.1"
+    conda "bioconda::bakta=1.7.0"
     container "${ workflow.containerEngine == 'singularity' ?
-        'https://depot.galaxyproject.org/singularity/bakta:1.6.1--pyhdfd78af_0' :
-        'quay.io/biocontainers/bakta:1.6.1--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/bakta:1.7.0--pyhdfd78af_1' :
+        'quay.io/biocontainers/bakta:1.7.0--pyhdfd78af_1' }"
 
     input:
     tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), file(assembly), val(resfinder_species)
@@ -38,6 +38,9 @@ process BAKTA {
     # Save bakta version
     bakta --version &> bakta_version.txt ;
 
+    # clean headers char limit
+    awk '{ if (\$0 ~ />/) print substr(\$0,1,21) ; else print \$0 }' $assembly > cleaned_header.fasta
+
     # Run bakta
     bakta \\
         --output annotation \\
@@ -46,7 +49,7 @@ process BAKTA {
         --prefix ${prefix} \\
         --strain '${prefix}' \\
         --db $bakta_db \\
-        $assembly
+        cleaned_header.fasta
 
     # fix fasta headers
     cut -f 1 -d ' ' annotation/${prefix}.fna > tmp.fa
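The awk statement added above (also used in the Prokka module below) enforces the annotators' contig-ID length limit by truncating each header to the first 20 characters after the `>`. On a toy FASTA it behaves like this (in the modules the `$` signs are escaped as `\$` only because of Nextflow string interpolation):

```bash
# Toy demonstration of the header-cleaning one-liner. substr() is 1-based,
# so positions 1..21 keep '>' plus 20 characters of the original header.
cat > toy.fasta <<'EOF'
>contig_1_with_a_very_long_unseparated_header_name
ACGTACGT
>ctg2
TTGCA
EOF

awk '{ if ($0 ~ />/) print substr($0,1,21) ; else print $0 }' toy.fasta > cleaned_header.fasta
cat cleaned_header.fasta
# >contig_1_with_a_very
# ACGTACGT
# >ctg2
# TTGCA
```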
"PGAP_NCBI.hmm" : "TIGRFAMs_15.0.hmm" """ + #!/usr/bin/env bash + # save prokka version prokka -v &> prokka_version.txt ; @@ -50,6 +52,9 @@ process PROKKA { # hmmpress ( cd prokka_db/hmm/ ; for i in *.hmm ; do hmmpress -f \$i ; done ) + # clean headers char limit + awk '{ if (\$0 ~ />/) print substr(\$0,1,21) ; else print \$0 }' $assembly > cleaned_header.fasta + # run prokka prokka \\ --dbdir prokka_db \\ @@ -61,7 +66,7 @@ process PROKKA { --genus '' \\ --species '' \\ --strain \"${prefix}\" \\ - $assembly + cleaned_header.fasta # remove tmp dir to gain space rm -r prokka_db diff --git a/nextflow_schema.json b/nextflow_schema.json index 31ad9105..5b05992b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -336,7 +336,7 @@ "bakta_version": { "type": "string", "description": "Select quay.io image tag for tool", - "default": "1.6.1--pyhdfd78af_0" + "default": "1.7.0--pyhdfd78af_1" } } }, From 36195ffb781fefde5916407e1c7ca3f60d4fa52f Mon Sep 17 00:00:00 2001 From: fmalmeida Date: Fri, 3 Mar 2023 13:35:54 +0100 Subject: [PATCH 04/50] fix bakta stats parsing --- modules/generic/summary.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/generic/summary.nf b/modules/generic/summary.nf index 4983ea97..f5b6e257 100644 --- a/modules/generic/summary.nf +++ b/modules/generic/summary.nf @@ -26,6 +26,7 @@ process SUMMARY { """ mkdir -p results/${prefix}/annotation ln -rs annotation/* results/${prefix}/annotation + sed -i 's/s:/:/g' results/${prefix}/annotation/${prefix}.txt falmeida-py bacannot2json -i results -o ${prefix}_summary.json """ } From c178c92de5ac659832a1498a5781d996b71dc65e Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Sun, 26 Mar 2023 16:52:52 +0200 Subject: [PATCH 05/50] 81 add tool mob suite (#90) * Add pre-formatted database (#83) * add pre-formatted database info * add information about pre-formatted database * add mob suite module * added results to HTML report * Update Dockerfile * added mob_suite to json summary * add tool to markdown files * add tool information to docs * add example reports --- README.md | 2 +- docker/misc/Dockerfile | 2 +- docker/renv/reports/report_MGEs.Rmd | 16 +- docker/renv/reports/yes_plasmids.Rmd | 26 + docs/index.md | 2 +- docs/installation.md | 2 +- docs/manual.md | 2 +- docs/outputs.md | 10 +- docs/reports/report_MGEs.html | 5201 ++++++++++++++++++++++++++ docs/reports/report_general.html | 5015 +++++++++++++++++++++++++ docs/reports/report_resistance.html | 5112 +++++++++++++++++++++++++ docs/reports/report_virulence.html | 5062 +++++++++++++++++++++++++ markdown/list_of_tools.md | 2 +- modules/MGEs/mob_suite.nf | 41 + modules/generic/reports.nf | 3 +- modules/generic/summary.nf | 37 +- workflows/bacannot.nf | 7 + 17 files changed, 20512 insertions(+), 30 deletions(-) create mode 100644 docs/reports/report_MGEs.html create mode 100644 docs/reports/report_general.html create mode 100644 docs/reports/report_resistance.html create mode 100644 docs/reports/report_virulence.html create mode 100644 modules/MGEs/mob_suite.nf diff --git a/README.md b/README.md index 35dbaf58..2e8ceeb9 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ Its main steps are: | Annotation of integrative and conjugative elements | [ICEberg](http://db-mml.sjtu.edu.cn/ICEberg/) | | Annotation of bacterial integrons | [Integron Finder](https://github.com/gem-pasteur/Integron_Finder) | | Focused detection of insertion sequences | [digIS](https://github.com/janka2012/digIS) | -| _In silico_ detection of plasmids | 
From c178c92de5ac659832a1498a5781d996b71dc65e Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida
Date: Sun, 26 Mar 2023 16:52:52 +0200
Subject: [PATCH 05/50] 81 add tool mob suite (#90)

* Add pre-formatted database (#83)

* add pre-formatted database info

* add information about pre-formatted database

* add mob suite module

* added results to HTML report

* Update Dockerfile

* added mob_suite to json summary

* add tool to markdown files

* add tool information to docs

* add example reports

---
 README.md                            |    2 +-
 docker/misc/Dockerfile               |    2 +-
 docker/renv/reports/report_MGEs.Rmd  |   16 +-
 docker/renv/reports/yes_plasmids.Rmd |   26 +
 docs/index.md                        |    2 +-
 docs/installation.md                 |    2 +-
 docs/manual.md                       |    2 +-
 docs/outputs.md                      |   10 +-
 docs/reports/report_MGEs.html        | 5201 ++++++++++++++++++++++++++
 docs/reports/report_general.html     | 5015 +++++++++++++++++++++++++
 docs/reports/report_resistance.html  | 5112 +++++++++++++++++++++++++
 docs/reports/report_virulence.html   | 5062 +++++++++++++++++++++++++
 markdown/list_of_tools.md            |    2 +-
 modules/MGEs/mob_suite.nf            |   41 +
 modules/generic/reports.nf           |    3 +-
 modules/generic/summary.nf           |   37 +-
 workflows/bacannot.nf                |    7 +
 17 files changed, 20512 insertions(+), 30 deletions(-)
 create mode 100644 docs/reports/report_MGEs.html
 create mode 100644 docs/reports/report_general.html
 create mode 100644 docs/reports/report_resistance.html
 create mode 100644 docs/reports/report_virulence.html
 create mode 100644 modules/MGEs/mob_suite.nf

diff --git a/README.md b/README.md
index 35dbaf58..2e8ceeb9 100644
--- a/README.md
+++ b/README.md
@@ -49,7 +49,7 @@ Its main steps are:
 | Annotation of integrative and conjugative elements | [ICEberg](http://db-mml.sjtu.edu.cn/ICEberg/) |
 | Annotation of bacterial integrons | [Integron Finder](https://github.com/gem-pasteur/Integron_Finder) |
 | Focused detection of insertion sequences | [digIS](https://github.com/janka2012/digIS) |
-| _In silico_ detection of plasmids | [Plasmidfinder](https://cge.cbs.dtu.dk/services/PlasmidFinder/) and [Platon](https://github.com/oschwengers/platon) |
+| _In silico_ detection and typing of plasmids | [Plasmidfinder](https://cge.cbs.dtu.dk/services/PlasmidFinder/), [Platon](https://github.com/oschwengers/platon) and [MOB-typer](https://github.com/phac-nml/mob-suite) |
 | Prediction and visualization of genomic islands | [IslandPath-DIMOB](https://github.com/brinkmanlab/islandpath) and [gff-toolbox](https://github.com/fmalmeida/gff-toolbox) |
 | Custom annotation from formatted FASTA or NCBI protein IDs | [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs) |
 | Merge of annotation results | [bedtools](https://bedtools.readthedocs.io/en/latest/) |
diff --git a/docker/misc/Dockerfile b/docker/misc/Dockerfile
index 486eff08..d184bb40 100644
--- a/docker/misc/Dockerfile
+++ b/docker/misc/Dockerfile
@@ -21,7 +21,7 @@ RUN mamba install -y \
       seqkit \
       bioawk \
       'easy_circos>=0.3' \
-      'falmeida-py>=1.1' \
+      'falmeida-py>=1.2' \
       'conda-forge::openssl>=1.1.1' \
       emboss \
       libtiff \
diff --git a/docker/renv/reports/report_MGEs.Rmd b/docker/renv/reports/report_MGEs.Rmd
index cd3657a2..80fed6a0 100644
--- a/docker/renv/reports/report_MGEs.Rmd
+++ b/docker/renv/reports/report_MGEs.Rmd
@@ -13,6 +13,7 @@ params:
   phispy_tsv:
   plasmid_finder_tab:
   platon_tsv:
+  mobsuite_tsv:
   gi_image:
   digis:
   integronfinder:
@@ -48,10 +49,12 @@ check_lines <- function(x) {
 # Read plasmids
 plasmid_finder_tab <- try(read.csv(params$plasmid_finder_tab, sep = "\t"), silent = TRUE)
-platon_tsv <- try(read.csv(params$platon_tsv, sep = "\t"), silent = TRUE)
+platon_tsv   <- try(read.csv(params$platon_tsv, sep = "\t"), silent = TRUE)
+mobsuite_tsv <- try(read.csv(params$mobsuite_tsv, sep = "\t", header=TRUE), silent = TRUE) # always have a line for chr
 if ( (class(plasmid_finder_tab) != 'try-error' & check_lines(plasmid_finder_tab) > 0) |
-     (class(platon_tsv) != 'try-error' & check_lines(platon_tsv) > 0)
+     (class(platon_tsv) != 'try-error' & check_lines(platon_tsv) > 0) |
+     (class(mobsuite_tsv) != 'try-error' & check_lines(mobsuite_tsv) > 1)
    ) {
   plasmids_not_null <- TRUE
   plasmids_null <- FALSE
@@ -184,11 +187,14 @@ In this context, this pipeline is capable of automatically annotating some mobil
 6. [Platon](https://github.com/oschwengers/platon);
     + Platon detects plasmid contigs within bacterial draft genomes from WGS short-read assemblies.
     + Therefore, Platon analyzes the natural distribution biases of certain protein coding genes between chromosomes and plasmids.
-7. [IslandPath](https://github.com/brinkmanlab/islandpath).
+7. [MOB Suite](https://github.com/phac-nml/mob-suite);
+    + Software tools for clustering, reconstruction and typing of plasmids from draft assemblies.
+    + In the pipeline, only the typer tool is used.
+8. [IslandPath](https://github.com/brinkmanlab/islandpath).
     + IslandPath-DIMOB is a standalone software to predict genomic islands in bacterial and archaeal genomes based on the presence of dinucleotide biases and mobility genes.
-8. [digIS](https://github.com/janka2012/digIS).
+9. [digIS](https://github.com/janka2012/digIS).
     + digIS is a command-line tool for detection of insertion sequences (IS) in prokaryotic genomes.
-9. [Integron Finder](https://github.com/gem-pasteur/Integron_Finder).
+10. [Integron Finder](https://github.com/gem-pasteur/Integron_Finder).
     + a command line tool to identify integrons in DNA sequences
 
 ### Prediction thresholds
diff --git a/docker/renv/reports/yes_plasmids.Rmd b/docker/renv/reports/yes_plasmids.Rmd
index b01dd20f..7af439d9 100644
--- a/docker/renv/reports/yes_plasmids.Rmd
+++ b/docker/renv/reports/yes_plasmids.Rmd
@@ -54,4 +54,30 @@ datatable(results,
                          columnDefs = list(list(visible=FALSE, targets=c(1,2)))),
           extensions = 'Buttons',
           rownames = F)
+```
+
+### MOB suite (typer)
+
+[MOB-typer](https://github.com/phac-nml/mob-suite) provides _in silico_ predictions of the replicon family, relaxase type, mate-pair formation type and predicted transferability of the plasmid. Using a combination of biomarkers and MOB-cluster codes, it will also provide an observed host-range of your plasmid based on its replicon, relaxase and cluster assignment. This is combined with information mined from the literature to provide a prediction of the taxonomic rank at which the plasmid is likely to be stably maintained, but it does not provide source attribution predictions.
+
+* The complete results can be found in the directory `plasmids/mob_suite` under the main output directory.
+
+(#tab:mobsuite-results) In silico typing of plasmids with MOB suite
+```{r}
+results <- mobsuite_tsv
+
+# Render dt
+datatable(results,
+          escape = FALSE,
+          filter = 'top',
+          options = list(pageLength = 5,
+                         lengthMenu = c(5, 10, 15, 20, 50),
+                         dom='flrtBip',
+                         buttons = c('copy', 'csv', 'excel', 'colvis'),
+                         scrollX = TRUE,
+                         fixedColumns = FALSE,
+                         autoWidth = TRUE,
+                         columnDefs = list(list(visible=FALSE, targets=c(1,2)))),
+          extensions = 'Buttons',
+          rownames = F)
 ```
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
index 3770d11a..3ee7b0df 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -35,7 +35,7 @@ The pipeline's main steps are:
 | Annotation of integrative and conjugative elements | [ICEberg](http://db-mml.sjtu.edu.cn/ICEberg/) |
 | Annotation of bacterial integrons | [Integron Finder](https://github.com/gem-pasteur/Integron_Finder) |
 | Focused detection of insertion sequences | [digIS](https://github.com/janka2012/digIS) |
-| _In silico_ detection of plasmids | [Plasmidfinder](https://cge.cbs.dtu.dk/services/PlasmidFinder/) and [Platon](https://github.com/oschwengers/platon) |
+| _In silico_ detection and typing of plasmids | [Plasmidfinder](https://cge.cbs.dtu.dk/services/PlasmidFinder/), [Platon](https://github.com/oschwengers/platon) and [MOB-typer](https://github.com/phac-nml/mob-suite) |
 | Prediction and visualization of genomic islands | [IslandPath-DIMOB](https://github.com/brinkmanlab/islandpath) and [gff-toolbox](https://github.com/fmalmeida/gff-toolbox) |
 | Custom annotation from formatted FASTA or NCBI protein IDs | [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs) |
 | Merge of annotation results | [bedtools](https://bedtools.readthedocs.io/en/latest/) |
diff --git a/docs/installation.md b/docs/installation.md
index 87c83c43..5056467d 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -27,7 +27,7 @@
 docker pull fmalmeida/bacannot:v3.3_renv ;
 docker pull fmalmeida/bacannot:jbrowse ;
 ```
 
-> The pipeline also uses other public images available in biocontainers
+> The pipeline also uses other public images available in biocontainers. All images can be downloaded on the fly, automatically, by Nextflow.
 
 !!! info "Using singularity"
info "Using singularity" diff --git a/docs/manual.md b/docs/manual.md index e7f2e64f..95cb1b35 100644 --- a/docs/manual.md +++ b/docs/manual.md @@ -88,7 +88,7 @@ The use of this parameter sets a default value for input samples. If a sample ha |
Parameter
| Required | Default | Description | | :--------------------------------------- | :------- | :------ | :---------- | | `--skip_virulence_search` | :material-close: | false | Tells whether not to run virulence factors annotation. It skips both vfdb and victors annotation | -| `--skip_plasmid_search` | :material-close: | false | Tells whether not to run plasmid detection modules | +| `--skip_plasmid_search` | :material-close: | false | Tells whether not to run plasmid detection/typing modules | | `--skip_resistance_search` | :material-close: | false | Tells whether not to run resistance genes annotation modules | | `--skip_iceberg_search` | :material-close: | false | Tells whether not to run mobile genetic elements annotation with ICEberg | | `--skip_prophage_search` | :material-close: | false | Tells whether not to run prophage annotation modules | diff --git a/docs/outputs.md b/docs/outputs.md index c072586e..ba350809 100644 --- a/docs/outputs.md +++ b/docs/outputs.md @@ -31,7 +31,7 @@ After a successful execution, you will have something like this: |   ├── KOfamscan # Results from annotation with KEGG database |   ├── methylations # Methylated sites predicted with Nanopolish (if fast5 is given) |   ├── MLST # MLST results with mlst pipeline -|   ├── plasmids # Plasmid annotation results from Platon and Plasmidfinder +|   ├── plasmids # Plasmid annotation results from Platon, Plasmidfinder and MOB Suite |   ├── prophages # Prophage annotation results from PhiSpy, Phigaro and PHAST |   ├── refseq_masher # Closest NCBI Resfseq genomes identified with refseq_masher |   ├── report_files # Annotation reports in HTML format @@ -60,10 +60,10 @@ Using both [KofamScan](https://github.com/takaram/kofam_scan) and [KEGGDecoder]( Bacannot will use [R Markdown](https://rmarkdown.rstudio.com/) to produce automatic annotation reports. To date, the available reports are: -* [Report of general annotation features](https://fmalmeida.github.io/reports/report_general.html) -* [Report of Antimicrobial resistance (AMR) genes annotation](https://fmalmeida.github.io/reports/report_resistance.html) -* [Report of virulence genes annotation](https://fmalmeida.github.io/reports/report_virulence.html) -* [Report of mobile genetic elements annotation](https://fmalmeida.github.io/reports/report_MGEs.html) +* [Report of general annotation features](./reports/report_general.html) +* [Report of Antimicrobial resistance (AMR) genes annotation](./reports/report_resistance.html) +* [Report of virulence genes annotation](./reports/report_virulence.html) +* [Report of mobile genetic elements annotation](./reports/report_MGEs.html) * Including plasmids, prophages, ICEs and genomic islands. * Report of user's custom db annotations. * The quickstart does not produce an example, however, the report is similar to the ICEberg section in the MGE example report. diff --git a/docs/reports/report_MGEs.html b/docs/reports/report_MGEs.html new file mode 100644 index 00000000..9f6e4eda --- /dev/null +++ b/docs/reports/report_MGEs.html @@ -0,0 +1,5201 @@ + + + + + + + + + + + + + + + +Annotation of mobile genetic elements + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+

ecoli report of mobile genetic elements (MGEs)

+
+

About

+

Mobile genetic elements (MGEs) are a type of genetic material that can move around within a genome, or that can be transferred from one species or replicon to another. Newly acquired genes through this mechanism can increase fitness by gaining new or additional functions. On the other hand, MGEs can also decrease fitness by introducing disease-causing alleles or mutations. For instance, prophages are bacteriophages that have been inserted and integrated into the bacterial chromosome or plasmid. It is the latent form of a phage. ICEs (integrative and conjugative elements), on the other hand, are integrative mobile genetic elements that encode a conjugation machinery. They can confer selective advantages and can also encode resistance determinants and virulence factors.

+

In this context, this pipeline is capable of automatically annotating some mobile genetic elements using public available resources such as:

+
    +
  1. PHAST database; +
      +
    • PHAST (PHAge Search Tool) is a web server designed to rapidly and accurately identify, annotate and graphically display prophage sequences within bacterial genomes or plasmids.
    • +
    • Although it does not have a command line interface it has a protein database of prophage genes that were added to this pipeline and are scanned via BLASTp
    • +
  2. +
  3. Phigaro; +
      +
    • Phigaro is a standalone command-line application that is able to detect prophage regions taking raw genome and metagenome assemblies as an input.
    • +
    • It also produces dynamic annotated “prophage genome maps” and marks possible transposon insertion spots inside prophages.
    • +
  4. +
  5. PhiSpy; +
      +
    • PhiSpy identifies prophages in Bacterial (and probably Archaeal) genomes. Given an annotated genome it will use several approaches to identify the most likely prophage regions.
    • +
  6. +
  7. ICEberg database; +
      +
    • ICEberg 2.0 is an updated database of bacterial integrative and conjugative elements.
    • +
  8. +
  9. Plasmidfinder; +
      +
    • Plasmidfinder is a tool for the in silico detection of plasmids.
    • +
  10. +
  11. Platon; +
      +
    • Platon detects plasmid contigs within bacterial draft genomes from WGS short-read assemblies.
    • +
    • Therefore, Platon analyzes the natural distribution biases of certain protein coding genes between chromosomes and plasmids.
    • +
  12. +
  13. MOB Suite; +
      +
    • Software tools for clustering, reconstruction and typing of plasmids from draft assemblies.
    • +
    • In the pipeline, only the typer tool is used.
    • +
  14. +
  15. IslandPath. +
      +
    • IslandPath-DIMOB is a standalone software to predict genomic islands in bacterial and archaeal genomes based on the presence of dinucleotide biases and mobility genes.
    • +
  16. +
  17. digIS. +
      +
    • digIS is a command-line tool for detection of insertion sequences (IS) in prokaryotic genomes.
    • +
  18. +
  19. Integron Finder. +
      +
    • a command line tool to identity integrons in DNA sequences
    • +
  20. +
+
+

Prediction thresholds

+

All the predictions were passed through a user defined threshold for minimum coverage and identity:

+
    +
  • Min. Identity (%): > 85
  • +
  • Min. Coverage (%): > 85
  • +
+
+

PHAST is a protein database scanned via BLASTp; ICEberg is a protein and nucleotide database that contains the full-length sequences of known ICEs and also contains the sequences of a multitude of proteins commonly found inside these ICEs. Full-length ICEs are blasted to the genome via BLASTn while the protein sequences are blasted tto the predicted genes via BLASTp; Plasmidfinder is a nucleotide database scanned via BLASTn. The other software have its own metrics.

+
+
+
+
+

Genomic Islands prediction

+

Genomic Islands (GIs) were predicted with islandPath. The predicted genomic islands are integrated into the JBrowse genome viewer so that users can interactively interrogate the results and check the genes found inside these islands. The resulting genome browser are provided in the jbrowse directory inside the query main output directory. This genome browser can be opened with the http-server command or the JBrowse Desktop software.

+

Additionally, these genomic islands were parsed in a very generic manner in order to provide a simple visualization of the annotation in these regions. The plots were rendered with the python package gff-toolbox and are available at the directory: genomic_islands/plots in the main query output directory. An example of these plots is shown in Figure 1.

+
+Examplification of the visualization of genomic islands regions with the gff-toolbox package. +

+Figure 1: Examplification of the visualization of genomic islands regions with the gff-toolbox package. +

+
+
+

As discussed, these images were rendered in a very generic manner just to show some visualization possibilities to the user. If desired, users can check the gff-toolbox package to produce more customized plots.

+
+
+
+

Plasmid detection

+
+

Plasmidfinder

+

Plasmidfinder is a tool for the in silico detection of plasmids. Its results are summarized in Table 1

+
    +
  • The complete results can be found in the directory plasmids/plasmidfinder under the main output directory.
  • +
+ +Table 1: In silico detection of plasmids with Plasmidfinder + +
+ +
+
+

Platon

+

Platon detects plasmid contigs within bacterial draft genomes from WGS short-read assemblies. Therefore, Platon analyzes the natural distribution biases of certain protein coding genes between chromosomes and plasmids. This analysis is complemented by comprehensive contig characterizations upon which several heuristics are applied. Its results are summarized in Table 2.

+
    +
  • The complete results can be found in the directory plasmids/platon under the main output directory.
  • +
+ +Table 2: In silico detection of plasmids with Platon + +
+ +
+
+

MOB suite (typer)

+

MOB-typer provides in silico predictions of the replicon family, relaxase type, mate-pair formation type and predicted transferability of the plasmid. Using a combination of biomarkers and MOB-cluster codes, it will also provide an observed host-range of your plasmid based on its replicon, relaxase and cluster assignment. This is combined with information mined from the literature to provide a prediction of the taxonomic rank at which the plasmid is likely to be stably maintained but it does not provide source attribution predictions.

+
    +
  • The complete results can be found in the directory plasmids/mob_suite under the main output directory.
  • +
+ +Table 3: In silico typing of plasmids with MOB suite + +
+ +
+
+
+

Prophage detection

+
+

All the prophage sequences and genes are available in the genome browser provided, it is worthy taking notes of prophage’s genomic regions for a better exploration when using it. The genome browser was automatically created (stored in a dir called jbrowse) and can be visualized with JBROWSE desktop ot http-server.

+
+
+

Phigaro

+

Phigaro is a standalone command-line application that is able to detect prophage regions taking raw genome and metagenome assemblies as an input. It also produces dynamic annotated “prophage genome maps” and marks possible transposon insertion spots inside prophages. Its results can be nicely visualized in its own html report file stored in its output directory. The genomic regions predicted as putative prophage sequences are also summarized in Table 4.

+
    +
  • Check it out at: +
      +
    • Dir: prophages/phigaro in the main output directory
    • +
    • HTML: _ANNOTATION/prophages/phigaro/ecoli_phigaro.html
    • +
  • +
+
+ +Table 4: Putative prophage sequences annotated with phigaro software + +
+ +
+
+

PhiSpy

+

PhiSpy is a standalone tool that identifies prophages in Bacterial (and probably Archaeal) genomes. Given an annotated genome it will use several approaches to identify the most likely prophage regions. The genomic regions predicted as putative prophage sequences are also summarized in Table 5.

+
    +
  • Check the results at prophages/phispy in the main output directory
  • +
+
+
+ +Table 5: Putative prophage sequences annotated with phispy software + +
+ +
+
+
+

PHAST database

+

All prophage genes from PHAST database that had good alignments to the genes of the query genome are summarized in Table 6. The protein sequences of these genes were aligned against the gene sequences predicted by Prokka via BLASTp. They are all available in the genome browser provided. A good way to interrogate this annotation is to visualize the putative prophage regions predicted by phigaro and phispy interpolating it with the prophage gene annotation provided with phast database.

+
+

Unfortunately, PHASTER database have no searchable interface to visualize its prophages. Therefore, this table has no links to external sources.

+
+ +Table 6: Prophage genes annotated using PHAST database via BLASTp + +
+ +
+
+
+

ICEs detection

+
+

ICEberg database

+
+
+

Analysis of full-length ICEs

+

Full-length ICEs are available at ICEberg database in nucleotide fastas while the proteins found inside these ICEs are in protein fastas. Since the ICEfinder script has no licenses to be incorporated to the pipeline, we try to search for the full-length ICEs. However, they are very difficult to be completely found in new genomes, thus they are scanned without coverage or identity thresholds. The filtering and selection of these is up to you. We have found a total of 35 alignments in the query genome, check it out in table 7.

+

Users are advised to also use the ICEfinder tool to predict the putative genomic position of known ICEs since we are not allowed to include this step under this pipeline.

+
+ +Table 7: Alignment of full-length ICEs to the query genome via BLASTn + +
+ +
+
+

Analysis of ICE’s proteins

+

All query genes predicted by Prokka that have a match in ICEberg database are shown in Table 8. It is summarized the ICE id and all its genes that were found in the query genome. All of them are linked to the database for further investigations.

+
+

Take note: The fact that the genome possess some proteins from ICEs does not necessarily means that the ICE is present in the genome. Please, check the number of proteins that the ICE of origin posses in the ICEberg database list of ICEs, and then make inferences based one the alignments you see.

+
+

Users are advised to also use the ICEfinder tool to predict the putative genomic position of known ICEs since we are not allowed to include this step under this pipeline.

+
+ +Table 8: ICE genes annotated from ICEberg database via BLASTp + +
+ +
+The number of genes from known ICEs (from [ICEberg](https://bioinfo-mml.sjtu.edu.cn/ICEberg2/index.php)) found in the query genome +

+Figure 2: The number of genes from known ICEs (from ICEberg) found in the query genome +

+
+
+
+
+

IS detection

+

Insertions sequences have been predicted with digIS. The digIS search pipeline operates in the following steps:

+
    +
  1. The whole input nucleic acid sequence is translated into amino acid sequences (all six frames).
  2. +
  3. The translated sequences are searched using manually curated pHMMs.
  4. +
  5. The seeds are filtered by domain e-value, and those that overlap or follow each other within a certain distance are merged.
  6. +
  7. The seeds are extended according to sequence similarity with known IS elements in the ISFinder database.
  8. +
  9. Extended seeds are filtered by noise cutoff score and length, and duplicated hits, corresponding to the same IS element, are removed.
  10. +
  11. Remaining hits are classified based on sequence similarity and GenBank annotation (if available) to help assess their quality.
  12. +
  13. Finally, the classified outputs are reported in the CSV and GFF3 format.
  14. +
+
+

The program is executed with the GenBank annotation

+
+
+ +Table 9: Insertions sequences predicted by digIS in GFF format. + +
+ +
+
+

Integron detection

+

Not a integron have been predicted with Integron Finder. This might have happened either because your genome really do not have integron sequences or due to misassemblies. You can always try to run the online version of the tool: https://integronfinder.readthedocs.io/en/latest/user_guide/webserver.html

+
+
+ + + + +
+ + + + + + + + + + + + + + + + diff --git a/docs/reports/report_general.html b/docs/reports/report_general.html new file mode 100644 index 00000000..d005400b --- /dev/null +++ b/docs/reports/report_general.html @@ -0,0 +1,5015 @@ + + + + + + + + + + + + + + + +Generic annotation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+

ecoli general annotation report

+
+

About

+

This report was built to summarise in a report the results of the most generic annotation contents, which are: Prokka, Barrnap, mlst, KofamScan and refseq_masher. If you’d like to see any other result included in this report please flag an enhancement issue on Github.

+
+
+

RefSeq Masher

+

RefSeq Masher is a tool that enables to rapidly find what NCBI RefSeq genomes match or are contained within your sequence data using Mash MinHash with a Mash sketch database of NCBI RefSeq Genomes. The results are shown below (bacannot outputs only the top 10).

+
+ +
+
+

MLST

+

Bacannot uses the mlst package to scan the PubMLST schemes available in order to classify the genome under public multilocus sequence type schemes. The results for ecoli are shown below.

+
+ +
+
+

Prokka

+

Prokka is generic prokaryotic genome annotation tool that produces standards-compliant output files.

+
+

In bacannot, when using prokka, the prokka database is incremented with either TIGRFAM hmm hosted at NCBI or with the extensive PGAP hmm database hosted at NCBI with the parameter --prokka_use_pgap is used.

+
+
+ +
+
+

Barrnap

+

Barrnap is a fast Ribosomal RNA predictor for bacterias, from the same developer of Prokka. It is fast and produces a GFF of the predicted rRNAs (See below).

+
+ +
+
+

KEGG KOs

+

KEGG KOs are annotated with KofamScan, which is a gene function annotation tool based on KEGG Orthology and hidden Markov model. You need KOfam database to use this tool. Online version is available on https://www.genome.jp/tools/kofamkoala/.

+After annotation, the results are plotted with KEGGDecoder (See below). +
+KEGGDecoder heatmap of KofamScan annotation results. +

+Figure 1: KEGGDecoder heatmap of KofamScan annotation results. +

+
+
+
+ + + + +
+ + + + + + + + + + + + + + + + diff --git a/docs/reports/report_resistance.html b/docs/reports/report_resistance.html new file mode 100644 index 00000000..653209ae --- /dev/null +++ b/docs/reports/report_resistance.html @@ -0,0 +1,5112 @@ + + + + + + + + + + + + + + + +Annotation of amr determinants + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+

ecoli ARGs report

+
+

About

+

Antimicrobial resistance genes (ARGs) are genes that encode resistance determinants capable of conferring to the bacteria the ability to tolerate or resist some antibiotics or antimicrobials, making bacteria less susceptible to its effects. In this pipeline, ARGs have been predicted/detected with four main AMR resources:

+
    +
  1. Resfinder +
      +
    • Resfinder is a very popular database of acquired resistance genes and chromosomal point mutations hosted by the Center for Genomic Epidemiology.
    • +
  2. +
  3. AMRFinderPlus +
      +
    • AMRFinder is a software distributed by NCBI that allows users to query the National Database of Antibiotic Resistant Organisms (NDARO).
    • +
    • NDARO is a NCBI curated database that aggregates resistance genes from other databases such as Resfams, CARD, Resfinder, Pasteur Institute Beta Lactamases and others.
    • +
    • For more information please read the following link. NCBI’s efforts were done in order to standardize and confer reliability to AMR predictions.
    • +
  4. +
  5. CARD RGI +
      +
    • RGI is a software distributed by the CARD database which enables the detection of new variants and mutations that confer resistance to antibiotics, analyzing genomes or proteome sequences under three paradigms: Perfect, Strict, and Loose (a.k.a. Discovery).
    • +
    • The Perfect algorithm is most often applied to clinical surveillance as it detects perfect matches to the curated reference sequences and mutations in the CARD database.
    • +
    • In contrast, the Strict algorithm detects previously unknown variants of known AMR genes, including secondary screen for key mutations, using detection models with CARD’s curated similarity cut-offs to ensure the detected variant is likely a functional AMR gene.
    • +
  6. +
  7. ARGminer +
      +
    • RGminer database is an online resource for the inspection and curation of ARGs based on crowdsourcing as well as a platform to promote interaction and collaboration for the ARG scientific community. It is used in this pipeline in order to diversify the insights about the resistance genes found. It can not be used as the sole source of prediction, but it may be a useful contribution for analyses and AMR descriptions because it tries to aggregate and create nomenclature standards between databases.
    • +
  8. +
+
+

Prediction thresholds

+

All the predictions were passed through a user defined threshold for minimum coverage and identity:

+
    +
  • Min. Identity (%): > 90
  • +
  • Min. Coverage (%): > 90
  • +
+
+

CARD RGI have their own detection models thresholds obtained by curation. Therefore, the only result from CARD that have been filtered like that is their final tabular output (shown in this report).

+
+
+

The results used to create this report are under the directory called resistance in the output folder of the query ecoli.

+
+
+
+
+

Resfinder

+

No AMR gene was annotated either because Resfinder has not been executed (based on user’s input parameters) or the given alignment thresholds were too strict. If you believe that at least one gene should be present in the query genome you may try different thresholds.

+
+
+

CARD RGI

+

The results obtained with RGI tool are summarized in the heatmap produced by the tool itself (Figure 1). Additionally, the annotation results are also shown in an interactive table displaying the tool’s complete annotation information (Table 1). They can be roughly divided into two main categories:

+
    +
  1. Perfect hits +
      +
    • detects perfect matches to the curated reference sequences and mutations in the CARD
    • +
  2. +
  3. Strict hits +
      +
    • detects previously unknown variants of known AMR genes, including secondary screen for key mutations, using detection models with CARD’s curated similarity cut-offs to ensure the detected variant is likely a functional AMR gene
    • +
  4. +
+
+

Obs: CARD RGI tool always tries to annotate functional AMR genes, however, depending on the assembly, a not functional gene may yet be annotated. Therefore, users are advised to double check genes annotated under Strict category.

+
+


+ +Table 1: RGI annotation results. The perfect hits are highlighted in yellow while the strict hits in light blue. + +
+ +


+
+RGI's phenotype prediction. AMR genes are listed in alphabetical order and unique resistome profiles are displayed with their frequency. Yellow represents a perfect hit, Blue-green represents a strict hit. +

+Figure 1: RGI’s phenotype prediction. AMR genes are listed in alphabetical order and unique resistome profiles are displayed with their frequency. Yellow represents a perfect hit, Blue-green represents a strict hit. +

+
+
+
+

AMRFinder

+

The AMRFinderPlus annotation results are summarized below in an interactive table containing the complete annotation information (Table 2) and an image displaying the targeted drug classes (Figure 2). Whenever possible, features are linked to the NCBI database marking the closest reference sequence to each annotated gene. The results obtained with AMRFinderPlus can be roughly divided into two main categories:

+
    +
  1. Genes related to antibiotics, called AMR;
  2. +
  3. Genes related to stress resistance which can be: +
      +
    • biocide resistance
    • +
    • metal resistance
    • +
    • heat resistance
    • +
  4. +
+
+

Acquired ARGs detected

+
    +
  • AMR genes found in the query genome: +
      +
    • emrD
    • +
    • blaEC
    • +
    • mdtM
    • +
  • +
+
+
+

Supporting Data

+ +Table 2: Resistance genes annotated from NCBI AMR curated database using AMRfinderplus + +
+ +
+Resistome Predicted using NCBI's AMRFinderplus +

+Figure 2: Resistome Predicted using NCBI’s AMRFinderplus +

+
+
+
+
+

ARGminer

+

ARGminer is an online resource for the inspection and curation of ARGs based on crowdsourcing as well as a platform to promote interaction and collaboration for the ARG scientific community. We put this database here in the report and annotation in order to support the initative and to help it go towards nomenclature simplification. Genes are scanned via BLASTp since ARGminer is a protein database. This alignment is summarized in table 3.

+
+

It must be used with caution. Remember, it is a super new database thus it is rapidly changing and may yet contain errors.

+
+
+

BLAST summary

+ +Table 3: Resistance genes detected using ARGminer database via BLASTp + +
+ +
+
+
+

Prokka

+

Additionally, Prokka generically annotates a few proteins that are related to any type of resistance. These are showed in Table 4.

+
+

When using Prokka, one must take caution when evaluating this result because this annotation can be very generic and therefore not so meaningful. Because it only uses hmms, sometimes the annotation of genes can be based on a single detected motif thus its results must be checked whether they are correctly annotated and/or functional.

+
+ +Table 4: Generic annotation of resistance determinants by Prokka + +
+ +
+
+ + + + +
+ + + + + + + + + + + + + + + + diff --git a/docs/reports/report_virulence.html b/docs/reports/report_virulence.html new file mode 100644 index 00000000..1ddc92c4 --- /dev/null +++ b/docs/reports/report_virulence.html @@ -0,0 +1,5062 @@ + + + + + + + + + + + + + + + +Annotation of virulence factors + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+

ecoli virulence report

+
+

About

+

Virulence factors are molecules produced by bacteria that add effectiveness in their colonization ability of a niche, immunoevasion, immunosuppression and nutrition obtention from the host. In order to detect these molecules in the query genome its gene sequences were aligned against two databases:

+
    +
  1. VFDB; +
      +
    • VFDB is a comprehensive resource, created in 2004, of curated information about virulence factors of pathogenic bacteria. To date, it contains a 1080 virulence factors in its database, from >74 bacteria genera.
    • +
    • In this pipeline the VFDB nucleotide core dataset (A) was used for alignment.
    • +
  2. +
  3. Victors. +
      +
    • Victors is a curated database which currently possesses 5296 virulence factors for 194 different pathogens including bacteria, viruses and parasites and is focused in proteins related to interactions pathogen-host.
    • +
    • Victors is a protein database, thus scanned with BLASTp.
    • +
  4. +
+
+

Prediction thresholds

+

All the predictions were passed through a user defined threshold for minimum coverage and identity:

+
    +
  • Min. Identity (%): > 90
  • +
  • Min. Coverage (%): > 90
  • +
+
+

The results used to create this report are under the directory called virulence in the output folder of the query ecoli.

+
+
+
+
+

VFDB

+

All virulence factors (from VFDB) that were found to have at least one gene in the query genome are summarized below. All of them are linked to the database for further investigations. A more detailed information about the virulence annotation with VFDB is given in Table 1. Additionally, Figure 1 summarizes the amount of annotated genes of each virulence factor. Information are always linked to the database to make investigations easier.

+
+

The results are showed as: VFDB virulence factor name (VFDB virulence factor ID).

+
+ +
+

Detailed information

+ +Table 1: Virulence factors annotated using the VFDB database via BLASTn + +
+ +
+
+
+

Victors

+

All query genes predicted by Prokka that have a match in Victors database are described in Table 2. In order to make further investigations easier, all genes are linked to its matches in Victors database.

+
+

Victors database is protein-only thus genes are scanned via blastp. Victors is a new database and focused in proteins that are important in the interaction pathogen-host, thus it may contain some less known genes.

Table 2: Virulence factors annotated using the Victors database via BLASTp
diff --git a/markdown/list_of_tools.md b/markdown/list_of_tools.md
index 7151f7d1..27a1727a 100644
--- a/markdown/list_of_tools.md
+++ b/markdown/list_of_tools.md
@@ -18,7 +18,7 @@ These are the tools that wrapped inside bacannot. **Cite** the tools whenever yo
 | Annotation of integrative and conjugative elements | [ICEberg](http://db-mml.sjtu.edu.cn/ICEberg/) |
 | Annotation of bacterial integrons | [Integron Finder](https://github.com/gem-pasteur/Integron_Finder) |
 | Focused detection of insertion sequences | [digIS](https://github.com/janka2012/digIS) |
-| _In silico_ detection of plasmids | [Plasmidfinder](https://cge.cbs.dtu.dk/services/PlasmidFinder/) and [Platon](https://github.com/oschwengers/platon) |
+| _In silico_ detection and typing of plasmids | [Plasmidfinder](https://cge.cbs.dtu.dk/services/PlasmidFinder/), [Platon](https://github.com/oschwengers/platon) and [MOB-typer](https://github.com/phac-nml/mob-suite)|
 | Prediction and visualization of genomic islands | [IslandPath-DIMOB](https://github.com/brinkmanlab/islandpath) and [gff-toolbox](https://github.com/fmalmeida/gff-toolbox) |
 | Custom annotation from formatted FASTA or NCBI protein IDs | [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs) |
 | Merge of annotation results | [bedtools](https://bedtools.readthedocs.io/en/latest/) |
diff --git a/modules/MGEs/mob_suite.nf b/modules/MGEs/mob_suite.nf
new file mode 100644
index 00000000..9fc72d1d
--- /dev/null
+++ b/modules/MGEs/mob_suite.nf
@@ -0,0 +1,41 @@
+process MOBSUITE {
+    publishDir "${params.output}", mode: 'copy', saveAs: { filename ->
+        if (filename.indexOf("_version.txt") > 0) "tools_versioning/$filename"
+        else "${prefix}/plasmids/mob_suite/$filename"
+    }
+    tag "${prefix}"
+    label = [ 'process_medium' ]
+
+    conda "bioconda::mob_suite=3.1.4"
+    container "${ workflow.containerEngine == 'singularity' ?
+        'https://depot.galaxyproject.org/singularity/mob_suite:3.1.4--pyhdfd78af_0' :
+        'quay.io/biocontainers/mob_suite:3.1.4--pyhdfd78af_0' }"
+
+    input:
+    tuple val(prefix), file(genome)
+
+    output:
+    tuple val(prefix), path("${prefix}_mobtyper_results.txt"), emit: results
+    path("mobtyper_version.txt")
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    # Get version
+    mob_typer --version > mobtyper_version.txt ;
+
+    # run tool
+    mob_typer \\
+        --multi \\
+        --num_threads $task.cpus \\
+        --sample_id $prefix \\
+        --infile $genome \\
+        $args \\
+        --out_file ${prefix}_mobtyper_results.txt
+
+    # convert to gff if available
+    # for gbk in \$(ls *.gbk) ; do
+    #     cat \$gbk >> ${prefix}_integrons.gbk ;
+    # done
+    """
+}
diff --git a/modules/generic/reports.nf b/modules/generic/reports.nf
index b450093f..772d470a 100644
--- a/modules/generic/reports.nf
+++ b/modules/generic/reports.nf
@@ -4,7 +4,7 @@ process REPORT {
    tag "${prefix}"

    input:
-    tuple val(prefix), file('annotation_stats.tsv'), file(gff), file(barrnap), file(mlst), file(keggsvg), file(refseq_masher_txt), file(amrfinder), file(rgi), file(rgi_parsed), file(rgi_heatmap), file(argminer_out), file(resfinder_tab), file(resfinder_point), file(resfinder_phenotable), file(vfdb_blastn), file(victors_blastp), file(phigaro_txt), file(phispy_tsv), file(iceberg_blastp), file(iceberg_blastn), file(plasmids_tsv), file(platon_tsv), file(gi_image), file(phast_blastp), file(digIS), file(integronfinder)
+    tuple val(prefix), file('annotation_stats.tsv'), file(gff), file(barrnap), file(mlst), file(keggsvg), file(refseq_masher_txt), file(amrfinder), file(rgi), file(rgi_parsed), file(rgi_heatmap), file(argminer_out), file(resfinder_tab), file(resfinder_point), file(resfinder_phenotable), file(vfdb_blastn), file(victors_blastp), file(phigaro_txt), file(phispy_tsv), file(iceberg_blastp), file(iceberg_blastn), file(plasmids_tsv), file(platon_tsv), file(mobsuite_tsv), file(gi_image), file(phast_blastp), file(digIS), file(integronfinder)

    output:
    path '*.html', emit: results
@@ -77,6 +77,7 @@ process REPORT {
        ice_genome_blast = "$iceberg_blastn", \
        plasmid_finder_tab = "$plasmids_tsv", \
        platon_tsv = "$platon_tsv", \
+        mobsuite_tsv = "$mobsuite_tsv", \
        query = "${prefix}", \
        gi_image = "$gi_image", \
        digis = "$digIS", \
diff --git a/modules/generic/summary.nf b/modules/generic/summary.nf
index f5b6e257..f443ee8c 100644
--- a/modules/generic/summary.nf
+++ b/modules/generic/summary.nf
@@ -2,22 +2,33 @@ process SUMMARY {
    publishDir "${params.output}/${prefix}", mode: 'copy'
    tag "${prefix}"
    label = [ 'misc', 'process_low' ]
-
    input:
    tuple val(prefix),
-        file(annotation), file(stageAs: "results/${prefix}/MLST/*"),
-        file(stageAs: "results/${prefix}/rRNA/*"), file(stageAs: "results/${prefix}/*"),
-        file(stageAs: "results/${prefix}/plasmids/*"), file(stageAs: "results/${prefix}/plasmids/*"),
-        file(stageAs: "results/${prefix}/genomic_islands/*"), file(stageAs: "results/${prefix}/virulence/vfdb/*"),
-        file(stageAs: "results/${prefix}/virulence/victors/*"), file(stageAs: "results/${prefix}/prophages/phast_db/*"),
-        file(stageAs: "results/${prefix}/prophages/phigaro/*"), file(stageAs: "results/${prefix}/prophages/*"),
-        file(stageAs: "results/${prefix}/ICEs/*"), file(stageAs: "results/${prefix}/resistance/AMRFinderPlus/*"),
-        file(stageAs: "results/${prefix}/resistance/RGI/*"), file(stageAs: "results/${prefix}/resistance/ARGMiner/*"),
-        file(stageAs: "results/${prefix}/resistance/*"), file(stageAs: "results/${prefix}/methylations/*"),
-        file(stageAs: "results/${prefix}/refseq_masher/*"), file(stageAs: "results/${prefix}/*"),
-        file(stageAs: "results/${prefix}/*"), file(stageAs: "results/${prefix}/gffs/*"),
-        file(stageAs: "results/${prefix}/integron_finder/*")
+        file(annotation),
+        file(stageAs: "results/${prefix}/MLST/*"),
+        file(stageAs: "results/${prefix}/rRNA/*"),
+        file(stageAs: "results/${prefix}/*"),
+        file(stageAs: "results/${prefix}/plasmids/*"),
+        file(stageAs: "results/${prefix}/plasmids/*"),
+        file(stageAs: "results/${prefix}/genomic_islands/*"),
+        file(stageAs: "results/${prefix}/virulence/vfdb/*"),
+        file(stageAs: "results/${prefix}/virulence/victors/*"),
+        file(stageAs: "results/${prefix}/prophages/phast_db/*"),
+        file(stageAs: "results/${prefix}/prophages/phigaro/*"),
+        file(stageAs: "results/${prefix}/prophages/*"),
+        file(stageAs: "results/${prefix}/ICEs/*"),
+        file(stageAs: "results/${prefix}/resistance/AMRFinderPlus/*"),
+        file(stageAs: "results/${prefix}/resistance/RGI/*"),
+        file(stageAs: "results/${prefix}/resistance/ARGMiner/*"),
+        file(stageAs: "results/${prefix}/resistance/*"),
+        file(stageAs: "results/${prefix}/methylations/*"),
+        file(stageAs: "results/${prefix}/refseq_masher/*"),
+        file(stageAs: "results/${prefix}/*"),
+        file(stageAs: "results/${prefix}/*"),
+        file(stageAs: "results/${prefix}/gffs/*"),
+        file(stageAs: "results/${prefix}/integron_finder/*"),
+        file(stageAs: "results/${prefix}/plasmids/mob_suite/*")

    output:
    tuple val(prefix), path("${prefix}_summary.json"), emit: summaries
diff --git a/workflows/bacannot.nf b/workflows/bacannot.nf
index f5a64d31..93087adf 100644
--- a/workflows/bacannot.nf
+++ b/workflows/bacannot.nf
@@ -14,6 +14,7 @@ include { KOFAMSCAN } from '../modules/KOs/kofamscan'
 include { KEGG_DECODER } from '../modules/KOs/kegg-decoder'
 include { PLASMIDFINDER } from '../modules/MGEs/plasmidfinder'
 include { PLATON } from '../modules/MGEs/platon'
+include { MOBSUITE } from '../modules/MGEs/mob_suite'
 include { VFDB } from '../modules/virulence/vfdb'
 include { VICTORS } from '../modules/virulence/victors'
 include { PHAST } from '../modules/prophages/phast'
@@ -122,11 +123,15 @@ workflow BACANNOT {
        PLATON( annotation_out_ch.genome, dbs_ch )
        platon_output_ch = PLATON.out.results
        platon_all_ch = PLATON.out.all
+        // mob suite
+        MOBSUITE( annotation_out_ch.genome )
+        mobsuite_output_ch = MOBSUITE.out.results
    } else {
        plasmidfinder_all_ch = Channel.empty()
        plasmidfinder_output_ch = Channel.empty()
        platon_output_ch = Channel.empty()
        platon_all_ch = Channel.empty()
+        mobsuite_output_ch = Channel.empty()
    }

    // TODO: Maybe add in MGE optional?
@@ -367,6 +372,7 @@ workflow BACANNOT {
            .join( iceberg_output_blastn_ch, remainder: true )
            .join( plasmidfinder_output_ch, remainder: true )
            .join( platon_output_ch, remainder: true )
+            .join( mobsuite_output_ch, remainder: true )
            .join( DRAW_GIS.out.example, remainder: true )
            .join( phast_output_ch, remainder: true )
            .join( MERGE_ANNOTATIONS.out.digis_gff )
@@ -400,6 +406,7 @@ workflow BACANNOT {
            .join( antismash_all_ch           , remainder: true )
            .join( MERGE_ANNOTATIONS.out.all  , remainder: true )
            .join( INTEGRON_FINDER_2GFF.out.gff, remainder: true )
+            .join( mobsuite_output_ch         , remainder: true )
        )

        MERGE_SUMMARIES(
            SUMMARY.out.summaries.map{ it[1] }.collect()

From 567b2bd164b5273af75cad3ab538527141e60575 Mon Sep 17 00:00:00 2001
From: Felipe Almeida
Date: Thu, 4 May 2023 06:06:40 -0300
Subject: [PATCH 06/50] update singularity config

---
 conf/docker.config      | 1 -
 conf/singularity.config | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/conf/docker.config b/conf/docker.config
index a78d142f..33d65328 100644
--- a/conf/docker.config
+++ b/conf/docker.config
@@ -13,7 +13,6 @@ docker {

 // specific images
 process {
-
    // container with various tools for general purposes
    withLabel: 'db_download|db_tools|misc' {
        container = 'fmalmeida/bacannot:v3.3_misc'
diff --git a/conf/singularity.config b/conf/singularity.config
index eb8c0040..5c7a1504 100644
--- a/conf/singularity.config
+++ b/conf/singularity.config
@@ -1,7 +1,8 @@
 // Container usage and permission
 docker.enabled = false
 singularity.enabled = true
-singularity.runOptions = '--writable-tmpfs -B $PWD'
+// singularity.runOptions = '--writable-tmpfs -B $PWD'
+singularity.autoMounts = true

 /*
@@ -10,8 +11,7 @@ singularity.runOptions = '--writable-tmpfs -B $PWD'
 */

 // specific images
-process {
-
+process {
    // container with various tools for general purposes
    withLabel: 'db_download|db_tools|misc' {
        container = 'docker://fmalmeida/bacannot:v3.3_misc'

From dedef45dbddb1a32770ee1266b97b0111c47f9b1 Mon Sep 17 00:00:00 2001
From: falmeida
Date: Fri, 5 May 2023 03:20:39 -0300
Subject: [PATCH 07/50] fixed kofamscan download

---
 modules/bacannot_dbs/kofamscan.nf | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/modules/bacannot_dbs/kofamscan.nf b/modules/bacannot_dbs/kofamscan.nf
index 71baa955..0e010adc 100644
--- a/modules/bacannot_dbs/kofamscan.nf
+++ b/modules/bacannot_dbs/kofamscan.nf
@@ -6,14 +6,23 @@ process KOFAMSCAN_DB {
    file("*")

    script:
+    if (workflow.containerEngine != 'singularity') {
+        chmod_cmd = 'chmod a+rw profiles.tar.gz ko_list'
+        chown_cmd = 'chown -R root:\$(id -g) profiles'
+        tar_cmd   = '--same-owner'
+    } else {
+        chmod_cmd = ''
+        chown_cmd = ''
+        tar_cmd   = ''
+    }
    """
    # download kofamscan database
    wget --tries=10 ftp://ftp.genome.jp/pub/db/kofam/ko_list.gz
    wget --tries=10 ftp://ftp.genome.jp/pub/db/kofam/profiles.tar.gz
    gunzip ko_list.gz
-    chmod a+rw profiles.tar.gz ko_list
-    tar --same-owner -xvzf profiles.tar.gz
-    chown -R root:\$(id -g) profiles
+    $chmod_cmd
+    tar $tar_cmd -xvzf profiles.tar.gz
+    $chown_cmd
    rm -rf profiles.tar.gz

    # for the sake of size and fastness

From c2556eefe61287ef3647fdfce8779053f80ade17 Mon Sep 17 00:00:00 2001
From: falmeida
Date: Sat, 6 May 2023 05:43:14 -0300
Subject: [PATCH 08/50] fix dockerfile

---
 docker/misc/Dockerfile | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/docker/misc/Dockerfile b/docker/misc/Dockerfile
index d184bb40..bdd3e960 100644
--- a/docker/misc/Dockerfile
+++ b/docker/misc/Dockerfile
@@ -9,11 +9,10 @@ RUN conda update -n base -c defaults conda && conda install -c conda-forge -y 'm
 RUN mamba install -y \
        -c bioconda -c defaults -c conda-forge -c anaconda -c falmeida \
        --no-channel-priority \
-        'conda-forge::python>=3.10' \
+        'conda-forge::python>=3.7' \
        'blast>=2.12' \
        'diamond>=2.0.15' \
        'bedtools>=2.30' \
-        'samtools' \
        'kma' \
        'nanopolish' \
        'biopython==1.78' \
@@ -27,6 +26,8 @@ RUN mamba install -y \
        libtiff \
        jq && \
    mamba clean -afy
+# Install samtools
+RUN apt-get update -y && apt-get install -y samtools

 # Create env for digIS
 RUN mamba create -y \
@@ -68,7 +69,7 @@ ENV PATH=/work/digIS:$PATH
 # Create env for antismash
 RUN conda create -y -n antismash -c bioconda -c conda-forge \
    'antismash>=6' 'anaconda::jinja2' 'anaconda::markupsafe' nomkl && \
-    rm -r /opt/conda/envs/antismash/lib/*/site-packages/antismash/databases && \
+    rm -rf /opt/conda/envs/antismash/lib/*/site-packages/antismash/databases && \
    mamba clean -afy

 # fix bioperl

From a934e4b8d92e1df70d5162e596fe3c1fb83cca5a Mon Sep 17 00:00:00 2001
From: Felipe Almeida
Date: Sat, 6 May 2023 11:45:18 -0300
Subject: [PATCH 09/50] Fix unicycler tag

---
 modules/assembly/unicycler.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/assembly/unicycler.nf b/modules/assembly/unicycler.nf
index 5b9fa642..25ff97d6 100644
--- a/modules/assembly/unicycler.nf
+++ b/modules/assembly/unicycler.nf
@@ -9,8 +9,8 @@ process UNICYCLER {

    conda "bioconda::unicycler=0.4.8"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/unicycler:0.4.8--py38h8162308_3' :
-        'quay.io/biocontainers/unicycler:0.4.8--py38h8162308_3' }"
+        'https://depot.galaxyproject.org/singularity/unicycler:0.5.0--py39h2add14b_2' :
+        'quay.io/biocontainers/unicycler:0.5.0--py39h2add14b_2' }"

    input:
    tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), val(assembly), val(resfinder_species)

From 460ceac90945939784fbe1f2b0e8bdb5258a092c Mon Sep 17 00:00:00 2001
From: falmeida
Date: Sun, 7 May 2023 08:34:50 -0300
Subject: [PATCH 10/50] use only docker images to avoid timeout error

---
 modules/generic/prokka.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/generic/prokka.nf b/modules/generic/prokka.nf
index f2339274..390b011f 100644
--- a/modules/generic/prokka.nf
+++ b/modules/generic/prokka.nf
@@ -9,8 +9,8 @@ process PROKKA {

    conda "bioconda::prokka=1.14.6"
    container "${ workflow.containerEngine == 'singularity' ?
-        'https://depot.galaxyproject.org/singularity/prokka:1.14.6--pl5321hdfd78af_4' :
-        'quay.io/biocontainers/prokka:1.14.6--pl5321hdfd78af_4' }"
+        'docker://quay.io/biocontainers/prokka:1.14.6--pl5262hdfd78af_3' :
+        'quay.io/biocontainers/prokka:1.14.6--pl5262hdfd78af_3' }"

    input:
    tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), file(assembly), val(resfinder_species)

From f527af21a945e9b5f8273d8e52bd2f6560d1fed0 Mon Sep 17 00:00:00 2001
From: falmeida
Date: Sun, 7 May 2023 12:09:15 -0300
Subject: [PATCH 11/50] use docker container to avoid singularity timeout

---
 modules/generic/bakta.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/generic/bakta.nf b/modules/generic/bakta.nf
index 67db3569..95995c26 100644
--- a/modules/generic/bakta.nf
+++ b/modules/generic/bakta.nf
@@ -9,7 +9,7 @@ process BAKTA {

    conda "bioconda::bakta=1.7.0"
    container "${ workflow.containerEngine == 'singularity' ?
-        'https://depot.galaxyproject.org/singularity/bakta:1.7.0--pyhdfd78af_1' :
+        'docker://quay.io/biocontainers/bakta:1.7.0--pyhdfd78af_1' :
        'quay.io/biocontainers/bakta:1.7.0--pyhdfd78af_1' }"

    input:

From 62e63c5937236903bba6837be169d91f779ac667 Mon Sep 17 00:00:00 2001
From: falmeida
Date: Sun, 7 May 2023 18:00:12 -0300
Subject: [PATCH 12/50] fixed resfinder for singularity

---
 modules/resistance/resfinder.nf | 63 ++++++++++++---------------------
 1 file changed, 23 insertions(+), 40 deletions(-)

diff --git a/modules/resistance/resfinder.nf b/modules/resistance/resfinder.nf
index 36dbee8d..9b7105e9 100644
--- a/modules/resistance/resfinder.nf
+++ b/modules/resistance/resfinder.nf
@@ -20,14 +20,11 @@ process RESFINDER {
    script:
    resistance_minid  = params.blast_resistance_minid / 100.00
    resistance_mincov = params.blast_resistance_mincov / 100.00
-    if (resfinder_species.toLowerCase() != "other")
+
    """
    # activate env
    source activate resfinder

-    # Make databases available
-    ln -rs ${bacannot_db}/resfinder_db/db_* \$(dirname \$(which run_resfinder.py))
-
    # Run resfinder acquired resistance
    run_resfinder.py \\
        --inputfasta $genome \\
        -o resfinder \\
        --species \"${resfinder_species}\" \\
        --min_cov ${resistance_mincov} \\
        --threshold ${resistance_minid} \\
+        --db_path_point ${bacannot_db}/resfinder_db/db_pointfinder \\
+        --db_path_res ${bacannot_db}/resfinder_db/db_resfinder \\
        --acquired ;

    # Fix name of pheno table
    mv resfinder/pheno_table.txt resfinder/args_pheno_table.txt &> /dev/null ;

    # Run resfinder pointfinder resistance
-    run_resfinder.py \\
-        --inputfasta $genome \\
-        -o resfinder \\
-        --species \"${resfinder_species}\" \\
-        --min_cov ${resistance_mincov} \\
-        --threshold ${resistance_minid} \\
-        --point ;
-
-    # Fix name of pheno table
-    mv resfinder/pheno_table.txt resfinder/mutation_pheno_table.txt &> /dev/null ;
-
-    # Convert to GFF
-    resfinder2gff.py \\
-        -i resfinder/ResFinder_results_tab.txt > resfinder/results_tab.gff ;
-    """
-
-    else if (resfinder_species.toLowerCase() == "other")
-    """
-    # activate env
-    source activate resfinder
+    if [ \"${resfinder_species.toLowerCase()}\" != "other" ]; then

-    # Make databases available
-    ln -rs ${bacannot_db}/resfinder_db/db_* \$(dirname \$(which run_resfinder.py))
+        run_resfinder.py \\
+            --inputfasta $genome \\
+            -o resfinder \\
+            --species \"${resfinder_species}\" \\
+            --min_cov ${resistance_mincov} \\
+            --threshold ${resistance_minid} \\
+            --db_path_point ${bacannot_db}/resfinder_db/db_pointfinder \\
+            --db_path_res ${bacannot_db}/resfinder_db/db_resfinder \\
+            --point ;

-    # Run resfinder acquired resistance
-    run_resfinder.py \\
-        --inputfasta $genome \\
-        -o resfinder \\
-        --species \"${resfinder_species}\" \\
-        --min_cov ${resistance_mincov} \\
-        --threshold ${resistance_minid} \\
-        --acquired ;
-
-    # Fix name of pheno table
-    mv resfinder/pheno_table.txt resfinder/args_pheno_table.txt &> /dev/null ;
-
-    # touch pointfinder
-    touch resfinder/PointFinder_results.txt ;
+        # Fix name of pheno table
+        mv resfinder/pheno_table.txt resfinder/mutation_pheno_table.txt &> /dev/null ;
+
+    else
+        # touch pointfinder
+        touch resfinder/PointFinder_results.txt ;
+
+    fi

    # Convert to GFF
    resfinder2gff.py \\
        -i resfinder/ResFinder_results_tab.txt > resfinder/results_tab.gff ;
    """
+
 }

From 67f9942ae6373751d3d6daa83c9630c11dd9e3e4 Mon Sep 17 00:00:00 2001
From: falmeida
Date: Sun, 7 May 2023 18:00:30 -0300
Subject: [PATCH 13/50] fixed docker image

---
 docker/misc/Dockerfile | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/docker/misc/Dockerfile b/docker/misc/Dockerfile
index bdd3e960..9f06cb42 100644
--- a/docker/misc/Dockerfile
+++ b/docker/misc/Dockerfile
@@ -3,7 +3,9 @@ LABEL authors="Felipe Almeida" \
      description="Docker image containing any-based bacannot tools"

 # install mamba
-RUN conda update -n base -c defaults conda && conda install -c conda-forge -y 'mamba>=1.0'
+RUN conda update -n root conda --yes && \
+    conda update -n base conda --yes && \
+    conda install -c conda-forge -y 'mamba>=1.4'

 # Install the conda environment
 RUN mamba install -y \
@@ -16,7 +18,6 @@ RUN mamba install -y \
        'kma' \
        'nanopolish' \
        'biopython==1.78' \
-        gff-toolbox \
        seqkit \
        bioawk \
        'easy_circos>=0.3' \
@@ -26,9 +27,16 @@ RUN mamba install -y \
        libtiff \
        jq && \
    mamba clean -afy
+
 # Install samtools
 RUN apt-get update -y && apt-get install -y samtools

+# Install gff-toolbox
+RUN git clone https://github.com/fmalmeida/gff-toolbox.git && \
+    cd gff-toolbox && \
+    python setup.py install && \
+    gff-toolbox -h
+
 # Create env for digIS
 RUN mamba create -y \

From a726274febb7d85ca35e87bd74e36cc008e911b4 Mon Sep 17 00:00:00 2001
From: falmeida
Date: Mon, 8 May 2023 14:31:29 -0300
Subject: [PATCH 14/50] fix gff2sql in singularity

---
 bin/gff2sql.R                          |   4 +-
 docker/renv/scripts/rscripts/gff2sql.R | 139 -------------------------
 modules/generic/gff2sql.nf             |   3 -
 3 files changed, 2 insertions(+), 144 deletions(-)
 delete mode 100644 docker/renv/scripts/rscripts/gff2sql.R

diff --git a/bin/gff2sql.R b/bin/gff2sql.R
index 61eac29d..94920dfc 100755
--- a/bin/gff2sql.R
+++ b/bin/gff2sql.R
@@ -54,8 +54,8 @@ addTable <- function (con, sql, input) {

 # Loading SQL database driver
 drv <- dbDriver("SQLite")
-dbname <- file.path("/work", opt$out)
-con <- dbConnect(drv, dbname=dbname)
+print(opt$out)
+con <- dbConnect(drv, dbname=opt$out)

 #####################################
 ### First STEP load GENOME to sql ###
diff --git a/docker/renv/scripts/rscripts/gff2sql.R b/docker/renv/scripts/rscripts/gff2sql.R
deleted file mode 100644
index 61eac29d..00000000
--- a/docker/renv/scripts/rscripts/gff2sql.R
+++ /dev/null
@@ -1,139 +0,0 @@
-#!/usr/bin/Rscript
-doc <- 'usage: gff2sql.R [--input= --out= --fasta= --nucleotide= --aminoacid=]
-
-options:
-    -i, --input=    GFF file to transform in SQL
-    -o, --out=    SQL database name to output [default: out.sql]
-    -n, --nucleotide=    Takes in the nucleotide FASTA.
-    -a, --aminoacid=    Takes in the protein FASTA
-    -f, --fasta=    Takes in the genome FASTA'
-
-# Loading required packages
-suppressMessages(library("docopt"))
-suppressMessages(library(RSQLite))
-suppressMessages(library(dplyr))
-suppressMessages(library(stringr))
-suppressMessages(library(DataCombine))
-suppressMessages(library(Biostrings))
-
-# Parse help
-opt <- docopt(doc)
-
-# Useful functions
-## Query the 9th column
-getAttributeField <- function (x, field, attrsep = ";") {
-    s = strsplit(x, split = attrsep, fixed = TRUE)
-    sapply(s, function(atts) {
-        a = strsplit(atts, split = "=", fixed = TRUE)
-        m = match(field, sapply(a, "[", 1))
-        if (!is.na(m)) { rv = a[[m]][2]
-        }
-        else {
-            rv = as.character(NA)
-        }
-        return(rv)
-    })
-}
-
-## Add table to SQL db
-addTable <- function (con, sql, input) {
-    ## Open db
-    suppressWarnings(dbBegin(con))
-
-    ## Send rule
-    res <- suppressWarnings(dbSendQuery(con, sql))
-
-    ## Insert data based on rule
-    suppressWarnings(dbBind(res, input))
-    suppressWarnings(dbFetch(res))
-    suppressWarnings(dbClearResult(res))
-
-    ## Close db
-    suppressWarnings(dbCommit(con))
-}
-
-# Loading SQL database driver
-drv <- dbDriver("SQLite")
-dbname <- file.path("/work", opt$out)
-con <- dbConnect(drv, dbname=dbname)
-
-#####################################
-### First STEP load GENOME to sql ###
-#####################################
-fastaFile <- readDNAStringSet(opt$fasta)
-seq_name = names(fastaFile)
-#sequence = paste(fastaFile)
-sequence_len = sapply(fastaFile, function(x) {
-    length(x)[[1]]
-})
-genome <- data.frame(seq_name, sequence_len)
-names(genome) <- c("Contig", "Length")
-
-# Create SQL table for the genome sequence
-suppressWarnings(dbGetQuery(con, "CREATE Table Genome (Contig TEXT, Length TEXT)"))
-# Create sql rule
-sql <- "INSERT INTO Genome VALUES ($Contig, $Length)"
-# Add to SQL db
-addTable(con, sql, genome)
-
-###################################
-### Second STEP load GFF to sql ###
-###################################
-
-# Loading GFF file
-gff <- read.delim(opt$input, header = FALSE, stringsAsFactors = FALSE)
-# Give data a header
-names(gff) <- c("chr", "source", "feature", "start", "end", "score", "strand", "frame", "attributes")
-# Get IDs
-gff$ID <- getAttributeField(as.character(gff$attributes), "ID", ";")
-# Reorder columns
-gff <- gff %>% select(chr, source, ID, feature, start, end, score, strand, frame, attributes)
-# Create SQL table to store GFF data
-suppressWarnings(dbGetQuery(con, "CREATE Table GFF (Contig TEXT, Source TEXT, ID TEXT, Feature TEXT,
-                 Start INTEGER, End INTEGER, Score INTEGER, Strand TEXT,
-                 Frame INTEGER, Attributes TEXT)"))
-# Create sql rule
-sql <- "INSERT INTO GFF VALUES ($chr, $source, $ID, $feature,
-$start, $end, $score, $strand, $frame, $attributes)"
-# Add to SQL db
-addTable(con, sql, gff)
-
-##############################################
-### Third STEP load gene nucl fasta to sql ###
-##############################################
-
-## Loading Protein fasta
-genes <- readAAStringSet(opt$aminoacid)
-gene_ids <- sapply(names(genes), function(x) {
-    unlist(strsplit(as.character(x), " "))[1]
-})
-gene_desc <- sapply(names(genes), function(x) {
-    paste0(unlist(strsplit(as.character(x), " "))[-1], collapse = " ")
-})
-sequences = paste(genes)
-genes_aa <- data.frame(gene_ids, gene_desc, sequences)
-names(genes_aa) <- c("ID", "Description", "Sequence")
-## Create SQL table to store Protein FASTA
-suppressWarnings(dbGetQuery(con, "CREATE Table ProteinFasta (ID TEXT, Description TEXT, Sequence TEXT)"))
-## Create sql rule
-sql <- "INSERT INTO ProteinFasta VALUES ($ID, $Description, $Sequence)"
-# Add to SQL db
-addTable(con, sql, genes_aa)
-
-## Loading Nucleotide fasta
-genes <- readDNAStringSet(opt$nucleotide)
-gene_ids <- sapply(names(genes), function(x) {
-    unlist(strsplit(as.character(x), " "))[1]
-})
-gene_desc <- sapply(names(genes), function(x) {
-    paste0(unlist(strsplit(as.character(x), " "))[-1], collapse = " ")
-})
-sequences = paste(genes)
-genes_ncl <- data.frame(gene_ids, gene_desc, sequences)
-names(genes_ncl) <- c("ID", "Description", "Sequence")
-## Create SQL table to store Protein FASTA
-suppressWarnings(dbGetQuery(con, "CREATE Table NucleotideFasta (ID TEXT, Description TEXT, Sequence TEXT)"))
-## Create sql rule
-sql <- "INSERT INTO NucleotideFasta VALUES ($ID, $Description, $Sequence)"
-# Add to SQL db
-addTable(con, sql, genes_ncl)
diff --git a/modules/generic/gff2sql.nf b/modules/generic/gff2sql.nf
index 536609af..0c34824a 100644
--- a/modules/generic/gff2sql.nf
+++ b/modules/generic/gff2sql.nf
@@ -33,9 +33,6 @@ process CREATE_SQL {

    fi

-    # Save results with better name
-    mv /work/${prefix}.sqlite . ;
-
    # Save parser
    cp /work/bscripts/run_server.sh . ;
    """

From a9fa23ea8d6c4dc97a2f06457439b62f197bd986 Mon Sep 17 00:00:00 2001
From: falmeida
Date: Tue, 9 May 2023 04:11:52 -0300
Subject: [PATCH 15/50] use proper singularity images

---
 modules/generic/bakta.nf  | 2 +-
 modules/generic/prokka.nf | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/generic/bakta.nf b/modules/generic/bakta.nf
index 95995c26..67db3569 100644
--- a/modules/generic/bakta.nf
+++ b/modules/generic/bakta.nf
@@ -9,7 +9,7 @@ process BAKTA {

    conda "bioconda::bakta=1.7.0"
    container "${ workflow.containerEngine == 'singularity' ?
-        'docker://quay.io/biocontainers/bakta:1.7.0--pyhdfd78af_1' :
+        'https://depot.galaxyproject.org/singularity/bakta:1.7.0--pyhdfd78af_1' :
        'quay.io/biocontainers/bakta:1.7.0--pyhdfd78af_1' }"

    input:
diff --git a/modules/generic/prokka.nf b/modules/generic/prokka.nf
index 390b011f..9dc687cc 100644
--- a/modules/generic/prokka.nf
+++ b/modules/generic/prokka.nf
@@ -9,7 +9,7 @@ process PROKKA {

    conda "bioconda::prokka=1.14.6"
    container "${ workflow.containerEngine == 'singularity' ?
-        'docker://quay.io/biocontainers/prokka:1.14.6--pl5262hdfd78af_3' :
+        'https://depot.galaxyproject.org/singularity/prokka:1.14.6--pl5262hdfd78af_3' :
        'quay.io/biocontainers/prokka:1.14.6--pl5262hdfd78af_3' }"

    input:

From c470d565e17a6a0caeb949b937af42452c38c1ba Mon Sep 17 00:00:00 2001
From: falmeida
Date: Tue, 9 May 2023 04:12:00 -0300
Subject: [PATCH 16/50] fix singularity image download

---
 conf/singularity.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/conf/singularity.config b/conf/singularity.config
index 5c7a1504..1a53bb76 100644
--- a/conf/singularity.config
+++ b/conf/singularity.config
@@ -3,6 +3,7 @@ docker.enabled = false
 singularity.enabled = true
 // singularity.runOptions = '--writable-tmpfs -B $PWD'
 singularity.autoMounts = true
+env.SINGULARITY_DISABLE_CACHE = 1

 /*

From b0ab9031846b114a0a358f77d32225688d0152f9 Mon Sep 17 00:00:00 2001
From: falmeida
Date: Tue, 9 May 2023 09:43:39 -0300
Subject: [PATCH 17/50] fixed docker image

---
 modules/MGEs/mob_suite.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/MGEs/mob_suite.nf b/modules/MGEs/mob_suite.nf
index 9fc72d1d..e59adee0 100644
--- a/modules/MGEs/mob_suite.nf
+++ b/modules/MGEs/mob_suite.nf
@@ -8,7 +8,7 @@ process MOBSUITE {

    conda "bioconda::mob_suite=3.1.4"
    container "${ workflow.containerEngine == 'singularity' ?
-        'https://depot.galaxyproject.org/singularity/mob_suite:3.1.4--pyhdfd78af_0' :
+        'docker://quay.io/biocontainers/mob_suite:3.1.4--pyhdfd78af_0' :
        'quay.io/biocontainers/mob_suite:3.1.4--pyhdfd78af_0' }"

    input:

From 4c33abf8ef9cbdeb0af57942558cf836fcf963b8 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida
Date: Thu, 18 May 2023 10:31:04 +0200
Subject: [PATCH 18/50] Add option for prebuilt db download (#94)

* include module to download pre-built databases
* update docs
---
 README.md                          |  8 ++++++-
 conf/defaults.config               |  5 +++--
 docker/misc/Dockerfile             |  3 +++
 docs/quickstart.md                 |  7 ++++++
 lib/WorkflowBacannot.groovy        | 16 ++++++++++++--
 main.nf                            |  2 +-
 modules/bacannot_dbs/argminer.nf   |  2 +-
 modules/bacannot_dbs/get_zenodo.nf | 18 +++++++++++++++
 workflows/bacannot_dbs.nf          | 35 +++++++++++++++++-------------
 9 files changed, 74 insertions(+), 22 deletions(-)
 create mode 100644 modules/bacannot_dbs/get_zenodo.nf

diff --git a/README.md b/README.md
index 2e8ceeb9..4fdfa4fd 100644
--- a/README.md
+++ b/README.md
@@ -96,7 +96,7 @@ These images have been kept separate to not create massive Docker image and to a
    docker pull fmalmeida/bacannot:jbrowse ;

-🔥 Nextflow can also automatically handle images download on the fly when executed. All the other docker images from biocontainers are downloaded automatically. If docker has exceeded its download limit rates, please try again in a few hours.
+🔥 Nextflow can also automatically handle images download on the fly when executed. All the other docker images from **biocontainers** are downloaded automatically. If docker has exceeded its download limit rates, please try again in a few hours.

 2. Install Nextflow (version 20.10 or higher):

@@ -120,6 +120,12 @@ Users can directly download pre-formatted databases from Zenodo: https://doi.org

 Useful for standardization and also overcoming known issues that may arise when formatting databases with `singularity` profile.
+A module to download the latest pre-formatted database has also been made available:
+
+```bash
+# Download pipeline pre-built databases
+nextflow run fmalmeida/bacannot --get_zenodo_db --output ./ -profile
+```
+
 #### I want to generate a new formatted database

 To download and format a copy of required bacannot databases users can execute the following:
diff --git a/conf/defaults.config b/conf/defaults.config
index 9702f5d5..8e8aaf72 100644
--- a/conf/defaults.config
+++ b/conf/defaults.config
@@ -14,8 +14,9 @@ params {

 // Trigger database download and formatting workflow? --> will not run annotation
 // Will download and format a database inside {output} parameter
-    get_dbs      = false
-    force_update = false
+    get_dbs       = false
+    force_update  = false
+    get_zenodo_db = false // download pre-built database

 /*
diff --git a/docker/misc/Dockerfile b/docker/misc/Dockerfile
index 9f06cb42..7e718db0 100644
--- a/docker/misc/Dockerfile
+++ b/docker/misc/Dockerfile
@@ -87,6 +87,9 @@ RUN conda run -n perl PERL5LIB= PERL_LOCAL_LIB_ROOT= cpanm Bio::Root::RootI
 # fix python
 RUN python3 -m pip install cryptography==38.0.4

+# install get zenodo
+RUN pip3 install zenodo_get
+
 # fix permissions
 RUN chmod 777 -R /work
 RUN chmod 777 -R /opt/conda/envs/antismash/lib/*/site-packages/antismash
diff --git a/docs/quickstart.md b/docs/quickstart.md
index 3f68c7f0..5490726f 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -41,6 +41,13 @@ Users can directly download pre-formatted databases from Zenodo: https://doi.org

 Useful for standardization and also overcoming known issues that may arise when formatting databases with `singularity` profile.

+A module to download the latest pre-formatted database has also been made available:
+
+```bash
+# Download pipeline pre-built databases
+nextflow run fmalmeida/bacannot --get_zenodo_db --output ./ -profile
+```
+
 #### I want to generate a new formatted database

 ```{bash .annotate hl_lines="5"}
 # Download pipeline databases
diff --git a/lib/WorkflowBacannot.groovy b/lib/WorkflowBacannot.groovy
index 94e05db2..527e14c0 100755
--- a/lib/WorkflowBacannot.groovy
+++ b/lib/WorkflowBacannot.groovy
@@ -10,8 +10,20 @@ class WorkflowBacannot {
    public static void initialise(params, log) {

        // input has been given and user does not want to download databases?
-        if (!params.input && !params.get_dbs) {
-            log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.yml'. Or select the download databases mode with --get_dbs."
+        if (!params.input && !params.get_dbs && !params.get_zenodo_db) {
+            log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.yml'. Or select the download databases mode with --get_dbs or --get_zenodo_db"
+            System.exit(1)
+        }
+
+        // using incompatible parameters?
+        if (params.input && (params.get_dbs || params.get_zenodo_db)) {
+            log.error "Not possible to run (--input) the pipeline and try to download databases (--get_dbs or --get_zenodo_db). Please do one or another."
+            System.exit(1)
+        }
+
+        // input has been given and user does not want to download databases?
+        if (params.get_dbs && params.get_zenodo_db) {
+            log.error "Please select either --get_dbs or --get_zenodo_db, not both at the same time."
            System.exit(1)
        }

diff --git a/main.nf b/main.nf
index f86a7eb3..7c8421ef 100644
--- a/main.nf
+++ b/main.nf
@@ -36,7 +36,7 @@ include { CREATE_DBS } from './workflows/bacannot_dbs.nf'

 workflow {

-    if (params.get_dbs) {
+    if (params.get_dbs || params.get_zenodo_db) {
        CREATE_DBS()
    } else {
        if (params.input) {
diff --git a/modules/bacannot_dbs/argminer.nf b/modules/bacannot_dbs/argminer.nf
index bacbb91a..6b2881ea 100644
--- a/modules/bacannot_dbs/argminer.nf
+++ b/modules/bacannot_dbs/argminer.nf
@@ -1,7 +1,7 @@
 process ARGMINER_DB {
    publishDir "${params.output}/argminer_db", mode: 'copy', overwrite: "$params.force_update"
    label = [ 'db_download', 'process_ultralow' ]
-    
+
    output:
    file("*")
diff --git a/modules/bacannot_dbs/get_zenodo.nf b/modules/bacannot_dbs/get_zenodo.nf
new file mode 100644
index 00000000..65c42605
--- /dev/null
+++ b/modules/bacannot_dbs/get_zenodo.nf
@@ -0,0 +1,18 @@
+process GET_ZENODO_DB {
+    publishDir "${params.output}", mode: 'copy', overwrite: "$params.force_update"
+    label = [ 'db_download', 'process_low' ]
+
+    tag "Downloading pre-built databases"
+
+    output:
+    file("*")
+
+    script:
+    """
+    # download database from zenodo
+    zenodo_get https://doi.org/10.5281/zenodo.7615811
+
+    # organize data
+    tar zxvf *.tar.gz && rm *.tar.gz
+    """
+}
\ No newline at end of file
diff --git a/workflows/bacannot_dbs.nf b/workflows/bacannot_dbs.nf
index c44d4d36..ca6c76e3 100644
--- a/workflows/bacannot_dbs.nf
+++ b/workflows/bacannot_dbs.nf
@@ -16,6 +16,7 @@ include { ICEBERG_DB } from '../modules/bacannot_dbs/iceberg.nf'
 include { PHAST_DB } from '../modules/bacannot_dbs/phast.nf'
 include { KOFAMSCAN_DB } from '../modules/bacannot_dbs/kofamscan.nf'
 include { ANTISMASH_DB } from '../modules/bacannot_dbs/antismash.nf'
+include { GET_ZENODO_DB } from '../modules/bacannot_dbs/get_zenodo.nf'

 /*
    DEF WORKFLOW
@@ -23,21 +24,25 @@ include { ANTISMASH_DB } from '../modules/bacannot_dbs/antismash.nf'

 workflow CREATE_DBS {

-    download_db("prokka", "PROKKA_DB")
-    download_db("mlst", "MLST_DB")
-    download_db("kofamscan", "KOFAMSCAN_DB")
-    download_db("card", "CARD_DB")
-    download_db("resfinder", "RESFINDER_DB")
-    download_db("amrfinder", "AMRFINDER_DB")
-    download_db("argminer", "ARGMINER_DB")
-    download_db("platon", "PLATON_DB")
-    download_db("plasmidfinder", "PLASMIDFINDER_DB")
-    download_db("phigaro", "PHIGARO_DB")
-    download_db("phast", "PHAST_DB")
-    download_db("vfdb", "VFDB_DB")
-    download_db("victors", "VICTORS_DB")
-    download_db("iceberg", "ICEBERG_DB")
-    download_db("antismash", "ANTISMASH_DB")
+    if ( params.get_dbs && !params.get_zenodo_db ) {
+        download_db("prokka", "PROKKA_DB")
+        download_db("mlst", "MLST_DB")
+        download_db("kofamscan", "KOFAMSCAN_DB")
+        download_db("card", "CARD_DB")
+        download_db("resfinder", "RESFINDER_DB")
+        download_db("amrfinder", "AMRFINDER_DB")
+        download_db("argminer", "ARGMINER_DB")
+        download_db("platon", "PLATON_DB")
+        download_db("plasmidfinder", "PLASMIDFINDER_DB")
+        download_db("phigaro", "PHIGARO_DB")
+        download_db("phast", "PHAST_DB")
+        download_db("vfdb", "VFDB_DB")
+        download_db("victors", "VICTORS_DB")
+        download_db("iceberg", "ICEBERG_DB")
+        download_db("antismash", "ANTISMASH_DB")
+    } else if ( !params.get_dbs && params.get_zenodo_db ) {
+        GET_ZENODO_DB()
+    }
 }

From 5c126dc55472e8aa9801dc055cf152570ff1ddd0 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida
Date: Fri, 19 May 2023 10:37:50 +0200
Subject: [PATCH 19/50] 69 tools to use own docker image (#91)

* moved container configurations of assembly modules
* update default flye version
* update container configuration for database-setup modules
* re-organize container definition of 'generic' modules
* reorganize container configuration for KO modules
* reorganized container configuration for MGEs modules
* finalizing container configuration reorganization of last modules
* containers already defined in config files
* update params schema
* fixed zenodo download
* mob_suite singularity image not always suited for low connection servers
* add option to download container configs
---
 README.md                             | 11 ++++
 conf/defaults.config                  |  2 +-
 conf/docker.config                    | 77 +++++++++++++++++++++++++-
 conf/singularity.config               | 80 ++++++++++++++++++++++++++-
 lib/WorkflowMain.groovy               | 24 ++++++++
 modules/KOs/kofamscan.nf              |  5 --
 modules/MGEs/draw_gis.nf              |  1 -
 modules/MGEs/integron_finder.nf       |  5 --
 modules/MGEs/islandpath.nf            |  5 --
 modules/MGEs/mob_suite.nf             |  5 --
 modules/MGEs/plasmidfinder.nf         |  5 --
 modules/MGEs/platon.nf                |  5 --
 modules/assembly/flye.nf              |  5 --
 modules/assembly/unicycler.nf         |  5 --
 modules/bacannot_dbs/amrfinder.nf     |  5 --
 modules/bacannot_dbs/antismash.nf     |  2 +-
 modules/bacannot_dbs/get_zenodo.nf    |  1 +
 modules/bacannot_dbs/iceberg.nf       |  2 +-
 modules/bacannot_dbs/kofamscan.nf     |  2 +-
 modules/bacannot_dbs/phast.nf         |  2 +-
 modules/bacannot_dbs/plasmidfinder.nf |  2 +-
 modules/bacannot_dbs/platon.nf        |  2 +-
 modules/bacannot_dbs/prokka.nf        |  2 +-
 modules/bacannot_dbs/resfinder.nf     |  2 +-
 modules/bacannot_dbs/vfdb.nf          |  2 +-
 modules/bacannot_dbs/victors.nf       |  2 +-
 modules/generic/bakta.nf              |  5 --
 modules/generic/barrnap.nf            |  5 --
 modules/generic/circos.nf             |  1 -
 modules/generic/gc_skew.nf            |  1 -
 modules/generic/karyotype.nf          |  1 -
 modules/generic/mash.nf               |  5 --
 modules/generic/merge_summaries.nf    |  1 -
 modules/generic/mlst.nf               |  5 --
 modules/generic/prepare_circos.nf     |  1 -
 modules/generic/prokka.nf             |  5 --
 modules/generic/sequenceserver.nf     |  3 +-
 modules/prophages/phigaro.nf          |  5 --
 modules/prophages/phispy.nf           |  5 --
 modules/resistance/amrfinder.nf       |  5 --
 modules/resistance/amrfinder2tsv.nf   |  1 -
 modules/resistance/rgi_annotation.nf  |  5 --
 modules/virulence/vfdb2tsv.nf         |  1 -
 nextflow.config                       |  2 +
 nextflow_schema.json                  | 42 +++++++++-----
 45 files changed, 233 insertions(+), 127 deletions(-)

diff --git a/README.md b/README.md
index 4fdfa4fd..36625099 100644
--- a/README.md
+++ b/README.md
@@ -190,6 +190,17 @@ Create a configuration file in your working directory:

       nextflow run fmalmeida/bacannot --get_config

+##### Overwrite container versions with config
+
+The pipeline uses pre-set docker and singularity configuration files to set all the containers and versions of images that should be used by each module in the pipeline.
+
+Although not recommended, one can use these configuration files to change the version of specific tools if desired.
+
+To download these configs one can:
+
+    nextflow run fmalmeida/bacannot --get_docker_config
+    nextflow run fmalmeida/bacannot --get_singularity_config
+
 ### Interactive graphical configuration and execution

 #### Via NF tower launchpad (good for cloud env execution)
diff --git a/conf/defaults.config b/conf/defaults.config
index 8e8aaf72..63da85be 100644
--- a/conf/defaults.config
+++ b/conf/defaults.config
@@ -177,7 +177,7 @@ params {
 // Tools that are not part of the core of the pipeline,
 // but can eventually be used by users
    unicycler_version = '0.4.8--py38h8162308_3'
-    flye_version      = '2.9--py39h39abbe0_0'
+    flye_version      = '2.9--py39h6935b12_1'
    bakta_version     = '1.7.0--pyhdfd78af_1'

 // Max resource options
diff --git a/conf/docker.config b/conf/docker.config
index 33d65328..562b2158 100644
--- a/conf/docker.config
+++ b/conf/docker.config
@@ -13,7 +13,10 @@ docker {

 // specific images
 process {
-    // container with various tools for general purposes
+
+    //
+    // Custom pipeline's containers with various tools for general purposes
+    //
    withLabel: 'db_download|db_tools|misc' {
        container = 'fmalmeida/bacannot:v3.3_misc'
    }
@@ -31,5 +34,77 @@ process {
    withLabel: 'jbrowse' {
        container = 'fmalmeida/bacannot:jbrowse'
    }
+
+    //
+    // Public containers used within the pipeline
+    //
+    withName: FLYE {
+        container = "quay.io/biocontainers/flye:${params.flye_version}"
+    }
+
+    withName: UNICYCLER {
+        container = "quay.io/biocontainers/unicycler:${params.unicycler_version}"
+    }
+
+    withName: 'AMRFINDER_DB|AMRFINDER' {
+        container = "ncbi/amr:3.11.2-2022-12-19.1"
+    }
+
+    withName: BAKTA {
+        container = "quay.io/biocontainers/bakta:${params.bakta_version}"
+    }
+
+    withName: BARRNAP {
+        container = "quay.io/biocontainers/barrnap:0.9--hdfd78af_4"
+    }
+
+    withName: REFSEQ_MASHER {
+        container = "quay.io/biocontainers/refseq_masher:0.1.2--py_0"
+    }
+
+    withName: MLST {
+        container = "quay.io/biocontainers/mlst:2.19.0--hdfd78af_1"
+    }
+
+    withName: PROKKA {
+        container = "quay.io/biocontainers/prokka:1.14.6--pl5321hdfd78af_4"
+    }
+
+    withName: KOFAMSCAN {
+        container = "quay.io/biocontainers/kofamscan:1.3.0--hdfd78af_2"
+    }
+
+    withName: INTEGRON_FINDER {
+        container = "quay.io/biocontainers/integron_finder:2.0.1--pyhdfd78af_0"
+    }
+
+    withName: ISLANDPATH {
+        container = "quay.io/biocontainers/islandpath:1.0.6--hdfd78af_0"
+    }
+
+    withName: MOBSUITE {
+        container = "quay.io/biocontainers/mob_suite:3.1.4--pyhdfd78af_0"
+    }
+
+    withName: PLASMIDFINDER {
+        container = "quay.io/biocontainers/plasmidfinder:2.1.6--py310hdfd78af_1"
+    }
+
+    withName: PLATON {
+        container = "quay.io/biocontainers/platon:1.6--pyhdfd78af_1"
+    }
+
+    withName: PHIGARO {
+        container = "quay.io/biocontainers/phigaro:2.3.0--pyh7b7c402_0"
+    }
+
+    withName: PHISPY {
+        container = "quay.io/biocontainers/phispy:4.2.21--py39h7cff6ad_0"
+    }
+
+    withName: CARD_RGI {
+        container = "quay.io/biocontainers/rgi:5.2.1--pyhdfd78af_1"
+    }
+
 }
diff --git a/conf/singularity.config b/conf/singularity.config
index 1a53bb76..35707df6 100644
--- a/conf/singularity.config
+++ b/conf/singularity.config
@@ -12,8 +12,11 @@ env.SINGULARITY_DISABLE_CACHE = 1
 */

 // specific images
-process {
-    // container with various tools for general purposes
+process {
+
+    //
+    // Custom pipeline's containers with various tools for general purposes
+    //
    withLabel: 'db_download|db_tools|misc' {
        container = 'docker://fmalmeida/bacannot:v3.3_misc'
    }
@@ -31,5 +34,78 @@ process {
    withLabel: 'jbrowse' {
        container = 'docker://fmalmeida/bacannot:jbrowse'
    }
+
+    //
+    // Public containers used within the pipeline
+    //
+    withName: FLYE {
+        container = "https://depot.galaxyproject.org/singularity/flye:${params.flye_version}"
+    }
+
+    withName: UNICYCLER {
+        container = "https://depot.galaxyproject.org/singularity/unicycler:${params.unicycler_version}"
+    }
+
+    withName: 'AMRFINDER_DB|AMRFINDER' {
+        container = "docker://ncbi/amr:3.11.2-2022-12-19.1"
+    }
+
+    withName: BAKTA {
+        container = "https://depot.galaxyproject.org/singularity/bakta:${params.bakta_version}"
+    }
+
+    withName: BARRNAP {
+        container = "https://depot.galaxyproject.org/singularity/barrnap:0.9--hdfd78af_4"
+    }
+
+    withName: REFSEQ_MASHER {
+        container = "https://depot.galaxyproject.org/singularity/refseq_masher:0.1.2--py_0"
+    }
+
+    withName: MLST {
+        container = "https://depot.galaxyproject.org/singularity/mlst:2.19.0--hdfd78af_1"
+    }
+
+    withName: PROKKA {
+        container = "https://depot.galaxyproject.org/singularity/prokka:1.14.6--pl5321hdfd78af_4"
+    }
+
+    withName: KOFAMSCAN {
+        container = "https://depot.galaxyproject.org/singularity/kofamscan:1.3.0--hdfd78af_2"
+    }
+
+    withName: INTEGRON_FINDER {
+        container = "https://depot.galaxyproject.org/singularity/integron_finder:2.0.1--pyhdfd78af_0"
+    }
+
+    withName: ISLANDPATH {
+        container = "https://depot.galaxyproject.org/singularity/islandpath:1.0.6--hdfd78af_0"
+    }
+
+    withName: MOBSUITE {
+        // container = "https://depot.galaxyproject.org/singularity/mob_suite:3.1.4--pyhdfd78af_0"
+        container = "docker://quay.io/biocontainers/mob_suite:3.1.4--pyhdfd78af_0"
+    }
+
+    withName: PLASMIDFINDER {
+        container = "https://depot.galaxyproject.org/singularity/plasmidfinder:2.1.6--py310hdfd78af_1"
+    }
+
+    withName: PLATON {
+        container = "https://depot.galaxyproject.org/singularity/platon:1.6--pyhdfd78af_1"
+    }
+
+    withName: PHIGARO {
+        container = "https://depot.galaxyproject.org/singularity/phigaro:2.3.0--pyh7b7c402_0"
+    }
+
+    withName: PHISPY {
+        container = "https://depot.galaxyproject.org/singularity/phispy:4.2.21--py39h7cff6ad_0"
+    }
+
+    withName: CARD_RGI {
+        container = "https://depot.galaxyproject.org/singularity/rgi:5.2.1--pyhdfd78af_1"
+    }
+
 }
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index 7531147f..57d85384 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -74,6 +74,30 @@ class WorkflowMain {
            System.exit(0)
        }

+        // Download docker config
+        if (params.get_docker_config) {
+            new File("docker.config").write(new URL ("https://github.com/fmalmeida/bacannot/raw/master/conf/docker.config").getText())
+            log.info """
+            docker.config file saved in working directory
+            After configuration, run:
+            nextflow run fmalmeida/bacannot -c ./docker.config
+            Nice code
+            """.stripIndent()
+            System.exit(0)
+        }
+
+        // Download singularity config
+        if (params.get_singularity_config) {
+            new File("singularity.config").write(new URL ("https://github.com/fmalmeida/bacannot/raw/master/conf/singularity.config").getText())
+            log.info """
+            singularity.config file saved in working directory
+            After configuration, run:
+            nextflow run fmalmeida/bacannot -c ./singularity.config
+            Nice code
+            """.stripIndent()
+            System.exit(0)
+        }
+
        // Validate workflow parameters via the JSON schema
        if (params.validate_params) {
            NfcoreSchema.validateParameters(workflow, params, log)
diff --git a/modules/KOs/kofamscan.nf b/modules/KOs/kofamscan.nf
index 35aff276..cd07f66e 100644
--- a/modules/KOs/kofamscan.nf
+++ b/modules/KOs/kofamscan.nf
@@ -6,11 +6,6 @@ process KOFAMSCAN {
    tag "${prefix}"
    label = [ 'process_high', 'error_retry' ]

-    conda "bioconda::kofamscan=1.3.0"
-    container "${ workflow.containerEngine == 'singularity' ?
-        'https://depot.galaxyproject.org/singularity/kofamscan:1.3.0--hdfd78af_2' :
-        'quay.io/biocontainers/kofamscan:1.3.0--hdfd78af_2' }"
-
    input:
    tuple val(prefix), file('proteins.faa')
    file(bacannot_db)
diff --git a/modules/MGEs/draw_gis.nf b/modules/MGEs/draw_gis.nf
index ff64cfd2..13277613 100644
--- a/modules/MGEs/draw_gis.nf
+++ b/modules/MGEs/draw_gis.nf
@@ -5,7 +5,6 @@ process DRAW_GIS {
    }
    tag "${prefix}"
    label = [ 'misc', 'process_ultralow' ]
-
    input:
    tuple val(prefix), file(gff), file(gis_bed)
diff --git a/modules/MGEs/integron_finder.nf b/modules/MGEs/integron_finder.nf
index 910f503d..b06fccdf 100644
--- a/modules/MGEs/integron_finder.nf
+++ b/modules/MGEs/integron_finder.nf
@@ -6,11 +6,6 @@ process INTEGRON_FINDER {
    tag "${prefix}"
    label = [ 'process_medium' ]

-    conda "bioconda::integron_finder=2.0.1"
-    container "${ workflow.containerEngine == 'singularity' ?
-        'https://depot.galaxyproject.org/singularity/integron_finder:2.0.1--pyhdfd78af_0' :
-        'quay.io/biocontainers/integron_finder:2.0.1--pyhdfd78af_0' }"
-
    input:
    tuple val(prefix), file(genome)
diff --git a/modules/MGEs/islandpath.nf b/modules/MGEs/islandpath.nf
index d2f15a1b..d9ef1714 100644
--- a/modules/MGEs/islandpath.nf
+++ b/modules/MGEs/islandpath.nf
@@ -5,11 +5,6 @@ process ISLANDPATH {
    errorStrategy = 'retry'
    maxRetries = 5

-    conda "bioconda::platon=1.6"
-    container "${ workflow.containerEngine == 'singularity' ?
-        'https://depot.galaxyproject.org/singularity/islandpath:1.0.6--hdfd78af_0' :
-        'quay.io/biocontainers/islandpath:1.0.6--hdfd78af_0' }"
-
    input:
    tuple val(prefix), file("annotation.gbk")
diff --git a/modules/MGEs/mob_suite.nf b/modules/MGEs/mob_suite.nf
index e59adee0..14256e92 100644
--- a/modules/MGEs/mob_suite.nf
+++ b/modules/MGEs/mob_suite.nf
@@ -6,11 +6,6 @@ process MOBSUITE {
    tag "${prefix}"
    label = [ 'process_medium' ]

-    conda "bioconda::mob_suite=3.1.4"
-    container "${ workflow.containerEngine == 'singularity' ?
-        'docker://quay.io/biocontainers/mob_suite:3.1.4--pyhdfd78af_0' :
-        'quay.io/biocontainers/mob_suite:3.1.4--pyhdfd78af_0' }"
-
    input:
    tuple val(prefix), file(genome)
diff --git a/modules/MGEs/plasmidfinder.nf b/modules/MGEs/plasmidfinder.nf
index 44f344b1..91580170 100644
--- a/modules/MGEs/plasmidfinder.nf
+++ b/modules/MGEs/plasmidfinder.nf
@@ -6,11 +6,6 @@ process PLASMIDFINDER {
    tag "${prefix}"
    label = [ 'process_low' ]

-    conda "bioconda::plasmidfinder=2.1.6"
-    container "${ workflow.containerEngine == 'singularity' ?
-        'https://depot.galaxyproject.org/singularity/plasmidfinder:2.1.6--py310hdfd78af_1' :
-        'quay.io/biocontainers/plasmidfinder:2.1.6--py310hdfd78af_1' }"
-
    input:
    tuple val(prefix), file(genome)
    file(bacannot_db)
diff --git a/modules/MGEs/platon.nf b/modules/MGEs/platon.nf
index 84d9cfcf..e4be53ec 100644
--- a/modules/MGEs/platon.nf
+++ b/modules/MGEs/platon.nf
@@ -7,11 +7,6 @@ process PLATON {
    tag "${prefix}"
    label = [ 'process_medium' ]

-    conda "bioconda::platon=1.6"
-    container "${ workflow.containerEngine == 'singularity' ?
-        'https://depot.galaxyproject.org/singularity/platon:1.6--pyhdfd78af_1' :
-        'quay.io/biocontainers/platon:1.6--pyhdfd78af_1' }"
-
    input:
    tuple val(prefix), file(genome)
    file(bacannot_db)
diff --git a/modules/assembly/flye.nf b/modules/assembly/flye.nf
index 29976210..5bbc4668 100644
--- a/modules/assembly/flye.nf
+++ b/modules/assembly/flye.nf
@@ -7,11 +7,6 @@ process FLYE {
    label = [ 'process_high', 'error_retry' ]
    tag "${prefix}"

-    conda "bioconda::flye=2.9"
-    container "${ workflow.containerEngine == 'singularity' ?
-        'https://depot.galaxyproject.org/singularity/flye:2.9--py39h6935b12_1' :
-        'quay.io/biocontainers/flye:2.9--py39h6935b12_1' }"
-
    input:
    tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), val(assembly), val(resfinder_species)
diff --git a/modules/assembly/unicycler.nf b/modules/assembly/unicycler.nf
index 25ff97d6..032a3957 100644
--- a/modules/assembly/unicycler.nf
+++ b/modules/assembly/unicycler.nf
@@ -7,11 +7,6 @@ process UNICYCLER {
    label = [ 'process_high', 'error_retry' ]
    tag "${prefix}"

-    conda "bioconda::unicycler=0.4.8"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/unicycler:0.5.0--py39h2add14b_2' :
-        'quay.io/biocontainers/unicycler:0.5.0--py39h2add14b_2' }"
-
    input:
    tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), val(assembly), val(resfinder_species)
diff --git a/modules/bacannot_dbs/amrfinder.nf b/modules/bacannot_dbs/amrfinder.nf
index 7f4ffbaf..c5039973 100644
--- a/modules/bacannot_dbs/amrfinder.nf
+++ b/modules/bacannot_dbs/amrfinder.nf
@@ -2,11 +2,6 @@ process AMRFINDER_DB {
    publishDir "${params.output}/amrfinder_db", mode: 'copy', overwrite: "$params.force_update"
    label 'process_ultralow'

-    conda "bioconda::ncbi-amrfinderplus=3.11.2"
-    container "${ workflow.containerEngine == 'singularity' ?
-        'docker://ncbi/amr:3.11.2-2022-12-19.1' :
-        'ncbi/amr:3.11.2-2022-12-19.1' }"
-
    output:
    file("*")
diff --git a/modules/bacannot_dbs/antismash.nf b/modules/bacannot_dbs/antismash.nf
index 9f0fdd2f..5e9b8962 100644
--- a/modules/bacannot_dbs/antismash.nf
+++ b/modules/bacannot_dbs/antismash.nf
@@ -1,7 +1,7 @@
 process ANTISMASH_DB {
    publishDir "${params.output}/antismash_db", mode: 'copy', overwrite: "$params.force_update"
    label = [ 'db_download', 'process_ultralow' ]
-    
+
    output:
    file("*")
diff --git a/modules/bacannot_dbs/get_zenodo.nf b/modules/bacannot_dbs/get_zenodo.nf
index 65c42605..cb9aab4f 100644
--- a/modules/bacannot_dbs/get_zenodo.nf
+++ b/modules/bacannot_dbs/get_zenodo.nf
@@ -14,5 +14,6 @@ process GET_ZENODO_DB {

    # organize data
    tar zxvf *.tar.gz && rm *.tar.gz
+    rm -rf \$( find . -name 'pipeline_info' )
    """
 }
\ No newline at end of file
diff --git a/modules/bacannot_dbs/iceberg.nf b/modules/bacannot_dbs/iceberg.nf
index f7f859b8..1cebc233 100644
--- a/modules/bacannot_dbs/iceberg.nf
+++ b/modules/bacannot_dbs/iceberg.nf
@@ -1,7 +1,7 @@
 process ICEBERG_DB {
    publishDir "${params.output}/iceberg_db", mode: 'copy', overwrite: "$params.force_update"
    label = [ 'db_download', 'process_ultralow' ]
-    
+
    output:
    file("*")
diff --git a/modules/bacannot_dbs/kofamscan.nf b/modules/bacannot_dbs/kofamscan.nf
index 0e010adc..2a5e6303 100644
--- a/modules/bacannot_dbs/kofamscan.nf
+++ b/modules/bacannot_dbs/kofamscan.nf
@@ -1,7 +1,7 @@
 process KOFAMSCAN_DB {
    publishDir "${params.output}/kofamscan_db", mode: 'copy', overwrite: "$params.force_update"
    label = [ 'db_download', 'process_low' ]
-    
+
    output:
    file("*")
diff --git a/modules/bacannot_dbs/phast.nf b/modules/bacannot_dbs/phast.nf
index 6e1771e7..0c7587ef 100644
--- a/modules/bacannot_dbs/phast.nf
+++ b/modules/bacannot_dbs/phast.nf
@@ -1,7 +1,7 @@
 process PHAST_DB {
    publishDir "${params.output}/phast_db", mode: 'copy', overwrite: "$params.force_update"
    label = [ 'db_download', 'process_ultralow' ]
-    
+
    output:
    file("*")
diff --git a/modules/bacannot_dbs/plasmidfinder.nf b/modules/bacannot_dbs/plasmidfinder.nf
index 18e778b4..05073cad 100644
--- a/modules/bacannot_dbs/plasmidfinder.nf
+++ b/modules/bacannot_dbs/plasmidfinder.nf
@@ -1,7 +1,7 @@
 process PLASMIDFINDER_DB {
    publishDir "${params.output}", mode: 'copy', overwrite: "$params.force_update"
    label = [ 'db_download', 'process_ultralow' ]
-    
+
    output:
    file("*")
diff --git a/modules/bacannot_dbs/platon.nf b/modules/bacannot_dbs/platon.nf
index e8e40c77..2d30881d 100644
--- a/modules/bacannot_dbs/platon.nf
+++ b/modules/bacannot_dbs/platon.nf
@@ -1,7 +1,7 @@
 process PLATON_DB {
    publishDir "${params.output}/platon_db", mode: 'copy', overwrite: "$params.force_update"
    label = [ 'db_download', 'process_low' ]
-    
+
    output:
    file("*")
diff --git a/modules/bacannot_dbs/prokka.nf b/modules/bacannot_dbs/prokka.nf
index 594e7f59..f82a24e2 100644
--- a/modules/bacannot_dbs/prokka.nf
+++ b/modules/bacannot_dbs/prokka.nf
@@ -1,7 +1,7 @@
 process PROKKA_DB {
    publishDir "${params.output}/prokka_db", mode: 'copy', overwrite: "$params.force_update"
    label = [ 'db_download', 'process_low' ]
-    
+
    output:
    file("*")
diff --git a/modules/bacannot_dbs/resfinder.nf b/modules/bacannot_dbs/resfinder.nf
index 46914a12..2c2d1f43 100644
--- a/modules/bacannot_dbs/resfinder.nf
+++ b/modules/bacannot_dbs/resfinder.nf
@@ -1,7 +1,7 @@
 process RESFINDER_DB {
    publishDir "${params.output}/resfinder_db", mode: 'copy', overwrite: "$params.force_update"
    label = [ 'db_download', 'process_ultralow' ]
-    
+
    output:
    file("*")
diff --git a/modules/bacannot_dbs/vfdb.nf b/modules/bacannot_dbs/vfdb.nf
index 6b9112f5..2a1673d9 100644
--- a/modules/bacannot_dbs/vfdb.nf
+++ b/modules/bacannot_dbs/vfdb.nf
@@ -1,7 +1,7 @@
 process VFDB_DB {
    publishDir "${params.output}/vfdb_db", mode: 'copy', overwrite: "$params.force_update"
    label = [ 'db_download', 'process_ultralow' ]
-    
+
    output:
    file("*")
diff --git a/modules/bacannot_dbs/victors.nf b/modules/bacannot_dbs/victors.nf
index 6d9aa0f9..b5c28409 100644
--- a/modules/bacannot_dbs/victors.nf
+++ b/modules/bacannot_dbs/victors.nf
@@ -1,7 +1,7 @@
 process VICTORS_DB {
    publishDir "${params.output}/victors_db", mode: 'copy', overwrite: "$params.force_update"
    label = [ 'db_download', 'process_ultralow' ]
-    
+
    output:
    file("*")
diff --git a/modules/generic/bakta.nf b/modules/generic/bakta.nf
index 67db3569..8d673ca2 100644
--- a/modules/generic/bakta.nf
+++ b/modules/generic/bakta.nf
@@ -7,11 +7,6 @@ process BAKTA {
    tag "${prefix}"
    label = [ 'process_medium', 'error_retry' ]

-    conda "bioconda::bakta=1.7.0"
-    container "${ workflow.containerEngine == 'singularity' ?
-        'https://depot.galaxyproject.org/singularity/bakta:1.7.0--pyhdfd78af_1' :
-        'quay.io/biocontainers/bakta:1.7.0--pyhdfd78af_1' }"
-
    input:
    tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), file(assembly), val(resfinder_species)
    file(bakta_db)
diff --git a/modules/generic/barrnap.nf b/modules/generic/barrnap.nf
index 147a2ac0..b646ab91 100644
--- a/modules/generic/barrnap.nf
+++ b/modules/generic/barrnap.nf
@@ -6,11 +6,6 @@ process BARRNAP {
    tag "${prefix}"
    label = [ 'process_low' ]

-    conda "bioconda::barrnap=0.9"
-    container "${ workflow.containerEngine == 'singularity' ?
-        'https://depot.galaxyproject.org/singularity/barrnap:0.9--hdfd78af_4 ' :
-        'quay.io/biocontainers/barrnap:0.9--hdfd78af_4' }"
-
    input:
    tuple val(prefix), file(genome)
diff --git a/modules/generic/circos.nf b/modules/generic/circos.nf
index e36db10e..c0c7ccfe 100644
--- a/modules/generic/circos.nf
+++ b/modules/generic/circos.nf
@@ -4,7 +4,6 @@ process CIRCOS {
        else "$filename"
    }
    tag "$prefix"
-
    label = [ 'misc', 'process_low' ]

    input:
diff --git a/modules/generic/gc_skew.nf b/modules/generic/gc_skew.nf
index f6db9dda..68ea6170 100644
--- a/modules/generic/gc_skew.nf
+++ b/modules/generic/gc_skew.nf
@@ -1,6 +1,5 @@
 process GC_SKEW {
    tag "$prefix"
-
    label = [ 'misc', 'process_low' ]

    input:
diff --git a/modules/generic/karyotype.nf b/modules/generic/karyotype.nf
index eb8eb8d9..cc2b37ad 100644
--- a/modules/generic/karyotype.nf
+++ b/modules/generic/karyotype.nf
@@ -1,6 +1,5 @@
 process MAKE_KARYOTYPE {
    tag "$prefix"
-
    label = [ 'misc', 'process_low' ]

    input:
diff --git a/modules/generic/mash.nf b/modules/generic/mash.nf
index 0273d2bc..b2ca8961 100644
--- a/modules/generic/mash.nf
+++ b/modules/generic/mash.nf
@@ -6,11 +6,6 @@ process REFSEQ_MASHER {
    tag "${prefix}"
    label = [ 'process_low' ]

-    conda "bioconda::refseq_masher=0.1.2"
-    container "${ workflow.containerEngine == 'singularity' ?
-        'https://depot.galaxyproject.org/singularity/refseq_masher:0.1.2--py_0' :
-        'quay.io/biocontainers/refseq_masher:0.1.2--py_0' }"
-
    input:
    tuple val(prefix), path(genome)
diff --git a/modules/generic/merge_summaries.nf b/modules/generic/merge_summaries.nf
index d4437b10..55c50389 100644
--- a/modules/generic/merge_summaries.nf
+++ b/modules/generic/merge_summaries.nf
@@ -1,7 +1,6 @@
 process MERGE_SUMMARIES {
    publishDir "${params.output}", mode: 'copy'
    label = [ 'misc', 'process_low' ]
-
    input:
    path(summaries)
diff --git a/modules/generic/mlst.nf b/modules/generic/mlst.nf
index 9b67121a..6a4ced52 100644
--- a/modules/generic/mlst.nf
+++ b/modules/generic/mlst.nf
@@ -6,11 +6,6 @@ process MLST {
    tag "${prefix}"
    label = [ 'process_ultralow' ]

-    conda "bioconda::mlst=2.19.0"
-    container "${ workflow.containerEngine == 'singularity' ?
- 'https://depot.galaxyproject.org/singularity/mlst:2.19.0--hdfd78af_1' : - 'quay.io/biocontainers/mlst:2.19.0--hdfd78af_1' }" - input: tuple val(prefix), file(genome) file(bacannot_db) diff --git a/modules/generic/prepare_circos.nf b/modules/generic/prepare_circos.nf index 8afb64df..eec1f513 100644 --- a/modules/generic/prepare_circos.nf +++ b/modules/generic/prepare_circos.nf @@ -1,6 +1,5 @@ process PREPARE_CIRCOS { tag "$prefix" - label = [ 'misc', 'process_low' ] input: diff --git a/modules/generic/prokka.nf b/modules/generic/prokka.nf index 9dc687cc..bd31e81d 100644 --- a/modules/generic/prokka.nf +++ b/modules/generic/prokka.nf @@ -7,11 +7,6 @@ process PROKKA { tag "${prefix}" label = [ 'process_medium' ] - conda "bioconda::prokka=1.14.6" - container "${ workflow.containerEngine == 'singularity' ? - 'https://depot.galaxyproject.org/singularity/prokka:1.14.6--pl5262hdfd78af_3' : - 'quay.io/biocontainers/prokka:1.14.6--pl5262hdfd78af_3' }" - input: tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), file(assembly), val(resfinder_species) file(bacannot_db) diff --git a/modules/generic/sequenceserver.nf b/modules/generic/sequenceserver.nf index 2663ef97..2048dffe 100644 --- a/modules/generic/sequenceserver.nf +++ b/modules/generic/sequenceserver.nf @@ -1,8 +1,7 @@ process SEQUENCESERVER { publishDir "${params.output}/${prefix}/SequenceServerDBs", mode: 'copy' tag "${prefix}" - label = [ 'server', 'process_ultralow' ] - + label = [ 'server', 'process_ultralow' ] input: tuple val(prefix), file(genome), file(genes), file(proteins) diff --git a/modules/prophages/phigaro.nf b/modules/prophages/phigaro.nf index 4b7cca21..915a2ec7 100644 --- a/modules/prophages/phigaro.nf +++ b/modules/prophages/phigaro.nf @@ -6,11 +6,6 @@ process PHIGARO { tag "${prefix}" label = [ 'process_medium' ] - conda "bioconda::phigaro=2.3.0" - container "${ workflow.containerEngine == 'singularity' ? - 'https://depot.galaxyproject.org/singularity/phigaro:2.3.0--pyh7b7c402_0' : - 'quay.io/biocontainers/phigaro:2.3.0--pyh7b7c402_0' }" - input: tuple val(prefix), file("assembly.fasta") file(bacannot_db) diff --git a/modules/prophages/phispy.nf b/modules/prophages/phispy.nf index 357e4838..ef32fc90 100644 --- a/modules/prophages/phispy.nf +++ b/modules/prophages/phispy.nf @@ -7,11 +7,6 @@ process PHISPY { tag "${prefix}" label = [ 'process_medium' ] - conda "bioconda::phispy=4.2.21" - container "${ workflow.containerEngine == 'singularity' ? - 'https://depot.galaxyproject.org/singularity/phispy:4.2.21--py39h7cff6ad_0' : - 'quay.io/biocontainers/phispy:4.2.21--py39h7cff6ad_0' }" - input: tuple val(prefix), file(input) diff --git a/modules/resistance/amrfinder.nf b/modules/resistance/amrfinder.nf index 70e9aac4..6f36e228 100644 --- a/modules/resistance/amrfinder.nf +++ b/modules/resistance/amrfinder.nf @@ -6,11 +6,6 @@ process AMRFINDER { tag "${prefix}" label = [ 'process_medium' ] - conda "bioconda::ncbi-amrfinderplus=3.11.2" - container "${ workflow.containerEngine == 'singularity' ? 
- 'docker://ncbi/amr:3.11.2-2022-12-19.1' : - 'ncbi/amr:3.11.2-2022-12-19.1' }" - input: tuple val(prefix), file(proteins) file(bacannot_db) diff --git a/modules/resistance/amrfinder2tsv.nf b/modules/resistance/amrfinder2tsv.nf index 51a09c37..41f52854 100644 --- a/modules/resistance/amrfinder2tsv.nf +++ b/modules/resistance/amrfinder2tsv.nf @@ -1,6 +1,5 @@ process AMRFINDER2TSV { tag "$prefix" - label = [ 'renv', 'process_low' ] input: diff --git a/modules/resistance/rgi_annotation.nf b/modules/resistance/rgi_annotation.nf index 11e85c39..9ae28955 100644 --- a/modules/resistance/rgi_annotation.nf +++ b/modules/resistance/rgi_annotation.nf @@ -7,11 +7,6 @@ process CARD_RGI { tag "${prefix}" label = [ 'process_medium' ] - conda "bioconda::rgi=5.2.1" - container "${ workflow.containerEngine == 'singularity' ? - 'https://depot.galaxyproject.org/singularity/rgi:5.2.1--pyhdfd78af_1' : - 'quay.io/biocontainers/rgi:5.2.1--pyhdfd78af_1' }" - input: tuple val(prefix), path(input) path(bacannot_db) diff --git a/modules/virulence/vfdb2tsv.nf b/modules/virulence/vfdb2tsv.nf index 595e1548..3a27daa6 100644 --- a/modules/virulence/vfdb2tsv.nf +++ b/modules/virulence/vfdb2tsv.nf @@ -1,6 +1,5 @@ process VFDB2TSV { tag "$prefix" - label = [ 'renv', 'process_low' ] input: diff --git a/nextflow.config b/nextflow.config index 12dddf36..1afb0454 100644 --- a/nextflow.config +++ b/nextflow.config @@ -18,6 +18,8 @@ params { monochrome_logs = false help = false get_config = false + get_docker_config = false + get_singularity_config = false get_samplesheet = false validate_params = true show_hidden_params = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 5b05992b..ede2129b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -11,10 +11,16 @@ "default": "", "properties": { "get_dbs": { - "type": "boolean" + "type": "boolean", + "description": "Download and build all the required databases on the fly (get today's version)" }, "force_update": { - "type": "boolean" + "type": "boolean", + "description": "Should we overwrite existing databases, if any?" + }, + "get_zenodo_db": { + "type": "boolean", + "description": "Download latest pre-built databases from Zenodo?"
} } }, @@ -167,7 +173,7 @@ "plasmids_minid": { "type": "number", "description": "Identity threshold for plasmid annotation", - "default": 90.0, + "default": 90, "minimum": 0, "maximum": 100, "help_text": "Must be between 0 and 100", @@ -176,7 +182,7 @@ "plasmids_mincov": { "type": "number", "description": "overage threshold for plasmid annotation", - "default": 60.0, + "default": 60, "minimum": 0, "maximum": 100, "help_text": "Must be between 0 and 100", @@ -185,7 +191,7 @@ "blast_virulence_minid": { "type": "number", "description": "Identity threshold for virulence factors annotation", - "default": 90.0, + "default": 90, "minimum": 0, "maximum": 100, "help_text": "Must be between 0 and 100", @@ -194,7 +200,7 @@ "blast_virulence_mincov": { "type": "number", "description": "overage threshold for virulence factors annotation", - "default": 90.0, + "default": 90, "minimum": 0, "maximum": 100, "help_text": "Must be between 0 and 100", @@ -203,7 +209,7 @@ "blast_resistance_minid": { "type": "number", "description": "Identity threshold for resistance genes annotation", - "default": 90.0, + "default": 90, "minimum": 0, "maximum": 100, "help_text": "Must be between 0 and 100", @@ -212,7 +218,7 @@ "blast_resistance_mincov": { "type": "number", "description": "overage threshold for resistance genes annotation", - "default": 90.0, + "default": 90, "minimum": 0, "maximum": 100, "help_text": "Must be between 0 and 100", @@ -221,7 +227,7 @@ "blast_MGEs_minid": { "type": "number", "description": "Identity threshold for ICEs and prophages annotation", - "default": 85.0, + "default": 85, "minimum": 0, "maximum": 100, "help_text": "Must be between 0 and 100", @@ -230,7 +236,7 @@ "blast_MGEs_mincov": { "type": "number", "description": "overage threshold for ICEs and prophages annotation", - "default": 85.0, + "default": 85, "minimum": 0, "maximum": 100, "help_text": "Must be between 0 and 100", @@ -260,7 +266,7 @@ "blast_custom_minid": { "type": "number", "description": "Min. identity % for the annotation using user's custom database", - "default": 65.0, + "default": 65, "minimum": 0, "maximum": 100, "hidden": true @@ -268,7 +274,7 @@ "blast_custom_mincov": { "type": "number", "description": "Min. 
gene/subject coverage % for the annotation using user's custom database", - "default": 65.0, + "default": 65, "minimum": 0, "maximum": 100, "hidden": true @@ -292,6 +298,16 @@ "description": "Download template config for parameters", "fa_icon": "fas fa-question-circle" }, + "get_docker_config": { + "type": "boolean", + "description": "Download template docker config for containers.", + "fa_icon": "fas fa-question-circle" + }, + "get_singularity_config": { + "type": "boolean", + "description": "Download template singularity config for containers.", + "fa_icon": "fas fa-question-circle" + }, "get_samplesheet": { "type": "boolean", "fa_icon": "fas fa-question-circle", @@ -331,7 +347,7 @@ "flye_version": { "type": "string", "description": "Select quay.io image tag for tool", - "default": "2.9--py39h39abbe0_0" + "default": "2.9--py39h6935b12_1" }, "bakta_version": { "type": "string", From dd9c4f2a375da9392c9fce38f382eb3305185141 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Thu, 20 Jul 2023 12:28:52 +0000 Subject: [PATCH 20/50] update unicycler version (0.5.0--py310h6cc9453_3) --- conf/defaults.config | 2 +- markdown/CHANGELOG.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/conf/defaults.config b/conf/defaults.config index 63da85be..e68f37b8 100644 --- a/conf/defaults.config +++ b/conf/defaults.config @@ -176,7 +176,7 @@ params { // Select versions of bioconda quay.io additional tools // Tools that are not part of the core of the pipeline, // but can eventually be used by users - unicycler_version = '0.4.8--py38h8162308_3' + unicycler_version = '0.5.0--py310h6cc9453_3' flye_version = '2.9--py39h6935b12_1' bakta_version = '1.7.0--pyhdfd78af_1' diff --git a/markdown/CHANGELOG.md b/markdown/CHANGELOG.md index fcd33376..f82a9b3b 100644 --- a/markdown/CHANGELOG.md +++ b/markdown/CHANGELOG.md @@ -8,6 +8,7 @@ The tracking for changes started in v2.1 * [#69](https://github.com/fmalmeida/bacannot/issues/69) -- Change how tools use docker images in order to: * make tools use public bioconda images whenever possible to allow easy addition of tools and avoid much conflicts in docker images * dimish the size and tools inside the docker images, the docker images now are only built to contain tools and all required for modules that cannot just use bioconda docker images. +* Update unicycler docker image to latest '0.5.0--py310h6cc9453_3' to avoid errors originating from the previous image, which contained a buggy installation.
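For reference, overriding the tool image at runtime is a single extra flag on a normal invocation (a minimal sketch, assuming the docker profile and placeholder samplesheet and database paths):

```bash
# hypothetical run pinning unicycler to the fixed quay.io tag
nextflow run fmalmeida/bacannot \
    -profile docker \
    --input samplesheet.yaml \
    --bacannot_db ./bacannot_dbs \
    --unicycler_version '0.5.0--py310h6cc9453_3'
```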
## v3.2 [19-December-2022] * Fixes https://github.com/fmalmeida/bacannot/issues/68 reported by @lam-c From 1ed4a8e4c4a268b7c35130dad4b10ab0139c7582 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Wed, 26 Jul 2023 07:14:17 -0400 Subject: [PATCH 21/50] 96 error summary for bugfix release (#101) Update falmeida-py version --- conf/defaults.config | 2 +- docker/misc/Dockerfile | 2 +- nextflow_schema.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/defaults.config b/conf/defaults.config index e68f37b8..56baa751 100644 --- a/conf/defaults.config +++ b/conf/defaults.config @@ -185,4 +185,4 @@ params { max_cpus = 16 max_time = '40.h' -} \ No newline at end of file +} diff --git a/docker/misc/Dockerfile b/docker/misc/Dockerfile index 7e718db0..2237ee0a 100644 --- a/docker/misc/Dockerfile +++ b/docker/misc/Dockerfile @@ -21,7 +21,7 @@ RUN mamba install -y \ seqkit \ bioawk \ 'easy_circos>=0.3' \ - 'falmeida-py>=1.2' \ + 'falmeida-py>=1.2.2' \ 'conda-forge::openssl>=1.1.1' \ emboss \ libtiff \ diff --git a/nextflow_schema.json b/nextflow_schema.json index ede2129b..a3866ce9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -342,7 +342,7 @@ "unicycler_version": { "type": "string", "description": "Select quay.io image tag for tool", - "default": "0.4.8--py38h8162308_3" + "default": "0.5.0--py310h6cc9453_3" }, "flye_version": { "type": "string", From 9dc44c5458729f2b7c7706dd1315b3948f730908 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 18 Sep 2023 22:58:08 +0200 Subject: [PATCH 22/50] 98 include ices and prophage annotation in json summary (#106) * Try Dockerfile fix * Update Dockerfile * Update Dockerfile * Update CHANGELOG.md --- docker/misc/Dockerfile | 35 +++++++++++++++++++++-------------- markdown/CHANGELOG.md | 4 +++- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/docker/misc/Dockerfile b/docker/misc/Dockerfile index 2237ee0a..a74c03c1 100644 --- a/docker/misc/Dockerfile +++ b/docker/misc/Dockerfile @@ -3,15 +3,15 @@ LABEL authors="Felipe Almeida" \ description="Docker image containing any-based bacannot tools" # install mamba -RUN conda update -n root conda --yes && \ - conda update -n base conda --yes && \ - conda install -c conda-forge -y 'mamba>=1.4' +RUN conda install -n base -c conda-forge 'mamba=1.5' --yes && \ + conda clean -afy +RUN pip install --upgrade pip # Install the conda environment RUN mamba install -y \ -c bioconda -c defaults -c conda-forge -c anaconda -c falmeida \ --no-channel-priority \ - 'conda-forge::python>=3.7' \ + 'python=3.9' \ 'blast>=2.12' \ 'diamond>=2.0.15' \ 'bedtools>=2.30' \ @@ -20,21 +20,26 @@ RUN mamba install -y \ 'biopython==1.78' \ seqkit \ bioawk \ - 'easy_circos>=0.3' \ - 'falmeida-py>=1.2.2' \ - 'conda-forge::openssl>=1.1.1' \ + 'easy_circos==0.4' \ + 'conda-forge::openssl>=1.1' \ + 'pyproj=3.2' \ emboss \ libtiff \ jq && \ mamba clean -afy +RUN git clone https://github.com/fmalmeida/pythonScripts.git && \ + cd pythonScripts && \ + pip install . 
&& \ + falmeida-py --help # Install samtools RUN apt-get update -y && apt-get install -y samtools # Install gff-toolbox -RUN git clone https://github.com/fmalmeida/gff-toolbox.git && \ - cd gff-toolbox && \ - python setup.py install && \ +RUN git clone https://github.com/fmalmeida/gff-toolbox.git +RUN cd gff-toolbox && \ + python3 -m pip install --upgrade pip 'matplotlib==3.7.3' && \ + python3 setup.py install && \ gff-toolbox -h # Create env for digIS @@ -75,14 +80,16 @@ COPY custom_fix_grange_digis.py /work/digIS/src/common/grange.py ENV PATH=/work/digIS:$PATH # Create env for antismash -RUN conda create -y -n antismash -c bioconda -c conda-forge \ - 'antismash>=6' 'anaconda::jinja2' 'anaconda::markupsafe' nomkl && \ +RUN mamba create -y -n antismash -c bioconda -c conda-forge \ + 'bioconda::antismash-lite>=6' 'anaconda::flask' 'anaconda::jinja2' 'anaconda::markupsafe' nomkl && \ rm -rf /opt/conda/envs/antismash/lib/*/site-packages/antismash/databases && \ mamba clean -afy # fix bioperl -RUN mamba create -n perl -y -c bioconda -c conda-forge perl-bioperl perl-app-cpanminus perl-yaml -RUN conda run -n perl PERL5LIB= PERL_LOCAL_LIB_ROOT= cpanm Bio::Root::RootI +RUN mamba create -n perl -y \ + -c bioconda -c conda-forge -c anaconda -c defaults \ + perl-bioperl perl-app-cpanminus perl-yaml +RUN mamba run -n perl PERL5LIB= PERL_LOCAL_LIB_ROOT= cpanm Bio::Root::RootI # fix python RUN python3 -m pip install cryptography==38.0.4 diff --git a/markdown/CHANGELOG.md b/markdown/CHANGELOG.md index f82a9b3b..22ee193d 100644 --- a/markdown/CHANGELOG.md +++ b/markdown/CHANGELOG.md @@ -5,10 +5,12 @@ The tracking for changes started in v2.1 ## v3.3 [TBD] * [[#50](https://github.com/fmalmeida/bacannot/issues/50)] -- Add `Integron Finder` tool to the pipeline -* [#69](https://github.com/fmalmeida/bacannot/issues/69) -- Change how tools use docker images in order to: +* [[#69](https://github.com/fmalmeida/bacannot/issues/69)] -- Change how tools use docker images in order to: * make tools use public bioconda images whenever possible to allow easy addition of tools and avoid much conflicts in docker images * dimish the size and tools inside the docker images, the docker images now are only built to contain tools and all required for modules that cannot just use bioconda docker images. +* [[#98](https://github.com/fmalmeida/bacannot/issues/98)] -- Add ICEberg and PHAST blastp results to json summary * Update unicycler docker image to latest '0.5.0--py310h6cc9453_3' to avoid errors originated from previous image containing buggy installation. 
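A side note on the image layout: the Dockerfile above isolates fragile tool stacks in named conda environments instead of the base environment, and those environments are meant to be invoked by name at runtime. A minimal sketch of that pattern is below (the exact entrypoint calls are assumptions, not commands taken from the modules):

```bash
# call tools from their dedicated envs without activating them interactively
mamba run -n antismash antismash --version
mamba run -n perl perl -MBio::Root::RootI -e 'print "bioperl loads\n"'
```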
+ ## v3.2 [19-December-2022] * Fixes https://github.com/fmalmeida/bacannot/issues/68 reported by @lam-c From 78d0f4fce5819207d049828274309d3854d06ded Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 25 Sep 2023 15:46:54 -0400 Subject: [PATCH 23/50] 100 update pipeline docker images from docker tags to docker shasum (#108) * fix singularity run options * fix misc dockerfile * update renv docker image environment * update docker images to use shasum * Update CHANGELOG.md --- conf/docker.config | 8 ++++---- conf/singularity.config | 10 +++++----- docker/misc/Dockerfile | 4 ++-- docker/renv/Dockerfile | 4 ++-- markdown/CHANGELOG.md | 1 + 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/conf/docker.config b/conf/docker.config index 562b2158..28ffa1ab 100644 --- a/conf/docker.config +++ b/conf/docker.config @@ -18,21 +18,21 @@ process { // Custom pipeline's containers with various tools for general purposes // withLabel: 'db_download|db_tools|misc' { - container = 'fmalmeida/bacannot:v3.3_misc' + container = 'fmalmeida/bacannot@sha256:726e085f1bd71b47c2d8a38fd46d812aab7eb8978bab7bf3cde3aa2b7b3e0f2c' } // container for R tools withLabel: 'renv' { - container = 'fmalmeida/bacannot:v3.3_renv' + container = 'fmalmeida/bacannot@sha256:952f58a2c03e50f8a376073346fb1ccda28d6249e3fdfea07a3286a6ff1adf0c' } // container for bacannot server withLabel: 'server' { - container = 'fmalmeida/bacannot:server' + container = 'fmalmeida/bacannot@sha256:0ec3b289d6e0c624556d125b2ed9b63499178e266a315175fd87cf020a402898' } withLabel: 'jbrowse' { - container = 'fmalmeida/bacannot:jbrowse' + container = 'fmalmeida/bacannot@sha256:6afdca17b561bf212c1f976422aee3fe047563c32a15112a6262556d1f75201e' } // diff --git a/conf/singularity.config b/conf/singularity.config index 35707df6..f75a7559 100644 --- a/conf/singularity.config +++ b/conf/singularity.config @@ -1,7 +1,7 @@ // Container usage and permission docker.enabled = false singularity.enabled = true -// singularity.runOptions = '--writable-tmpfs -B $PWD' +singularity.runOptions = '--writable-tmpfs -e --no-home -B $PWD' singularity.autoMounts = true env.SINGULARITY_DISABLE_CACHE = 1 @@ -18,21 +18,21 @@ process { // Custom pipeline's containers with various tools for general purposes // withLabel: 'db_download|db_tools|misc' { - container = 'docker://fmalmeida/bacannot:v3.3_misc' + container = 'docker://fmalmeida/bacannot@sha256:726e085f1bd71b47c2d8a38fd46d812aab7eb8978bab7bf3cde3aa2b7b3e0f2c' } // container for R tools withLabel: 'renv' { - container = 'docker://fmalmeida/bacannot:v3.3_renv' + container = 'docker://fmalmeida/bacannot@sha256:952f58a2c03e50f8a376073346fb1ccda28d6249e3fdfea07a3286a6ff1adf0c' } // container for bacannot server withLabel: 'server' { - container = 'docker://fmalmeida/bacannot:server' + container = 'docker://fmalmeida/bacannot@sha256:0ec3b289d6e0c624556d125b2ed9b63499178e266a315175fd87cf020a402898' } withLabel: 'jbrowse' { - container = 'docker://fmalmeida/bacannot:jbrowse' + container = 'docker://fmalmeida/bacannot@sha256:6afdca17b561bf212c1f976422aee3fe047563c32a15112a6262556d1f75201e' } // diff --git a/docker/misc/Dockerfile b/docker/misc/Dockerfile index a74c03c1..3e7c3422 100644 --- a/docker/misc/Dockerfile +++ b/docker/misc/Dockerfile @@ -81,7 +81,7 @@ ENV PATH=/work/digIS:$PATH # Create env for antismash RUN mamba create -y -n antismash -c bioconda -c conda-forge \ - 'bioconda::antismash-lite>=6' 'anaconda::flask' 'anaconda::jinja2' 'anaconda::markupsafe' nomkl && \ + 'bioconda::antismash>=6' 
'anaconda::flask' 'anaconda::jinja2' 'anaconda::markupsafe' nomkl && \ rm -rf /opt/conda/envs/antismash/lib/*/site-packages/antismash/databases && \ mamba clean -afy @@ -99,5 +99,5 @@ RUN pip3 install zenodo_get # fix permissions RUN chmod 777 -R /work -RUN chmod 777 -R /opt/conda/envs/antismash/lib/*/site-packages/antismash +RUN chmod 777 -R /opt/conda/envs/antismash/lib/**/site-packages/antismash RUN chmod 777 -R /opt/conda/envs/resfinder \ No newline at end of file diff --git a/docker/renv/Dockerfile b/docker/renv/Dockerfile index e28066a8..5641c0b1 100644 --- a/docker/renv/Dockerfile +++ b/docker/renv/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:20.04 +FROM ubuntu:22.04 LABEL MAINTAINER Felipe Marques de Almeida @@ -13,7 +13,7 @@ RUN apt-get update && \ ## Install R RUN DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata && \ - DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y r-base r-base-core r-api-3.5 + DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y r-base r-base-core ## Install R-packages RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ diff --git a/markdown/CHANGELOG.md b/markdown/CHANGELOG.md index 22ee193d..5e90fde1 100644 --- a/markdown/CHANGELOG.md +++ b/markdown/CHANGELOG.md @@ -9,6 +9,7 @@ The tracking for changes started in v2.1 * make tools use public bioconda images whenever possible to allow easy addition of tools and avoid much conflicts in docker images * dimish the size and tools inside the docker images, the docker images now are only built to contain tools and all required for modules that cannot just use bioconda docker images. * [[#98](https://github.com/fmalmeida/bacannot/issues/98)] -- Add ICEberg and PHAST blastp results to json summary +* [[#100](https://github.com/fmalmeida/bacannot/issues/100)] -- Update pipeline to use docker shasum instead of tags * Update unicycler docker image to latest '0.5.0--py310h6cc9453_3' to avoid errors originated from previous image containing buggy installation. ## v3.2 [19-December-2022] From e36f991423c82c393567c2b2365260812b0f9150 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Tue, 26 Sep 2023 10:44:19 -0400 Subject: [PATCH 24/50] 107 duplicate reads to unique read names (#109) * Add pre-formatted database (#83) * add pre-formatted database info * add information about pre-formatted database * update docs and fix report links * include information of newly known issues (#103) * add parameter to enable deduplication of reads * Update manual.md --- README.md | 6 ++++- conf/defaults.config | 3 +++ docs/manual.md | 1 + modules/assembly/flye.nf | 9 ++++++- modules/assembly/unicycler.nf | 47 ++++++++++++++++++++++++++++++++--- nextflow_schema.json | 4 +++ 6 files changed, 65 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 36625099..dc41e2fe 100644 --- a/README.md +++ b/README.md @@ -242,7 +242,11 @@ It will result in the following: 2. The JBrowse wrapper in the shiny server is not capable of displaying the GC content and methylation plots when available. It can only display the simpler tracks. If the user wants to visualise and interrogate the GC or methylation tracks it must open the JBrowse outside from the shiny server. For that, two options are available: * You can navigate to the `jbrowse` directory under your sample's output folder and simply execute `http-server`. 
This command can be found at: https://www.npmjs.com/package/http-server * Or, you can download the [JBrowse Desktop app](https://jbrowse.org/docs/jbrowse_desktop.html) and, from inside the app, select the folder `jbrowse/data` that is available in your sample's output directory. -3. If you face some weird error using v3.1, please, before opening a flag, try updating your docker image, we had some inconsistencies lately and this may be the source of the issue. +3. If you face some weird error using v3.1 or v3.2, please, before opening a ticket, try updating your docker images; we had some inconsistencies lately, and this may be the source of the issue. +4. If facing an issue with the `BACANNOT:SUMMARY` module, identical or similar to the one reported in issue [[#96]](https://github.com/fmalmeida/bacannot/issues/96), please, before opening a ticket, try updating the python env docker image: `docker pull fmalmeida/bacannot:v3.2_pyenv`. The image has been recently updated to have the latest version of my python scripts, and that may solve the issue. If not, please open a new ticket. +5. Sometimes, `BACANNOT:UNICYCLER` may fail with varied, seemingly random issues. For example, it may say that a read is not available even though it is there. After some tracing, we realised that the unicycler 0.4.8 installation from conda, and the biocontainer from quay.io, were causing these random problems. To solve this, please run with a newer version of the tool, which fixes the issue in most cases: `--unicycler_version 0.5.0--py310h6cc9453_3`. + * Because `v3.2` is already tagged and frozen with Zenodo, we will not update it; thus, for this version, the parameter must be used to override the tool version. + * In `v3.3`, the unicycler version will default to `0.5.0--py310h6cc9453_3` ## Citation diff --git a/conf/defaults.config b/conf/defaults.config index 56baa751..0d43fe48 100644 --- a/conf/defaults.config +++ b/conf/defaults.config @@ -32,6 +32,9 @@ params { // It is also documented in the main manual: https://bacannot.readthedocs.io/en/latest/samplesheet input = null +// Enable reads deduplication for assembly? (If input has reads) + enable_deduplication = false + // path to directory containing databases used by bacannot // you can download databases with: // nextflow run fmalmeida/bacannot --get_dbs --output bacannot_dbs -profile diff --git a/docs/manual.md b/docs/manual.md index 95cb1b35..2a51d52e 100644 --- a/docs/manual.md +++ b/docs/manual.md @@ -32,6 +32,7 @@ The pipeline accepts as input two other input files types that are used to perfo |
Parameter
| Required | Default | Description | | :--------------------------------------- | :------- | :------ | :---------- | | `--input` | :material-check: | NA | Input samplesheet describing all the samples to be analysed | +| `--enable_deduplication` | :material-close: | false | Run deduplication command on input reads before assembly | | `--output` | :material-check: | results | Name of directory to store output values. A sub-directory for each genome will be created inside this main directory. | | `--bacannot_db` | :material-check: | NA | Path for root directory containing required bacannot databases | diff --git a/modules/assembly/flye.nf b/modules/assembly/flye.nf index 5bbc4668..d588d01e 100644 --- a/modules/assembly/flye.nf +++ b/modules/assembly/flye.nf @@ -18,14 +18,21 @@ process FLYE { script: lr = (lr_type == 'nanopore') ? '--nano-raw' : '--pacbio-raw' + dedup_lr = params.enable_deduplication ? + "gunzip -cf $lreads | awk '{if(NR%4==1) \$0=sprintf(\"@1_%d\",(1+i++)); print;}' | gzip -c > ${prefix}_deduplicated_reads.fastq.gz" : + "ln -s $lreads ${prefix}_deduplicated_reads.fastq.gz" + """ # Save flye version flye -v > flye_version.txt ; + # remove duplicate reads + $dedup_lr + # Run flye flye \\ ${lr} \\ - $lreads \\ + ${prefix}_deduplicated_reads.fastq.gz \\ --out-dir flye_${prefix} \\ --threads $task.cpus &> flye.log ; diff --git a/modules/assembly/unicycler.nf b/modules/assembly/unicycler.nf index 032a3957..145f1c1f 100644 --- a/modules/assembly/unicycler.nf +++ b/modules/assembly/unicycler.nf @@ -17,14 +17,55 @@ process UNICYCLER { path('unicycler_version.txt'), emit: version script: - unpaired_param = (sreads.getName() != "input.3") ? "-s $sreads" : "" - paired_param = (sread1.getName() != "input.1" && sread2.getName() != "input.2") ? "-1 $sread1 -2 $sread2" : "" - lr_param = (lreads.getName() != "input.4") ? "-l $lreads" : "" + unpaired_param = "" + dedup_sreads = "" + paired_param = "" + dedup_paired = "" + lr_param = "" + dedup_lr = "" + + // sreads + if (sreads.getName() != "input.3") { + + dedup_sreads = params.enable_deduplication ? + "gunzip -cf $sreads | awk '{if(NR%4==1) \$0=sprintf(\"@1_%d\",(1+i++)); print;}' | gzip -c > ${prefix}_deduplicated_sreads.fastq.gz" : + "ln -s $sreads ${prefix}_deduplicated_sreads.fastq.gz" + + unpaired_param = "-s ${prefix}_deduplicated_sreads.fastq.gz" + + } + + // paired + if (sread1.getName() != "input.1" && sread2.getName() != "input.2") { + + dedup_paired = params.enable_deduplication ? + "gunzip -cf $sread1 | awk '{if(NR%4==1) \$0=sprintf(\"@1_%d\",(1+i++)); print;}' | gzip -c > ${prefix}_deduplicated_sread_R1.fastq.gz && gunzip -cf $sread2 | awk '{if(NR%4==1) \$0=sprintf(\"@1_%d\",(1+i++)); print;}' | gzip -c > ${prefix}_deduplicated_sread_R2.fastq.gz" : + "ln -s $sread1 ${prefix}_deduplicated_sread_R1.fastq.gz && ln -s $sread2 ${prefix}_deduplicated_sread_R2.fastq.gz" + + paired_param = "-1 ${prefix}_deduplicated_sread_R1.fastq.gz -2 ${prefix}_deduplicated_sread_R2.fastq.gz" + + } + + // lreads + if (lreads.getName() != "input.4") { + + dedup_lr = params.enable_deduplication ? 
+ "gunzip -cf $lreads | awk '{if(NR%4==1) \$0=sprintf(\"@1_%d\",(1+i++)); print;}' | gzip -c > ${prefix}_deduplicated_lreads.fastq.gz" : + "ln -s $lreads ${prefix}_deduplicated_lreads.fastq.gz" + + lr_param = "-l $lreads" + + } """ # Save unicycler version unicycler --version > unicycler_version.txt + # remove duplicate reads + $dedup_sreads + $dedup_paired + $dedup_lr + # Run unicycler unicycler \\ $paired_param \\ diff --git a/nextflow_schema.json b/nextflow_schema.json index a3866ce9..54730b74 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -34,6 +34,10 @@ "type": "string", "description": "Path to input samplesheet" }, + "enable_deduplication": { + "type": "boolean", + "description": "Execute deduplication on reads before assembly." + }, "output": { "type": "string", "description": "Path for output directory", From 04bb226052397989fe759ba8ce2aced3ca0231e8 Mon Sep 17 00:00:00 2001 From: fmalmeida Date: Tue, 26 Sep 2023 16:47:10 +0200 Subject: [PATCH 25/50] update changelog --- markdown/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/markdown/CHANGELOG.md b/markdown/CHANGELOG.md index 5e90fde1..51207410 100644 --- a/markdown/CHANGELOG.md +++ b/markdown/CHANGELOG.md @@ -10,6 +10,7 @@ The tracking for changes started in v2.1 * dimish the size and tools inside the docker images, the docker images now are only built to contain tools and all required for modules that cannot just use bioconda docker images. * [[#98](https://github.com/fmalmeida/bacannot/issues/98)] -- Add ICEberg and PHAST blastp results to json summary * [[#100](https://github.com/fmalmeida/bacannot/issues/100)] -- Update pipeline to use docker shasum instead of tags +* [[#107](https://github.com/fmalmeida/bacannot/issues/107)] -- Add a parameter, `--enable_deduplication` for deduplicating input reads before assembly * Update unicycler docker image to latest '0.5.0--py310h6cc9453_3' to avoid errors originated from previous image containing buggy installation. ## v3.2 [19-December-2022] From 520fd3fa0c58cee5bb274d8b7a5413b23cdb7218 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Sun, 1 Oct 2023 06:01:52 -0400 Subject: [PATCH 26/50] Update docs for v3.3 (#110) * update cli help * Update installation.md * add indentation * Update README.md * Update README.md * fix tracedir * always show from copy * Update quickstart.md * Update manual.md --- README.md | 60 +------------ docs/config.md | 188 +---------------------------------------- docs/defaults.config | 191 ++++++++++++++++++++++++++++++++++++++++++ docs/installation.md | 59 +++++++++++-- docs/manual.md | 5 +- docs/quickstart.md | 2 +- docs/requirements.txt | 3 +- mkdocs.yml | 2 + nextflow.config | 9 +- nextflow_schema.json | 16 ++-- 10 files changed, 263 insertions(+), 272 deletions(-) create mode 100644 docs/defaults.config diff --git a/README.md b/README.md index dc41e2fe..f464b049 100644 --- a/README.md +++ b/README.md @@ -87,16 +87,7 @@ These images have been kept separate to not create massive Docker image and to a ## Installation -1. If you don't have it already install [Docker](https://docs.docker.com/) in your computer. - * After installed, you need to download the required Docker images - - ```bash - docker pull fmalmeida/bacannot:v3.3_misc ; - docker pull fmalmeida/bacannot:v3.3_renv ; - docker pull fmalmeida/bacannot:jbrowse ; - ``` - -🔥 Nextflow can also automatically handle images download on the fly when executed. All the other docker images from **biocontainers** are downloaded automatically. 
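Stripped of the Nextflow variable plumbing, the read-name deduplication added in the assembly modules above boils down to a one-liner of this shape (a minimal sketch; input and output file names are placeholders, and the `\$` escaping seen in the module strings is only needed inside Nextflow):

```bash
# rewrite every FASTQ header (line 1 of each 4-line record) with a unique
# running index, so duplicated read names can no longer break the assemblers
gunzip -cf reads.fastq.gz \
    | awk '{if(NR%4==1) $0=sprintf("@1_%d",(1+i++)); print;}' \
    | gzip -c > deduplicated_reads.fastq.gz
```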
If docker has exceeded its download limit rates, please try again in a few hours. +1. If you don't have it already install either [Docker](https://docs.docker.com/) or [Singularity](https://docs.sylabs.io/guides/3.5/user-guide/index.html) in your computer. 2. Install Nextflow (version 20.10 or higher): @@ -110,54 +101,7 @@ These images have been kept separate to not create massive Docker image and to a 🔥 Users can get let the pipeline always updated with: `nextflow pull fmalmeida/bacannot` -### Downloading and updating databases - -Bacannot databases are not inside the docker images anymore to avoid huge images and problems with connections and limit rates with dockerhub. - -#### Pre-formatted - -Users can directly download pre-formatted databases from Zenodo: https://doi.org/10.5281/zenodo.7615811 - -Useful for standardization and also overcoming known issues that may arise when formatting databases with `singularity` profile. - -A module to download the latest pre-formatted database has also been made available: -```bash -# Download pipeline pre-built databases -nextflow run fmalmeida/bacannot --get_zenodo_db --output ./ -profile -``` - -#### I want to generate a new formatted database - -To download and format a copy of required bacannot databases users can execute the following: - -```bash -# Download pipeline databases -nextflow run fmalmeida/bacannot --get_dbs --output bacannot_dbs -profile -``` - -This will produce a directory like this: - -```bash -bacannot_dbs -├── amrfinder_db -├── antismash_db -├── argminer_db -├── card_db -├── iceberg_db -├── kofamscan_db -├── mlst_db -├── phast_db -├── phigaro_db -├── pipeline_info -├── plasmidfinder_db -├── platon_db -├── prokka_db -├── resfinder_db -├── vfdb_db -└── victors_db -``` - -> To update databases you can either download a new one to a new directory. Remove the database you want to get a new one from the root bacannot dir and use the same command above to save in the same directory (the pipeline will only try to download missing databases). Or, you can use the parameter `--force_update` to download everything again. +Please refer to the installation page, for a complete guide on required images and databases. » ## Quickstart diff --git a/docs/config.md b/docs/config.md index 9962abef..9aebaa19 100644 --- a/docs/config.md +++ b/docs/config.md @@ -13,191 +13,5 @@ Default configuration --------------------- ```groovy -/* - - Required / Default Parameters. - This parameters must always be set - -*/ -params { - - /* - - DB DOWNLOAD WORKFLOW - - */ - -// Trigger database download and formatting workflow? --> will not run annotation -// Will download and format a database inside {output} parameter - get_dbs = false - force_update = false - - /* - - ANNOTATION INPUTS - - */ - -// Input data mus be given inside a well-formated samplesheet. -// We provide a well-formated example at: https://github.com/fmalmeida/test_datasets/raw/main/bacannot_testing_samplesheets/samplesheet.yaml -// -// Please read the example samplesheet so you can understand how to properly fill it. -// -// It is also documented in the main manual: https://bacannot.readthedocs.io/en/latest/samplesheet - input = null - -// path to directory containing databases used by bacannot -// you can download databases with: -// nextflow run fmalmeida/bacannot --get_dbs --output bacannot_dbs -profile - bacannot_db = null - - /* - - GENERAL PARAMETERS - - */ - -// Main output folder name. More than one bacannot annotation can be redirected -// to the same output parameter. 
It is good to keep related annotations together. -// A subdirectory with the filename will be created inside this directory. - output = 'results' - -// Number of minimum overlapping base pairs required for merging -// Negative values, such as -20, means the number of required overlapping bases for merging. -// Positive values, such as 5, means the maximum distance accepted between features for merging. -// By default (if Blank), this process is not executed. For execution the user needs to provide a value - bedtools_merge_distance = null - - /* - * Bakta optional - */ -// If user set path to an existing bakta database, the pipeline will use bakta instead of prokka - bakta_db = null - - /* - * Prokka optional parameters - */ -// Include comprehensive PGAP hmm database in prokka annotation instead of TIGRFAM. -// PGAP is big and using it may have higher running times but better results - prokka_use_pgap = false - -// Annotation mode: Archaea|Bacteria|Mitochondria|Viruses (default 'Bacteria') - prokka_kingdom = null - -// Translation table code. Must be set if the above is set. -// Example: params.prokka_genetic.code = 11 - prokka_genetic_code = null - -// Use rnammer instead of Barrnap? False or True? - prokka_use_rnammer = false - - /* - * Resfinder species panel - */ - -// Species panel to be used when annotating with Resfinder. -// It sets a default for all samples in the samplesheet. -// If a sample has a different value inside the samplesheet it will overwrite the value for that sample -// If blank it will not be executed. -// It must be identical (without the *) as written in their webservice https://cge.cbs.dtu.dk/services/ResFinder/. -// E.g. 'Escherichia coli'; 'Klebsiella' ... - resfinder_species = null - - /* - * Handling the execution of processes - * - * By default, all processes are executed. These - * parameters tells wheter NOT to run a process. - * - * Which means: false will allow its execution - * while true will create a barrier and skip a process. - */ -// (NOT RUN?) Plasmids annotation (controls PlasmidFinder execution) - skip_plasmid_search = false - -// (NOT RUN?) General Virulence annotation (controls VFDB and Victors scan) - skip_virulence_search = false - -// (NOT RUN?) Resistance annotation (controls AMRfinder and RGI) - skip_resistance_search = false - -// (NOT RUN?) ICE annotation (controls ICEberg annotation) - skip_iceberg_search = false - -// (NOT RUN?) prophage annotation (controls PHAST and Phigaro) - skip_prophage_search = false - -// (NOT RUN?) KO (KEGG Orthology) annotation - skip_kofamscan = false - -// (NOT RUN?) antiSMASH (secondary metabolite) annotation - skip_antismash = false - - /* - * Custom databases can be used to annotate additional genes in the genome. - * It runs a BLAST alignment against the genome, therefore, the custom database - * More than one custom database can be given separated by commas. - * Gene headers must be properly formated as described in the - * documentation: https://bacannot.readthedocs.io/en/latest/custom-db - */ -// Custom fastas (PROT / NUCL) - custom_db = null -// Custom annotation using list of NCBI protein accs - ncbi_proteins = null - - /* - * Annotation thresholds to be used when scanning specific databases and features - * Select a combination of thresholds that is meaningful for your data. Some of - * the databases are protein-only, others are nucleotide only. We cannnot control - * that and the databases will be scanned either if blastp or blastn using these - * thresholds described here. 
- */ - -// Identity threshold for plasmid annotation - plasmids_minid = 90 - -// Coverage threshold for plasmid annotation - plasmids_mincov = 60 - -// Virulence genes identity threshold - blast_virulence_minid = 90 - -// Virulence genes coverage threshold - blast_virulence_mincov = 90 - -// AMR genes identity threshold - blast_resistance_minid= 90 - -// AMR genes coverage threshold - blast_resistance_mincov = 90 - -// MGEs (ICEs and Phages) identity threshold - blast_MGEs_minid = 85 - -// MGEs (ICEs and Phages) coverage threshold - blast_MGEs_mincov = 85 - -// User's custom database identity threshold - blast_custom_minid = 65 - -// User's custom database coverage threshold - blast_custom_mincov = 65 - - /* - * Resources allocation configuration - * Defaults only, expecting to be overwritten - */ -// Select versions of bioconda quay.io additional tools -// Tools that are not part of the core of the pipeline, -// but can eventually be used by users - unicycler_version = '0.4.8--py38h8162308_3' - flye_version = '2.9--py39h39abbe0_0' - bakta_version = '1.7.0--pyhdfd78af_1' - -// Max resource options - max_memory = '20.GB' - max_cpus = 16 - max_time = '40.h' - -} +{% include 'defaults.config' %} ``` \ No newline at end of file diff --git a/docs/defaults.config b/docs/defaults.config new file mode 100644 index 00000000..0d43fe48 --- /dev/null +++ b/docs/defaults.config @@ -0,0 +1,191 @@ +/* + + Required / Default Parameters. + This parameters must always be set + +*/ +params { + + /* + + DB DOWNLOAD WORKFLOW + + */ + +// Trigger database download and formatting workflow? --> will not run annotation +// Will download and format a database inside {output} parameter + get_dbs = false + force_update = false + get_zenodo_db = false // download pre-built database + + /* + + ANNOTATION INPUTS + + */ + +// Input data mus be given inside a well-formated samplesheet. +// We provide a well-formated example at: https://github.com/fmalmeida/test_datasets/raw/main/bacannot_testing_samplesheets/samplesheet.yaml +// +// Please read the example samplesheet so you can understand how to properly fill it. +// +// It is also documented in the main manual: https://bacannot.readthedocs.io/en/latest/samplesheet + input = null + +// Enable reads deduplication for assembly? (If input has reads) + enable_deduplication = false + +// path to directory containing databases used by bacannot +// you can download databases with: +// nextflow run fmalmeida/bacannot --get_dbs --output bacannot_dbs -profile + bacannot_db = null + + /* + + GENERAL PARAMETERS + + */ + +// Main output folder name. More than one bacannot annotation can be redirected +// to the same output parameter. It is good to keep related annotations together. +// A subdirectory with the filename will be created inside this directory. + output = 'results' + +// Number of minimum overlapping base pairs required for merging +// Negative values, such as -20, means the number of required overlapping bases for merging. +// Positive values, such as 5, means the maximum distance accepted between features for merging. +// By default (if Blank), this process is not executed. For execution the user needs to provide a value + bedtools_merge_distance = null + + /* + * Bakta optional + */ +// If user set path to an existing bakta database, the pipeline will use bakta instead of prokka + bakta_db = null + + /* + * Prokka optional parameters + */ +// Include comprehensive PGAP hmm database in prokka annotation instead of TIGRFAM. 
+// PGAP is big and using it may have higher running times but better results + prokka_use_pgap = false + +// Annotation mode: Archaea|Bacteria|Mitochondria|Viruses (default 'Bacteria') + prokka_kingdom = null + +// Translation table code. Must be set if the above is set. +// Example: params.prokka_genetic.code = 11 + prokka_genetic_code = null + +// Use rnammer instead of Barrnap? False or True? + prokka_use_rnammer = false + + /* + * Resfinder species panel + */ + +// Species panel to be used when annotating with Resfinder. +// It sets a default for all samples in the samplesheet. +// If a sample has a different value inside the samplesheet it will overwrite the value for that sample +// If blank it will not be executed. +// It must be identical (without the *) as written in their webservice https://cge.cbs.dtu.dk/services/ResFinder/. +// E.g. 'Escherichia coli'; 'Klebsiella' ... + resfinder_species = null + + /* + * Handling the execution of processes + * + * By default, all processes are executed. These + * parameters tells wheter NOT to run a process. + * + * Which means: false will allow its execution + * while true will create a barrier and skip a process. + */ +// (NOT RUN?) Plasmids annotation (controls PlasmidFinder execution) + skip_plasmid_search = false + +// (NOT RUN?) General Virulence annotation (controls VFDB and Victors scan) + skip_virulence_search = false + +// (NOT RUN?) Resistance annotation (controls AMRfinder and RGI) + skip_resistance_search = false + +// (NOT RUN?) ICE annotation (controls ICEberg annotation) + skip_iceberg_search = false + +// (NOT RUN?) prophage annotation (controls PHAST and Phigaro) + skip_prophage_search = false + +// (NOT RUN?) KO (KEGG Orthology) annotation + skip_kofamscan = false + +// (NOT RUN?) antiSMASH (secondary metabolite) annotation + skip_antismash = false + + /* + * Custom databases can be used to annotate additional genes in the genome. + * It runs a BLAST alignment against the genome, therefore, the custom database + * More than one custom database can be given separated by commas. + * Gene headers must be properly formated as described in the + * documentation: https://bacannot.readthedocs.io/en/latest/custom-db + */ +// Custom fastas (PROT / NUCL) + custom_db = null +// Custom annotation using list of NCBI protein accs + ncbi_proteins = null + + /* + * Annotation thresholds to be used when scanning specific databases and features + * Select a combination of thresholds that is meaningful for your data. Some of + * the databases are protein-only, others are nucleotide only. We cannnot control + * that and the databases will be scanned either if blastp or blastn using these + * thresholds described here. 
+ */ + +// Identity threshold for plasmid annotation + plasmids_minid = 90 + +// Coverage threshold for plasmid annotation + plasmids_mincov = 60 + +// Virulence genes identity threshold + blast_virulence_minid = 90 + +// Virulence genes coverage threshold + blast_virulence_mincov = 90 + +// AMR genes identity threshold + blast_resistance_minid= 90 + +// AMR genes coverage threshold + blast_resistance_mincov = 90 + +// MGEs (ICEs and Phages) identity threshold + blast_MGEs_minid = 85 + +// MGEs (ICEs and Phages) coverage threshold + blast_MGEs_mincov = 85 + +// User's custom database identity threshold + blast_custom_minid = 65 + +// User's custom database coverage threshold + blast_custom_mincov = 65 + + /* + * Resources allocation configuration + * Defaults only, expecting to be overwritten + */ +// Select versions of bioconda quay.io additional tools +// Tools that are not part of the core of the pipeline, +// but can eventually be used by users + unicycler_version = '0.5.0--py310h6cc9453_3' + flye_version = '2.9--py39h6935b12_1' + bakta_version = '1.7.0--pyhdfd78af_1' + +// Max resource options + max_memory = '20.GB' + max_cpus = 16 + max_time = '40.h' + +} diff --git a/docs/installation.md b/docs/installation.md index 5056467d..8035ba43 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -19,17 +19,30 @@ nextflow pull fmalmeida/bacannot ## Downloading docker images -The custom docker images used by the pipeline are: +> The pipeline uses both custom and public images. +> All images can be downloaded on the fly, automatically by nextflow, and this is the recommended way to do it. + +If you want to download it yourself, you can find all the images used in the pipeline described in the file [docker.config](https://github.com/fmalmeida/bacannot/blob/master/conf/docker.config) (for docker) and [singularity.config](https://github.com/fmalmeida/bacannot/blob/master/conf/singularity.config) (for singularity). + +The images are defined like the following: ```bash -docker pull fmalmeida/bacannot:v3.3_misc ; -docker pull fmalmeida/bacannot:v3.3_renv ; -docker pull fmalmeida/bacannot:jbrowse ; +... +withLabel: 'db_download|db_tools|misc' { + container = 'fmalmeida/bacannot@sha256:726e085f1bd71b47c2d8a38fd46d812aab7eb8978bab7bf3cde3aa2b7b3e0f2c' +} +... ``` -> The pipeline also uses other public images available in biocontainers. All images can be downloaded on the fly, automatically be nextflow. +And could be downloaded like this: + +```bash +docker pull fmalmeida/bacannot@sha256:726e085f1bd71b47c2d8a38fd46d812aab7eb8978bab7bf3cde3aa2b7b3e0f2c +``` -!!! info "Using singularity" +> You would need to do it for each image. + +!!! info "If using singularity" **Docker and singularity images are downloaded on the fly**. Be sure to properly set `NXF_SINGULARITY_LIBRARYDIR` env variable to a writable directory if using Singularity. This will make that the downloaded images are reusable through different executions. Read more at: https://www.nextflow.io/docs/latest/singularity.html#singularity-docker-hub @@ -42,14 +55,44 @@ docker pull fmalmeida/bacannot:jbrowse ; singularity pull --dir $NXF_SINGULARITY_LIBRARYDIR fmalmeida-bacannot-v3.3_misc.img docker://fmalmeida/bacannot:v3.3_misc ``` +## Bacannot databases + +Bacannot databases are not inside the docker images anymore to avoid huge images and problems with connections and limit rates with dockerhub. 
+ +### Pre-formatted + +Users can directly download pre-formatted databases from Zenodo: https://doi.org/10.5281/zenodo.7615811 + +Useful for standardization and also overcoming known issues that may arise when formatting databases with `singularity` profile. + +A module to download the latest pre-formatted database has also been made available: + +```bash +# Download pipeline pre-built databases +nextflow run fmalmeida/bacannot \ + --get_zenodo_db \ + --output ./ \ + -profile +``` + +### I want to generate a new formatted database + +```{bash .annotate hl_lines="5"} +# Download pipeline databases +nextflow run fmalmeida/bacannot \ + --get_dbs \ + --output bacannot_dbs \ + -profile +``` + ## Testing your installation After that, you can run the pipeline with a testing dataset by selecting one of the available profiles: 1. Docker - * `nextflow run fmalmeida/mpgap -profile docker,test` + * `nextflow run fmalmeida/mpgap -profile docker,test` --bacannot_db ./bacannot_dbs 2. Singularity - * `nextflow run fmalmeida/mpgap -profile singularity,test` + * `nextflow run fmalmeida/mpgap -profile singularity,test` --bacannot_db ./bacannot_dbs !!! note "About NF profiles" diff --git a/docs/manual.md b/docs/manual.md index 2a51d52e..e42c1227 100644 --- a/docs/manual.md +++ b/docs/manual.md @@ -29,10 +29,10 @@ The pipeline accepts as input two other input files types that are used to perfo ## Input/output options -|
Parameter
| Required | Default | Description | +|
Parameter
| Required | Default | Description | | :--------------------------------------- | :------- | :------ | :---------- | | `--input` | :material-check: | NA | Input samplesheet describing all the samples to be analysed | -| `--enable_deduplication` | :material-close: | false | Run deduplication command on input reads before assembly | +| `--enable_deduplication` | :material-close: | false | Run deduplication command on input reads before assembly. Only useful for samples where reads are given instead of a genome fasta. | | `--output` | :material-check: | results | Name of directory to store output values. A sub-directory for each genome will be created inside this main directory. | | `--bacannot_db` | :material-check: | NA | Path for root directory containing required bacannot databases | @@ -46,6 +46,7 @@ The pipeline accepts as input two other input files types that are used to perfo | :--------------------------------------- | :------- | :------ | :---------- | | `--get_dbs` | :material-close: | false | Instead of running the analysis workflow, it will try to download required databases and save them in `--output` | | `--force_update` | :material-close: | false | Instead of only downloading missing databases, download everything again and overwrite. | +| `--get_zenodo_db` | :material-close: | false | Download pre-built databases stored in zenodo. [See quickstart](quickstart.md#). !!! tip "" diff --git a/docs/quickstart.md b/docs/quickstart.md index 5490726f..42b5ee89 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -102,4 +102,4 @@ nextflow run fmalmeida/bacannot -profile docker,quicktest --bacannot_db ./bacann ### Annotation with bakta -User can also perform the core generic annotation with bakta instead of prokka. Please read [the manual](manual#bakta-annotation). +User can also perform the core generic annotation with bakta instead of prokka. Please read [the manual](manual.md#bakta-annotation). 
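Since switching the core annotator is only a pointer swap, a bakta-based quicktest could look like the sketch below (the `--bakta_db` path is a placeholder for an existing bakta database):

```bash
# hypothetical quicktest run using bakta instead of prokka for core annotation
nextflow run fmalmeida/bacannot \
    -profile docker,quicktest \
    --bacannot_db ./bacannot_dbs \
    --bakta_db ./bakta_db
```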
diff --git a/docs/requirements.txt b/docs/requirements.txt index d5ff7eda..42c51dbf 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -14,4 +14,5 @@ mergedeep>=1.3.4 colorama>=0.4; platform_system == 'Windows' mkdocs-pymdownx-material-extras mkdocs-git-revision-date-plugin -mkdocs-material \ No newline at end of file +mkdocs-material +mkdocs-macros-plugin \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 58482435..1a5157ba 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -22,6 +22,8 @@ theme: repo: fontawesome/brands/github-alt plugins: - git-revision-date + - search + - macros markdown_extensions: - pymdownx.emoji: emoji_index: !!python/name:materialx.emoji.twemoji diff --git a/nextflow.config b/nextflow.config index 1afb0454..11ead557 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,7 +13,6 @@ includeConfig 'conf/defaults.config' params { // Boilerplate options - tracedir = "${params.output}/pipeline_info" plaintext_email = false monochrome_logs = false help = false @@ -84,19 +83,19 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/bacannot_timeline_${trace_timestamp}.html" + file = "${params.output}/pipeline_info/bacannot_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/bacannot_report_${trace_timestamp}.html" + file = "${params.output}/pipeline_info/bacannot_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/bacannot_trace_${trace_timestamp}.txt" + file = "${params.output}/pipeline_info/bacannot_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/bacannot_pipeline_dag_${trace_timestamp}.svg" + file = "${params.output}/pipeline_info/bacannot_pipeline_dag_${trace_timestamp}.svg" } /* diff --git a/nextflow_schema.json b/nextflow_schema.json index 54730b74..e95d3c34 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -63,15 +63,18 @@ "properties": { "max_cpus": { "type": "integer", - "default": 16 + "default": 16, + "description": "Maximum number of cpus a single module can use." }, "max_memory": { "type": "string", - "default": "20.GB" + "default": "20.GB", + "description": "Maximum memory a single module can use." }, "max_time": { "type": "string", - "default": "40.h" + "default": "40.h", + "description": "Maximum time a module can run." } } }, @@ -322,13 +325,6 @@ "help_text": "Number of minimum overlapping base pairs required for merging\nNegative values, such as -20, means the number of required overlapping bases for merging.\nPositive values, such as 5, means the maximum distance accepted between features for merging.\nBy default (if Blank), this process is not executed. 
For execution the user needs to provide a value", "description": "Minimum overlapping base pairs required for merging" }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.output}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", From 93a62ee6387ee04a4cdfb1c40d1980f272cafeeb Mon Sep 17 00:00:00 2001 From: fmalmeida Date: Sun, 1 Oct 2023 12:26:38 +0200 Subject: [PATCH 27/50] update citation information --- README.md | 3 ++- docs/index.md | 3 ++- lib/WorkflowMain.groovy | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f464b049..62f2a1dc 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3627669-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3627669) +[![F1000 Paper](https://img.shields.io/badge/Citation%20F1000-10.12688/f1000research.139488.1-orange)](https://doi.org/10.12688/f1000research.139488.1) [![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/fmalmeida/bacannot?include_prereleases&label=Latest%20release)](https://github.com/fmalmeida/bacannot/releases) [![Documentation](https://img.shields.io/badge/Documentation-readthedocs-brightgreen)](https://bacannot.readthedocs.io/en/latest/?badge=latest) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/) @@ -8,6 +8,7 @@ [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![License](https://img.shields.io/badge/License-GPL%203-black)](https://github.com/fmalmeida/bacannot/blob/master/LICENSE) [![Follow on Twitter](http://img.shields.io/badge/twitter-%40fmarquesalmeida-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/fmarquesalmeida) +[![Zenodo Archive](https://img.shields.io/badge/Zenodo-Archive-blue)](https://doi.org/10.5281/zenodo.3627669) [![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/github.com/fmalmeida/bacannot) diff --git a/docs/index.md b/docs/index.md index 3ee7b0df..4cb28ba9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,7 +2,7 @@ -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3627669-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3627669) +[![F1000 Paper](https://img.shields.io/badge/Citation%20F1000-10.12688/f1000research.139488.1-orange)](https://doi.org/10.12688/f1000research.139488.1) [![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/fmalmeida/bacannot?include_prereleases&label=Latest%20release)](https://github.com/fmalmeida/bacannot/releases) [![Documentation](https://img.shields.io/badge/Documentation-readthedocs-brightgreen)](https://bacannot.readthedocs.io/en/latest/?badge=latest) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/) @@ -10,6 +10,7 @@ [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![License](https://img.shields.io/badge/License-GPL%203-black)](https://github.com/fmalmeida/bacannot/blob/master/LICENSE) [![Follow on 
Twitter](http://img.shields.io/badge/twitter-%40fmarquesalmeida-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/fmarquesalmeida) +[![Zenodo Archive](https://img.shields.io/badge/Zenodo-Archive-blue)](https://doi.org/10.5281/zenodo.3627669) ## About diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 57d85384..6ccb1c75 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -10,7 +10,7 @@ class WorkflowMain { public static String citation(workflow) { return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + - " https://doi.org/10.5281/zenodo.3627669\n\n" + + " https://doi.org/10.12688/f1000research.139488.1\n\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + From ae51e18eb87cf1ea2a44b89d747bd477186ef314 Mon Sep 17 00:00:00 2001 From: fmalmeida Date: Sun, 1 Oct 2023 12:27:49 +0200 Subject: [PATCH 28/50] add citation example --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 62f2a1dc..2eda798d 100644 --- a/README.md +++ b/README.md @@ -195,7 +195,11 @@ It will result in the following: ## Citation -To cite this tool please refer to our [Zenodo tag](https://doi.org/10.5281/zenodo.3627669). +To cite this pipeline, please refer to: + +> Almeida FMd, Campos TAd and Pappas Jr GJ. Scalable and versatile container-based pipelines for de novo genome assembly and bacterial annotation. [version 1; peer review: awaiting peer review]. F1000Research 2023, 12:1205 (https://doi.org/10.12688/f1000research.139488.1) + +Additionally, archived versions of the pipeline are also found in [Zenodo](https://doi.org/10.5281/zenodo.3627669). This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [GPLv3](https://github.com/fmalmeida/bacannot/blob/master/LICENSE). From 1c28330c087a37a60f5e2e5f209c453029af8b36 Mon Sep 17 00:00:00 2001 From: fmalmeida Date: Sun, 1 Oct 2023 14:31:17 +0200 Subject: [PATCH 29/50] Update CHANGELOG.md --- markdown/CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/markdown/CHANGELOG.md b/markdown/CHANGELOG.md index 51207410..c5bbbec5 100644 --- a/markdown/CHANGELOG.md +++ b/markdown/CHANGELOG.md @@ -2,16 +2,19 @@ The tracking for changes started in v2.1 -## v3.3 [TBD] +## v3.3 [01-October-2023] * [[#50](https://github.com/fmalmeida/bacannot/issues/50)] -- Add `Integron Finder` tool to the pipeline * [[#69](https://github.com/fmalmeida/bacannot/issues/69)] -- Change how tools use docker images in order to: * make tools use public bioconda images whenever possible, to allow easy addition of tools and avoid many conflicts in docker images * diminish the size and number of tools inside the docker images; the docker images are now only built to contain the tools and requirements of modules that cannot simply use bioconda docker images. 
+* [[#81](https://github.com/fmalmeida/bacannot/issues/81)] -- Add `MOB Suite` tool to the pipeline +* [[#85](https://github.com/fmalmeida/bacannot/issues/85)] -- Include checkup on header size for Prokka * [[#98](https://github.com/fmalmeida/bacannot/issues/98)] -- Add ICEberg and PHAST blastp results to json summary * [[#100](https://github.com/fmalmeida/bacannot/issues/100)] -- Update pipeline to use docker shasum instead of tags * [[#107](https://github.com/fmalmeida/bacannot/issues/107)] -- Add a parameter, `--enable_deduplication`, for deduplicating input reads before assembly * Update unicycler docker image to latest '0.5.0--py310h6cc9453_3' to avoid errors originating from the previous image's buggy installation. +* Other minor changes / updates highlighted in [[#93](https://github.com/fmalmeida/bacannot/pull/93)] ## v3.2 [19-December-2022] From 529d41438554f4475c7174ee9c700ff45a117048 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Sun, 29 Oct 2023 08:49:11 +0100 Subject: [PATCH 30/50] 111 error on summary step when missing annotationidtxt key (#112) * update for dev-testing * adapt to antismash lite as the normal installation is horrible * change for dev image * updated docker image with stable 1.2.4 release of falmeida-py package for bugfix * update changelog --- conf/docker.config | 2 +- conf/singularity.config | 2 +- docker/misc/Dockerfile | 2 +- docs/installation.md | 4 ++-- markdown/CHANGELOG.md | 4 ++++ 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/conf/docker.config b/conf/docker.config index 28ffa1ab..8ad97a72 100644 --- a/conf/docker.config +++ b/conf/docker.config @@ -18,7 +18,7 @@ process { // Custom pipeline's containers with various tools for general purposes // withLabel: 'db_download|db_tools|misc' { - container = 'fmalmeida/bacannot@sha256:726e085f1bd71b47c2d8a38fd46d812aab7eb8978bab7bf3cde3aa2b7b3e0f2c' + container = 'fmalmeida/bacannot@sha256:0648797837cd8e11b6abd40745cafc0db81647953921ec54ce0ceef9ecef6450' } // container for R tools diff --git a/conf/singularity.config b/conf/singularity.config index f75a7559..57f949cf 100644 --- a/conf/singularity.config +++ b/conf/singularity.config @@ -18,7 +18,7 @@ process { // Custom pipeline's containers with various tools for general purposes // withLabel: 'db_download|db_tools|misc' { - container = 'docker://fmalmeida/bacannot@sha256:726e085f1bd71b47c2d8a38fd46d812aab7eb8978bab7bf3cde3aa2b7b3e0f2c' + container = 'docker://fmalmeida/bacannot@sha256:0648797837cd8e11b6abd40745cafc0db81647953921ec54ce0ceef9ecef6450' } // container for R tools diff --git a/docker/misc/Dockerfile b/docker/misc/Dockerfile index 3e7c3422..844526a9 100644 --- a/docker/misc/Dockerfile +++ b/docker/misc/Dockerfile @@ -81,7 +81,7 @@ ENV PATH=/work/digIS:$PATH # Create env for antismash RUN mamba create -y -n antismash -c bioconda -c conda-forge \ - 'bioconda::antismash>=6' 'anaconda::flask' 'anaconda::jinja2' 'anaconda::markupsafe' nomkl && \ + 'bioconda::antismash-lite>=6' 'anaconda::flask' 'anaconda::jinja2' 'anaconda::markupsafe' nomkl && \ rm -rf /opt/conda/envs/antismash/lib/*/site-packages/antismash/databases && \ mamba clean -afy diff --git a/docs/installation.md b/docs/installation.md index 8035ba43..81453f9d 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -29,7 +29,7 @@ The images are defined like the following: ```bash ... 
withLabel: 'db_download|db_tools|misc' { - container = 'fmalmeida/bacannot@sha256:726e085f1bd71b47c2d8a38fd46d812aab7eb8978bab7bf3cde3aa2b7b3e0f2c' + container = 'fmalmeida/bacannot@sha256:0648797837cd8e11b6abd40745cafc0db81647953921ec54ce0ceef9ecef6450' } ... ``` @@ -37,7 +37,7 @@ withLabel: 'db_download|db_tools|misc' { And could be downloaded like this: ```bash -docker pull fmalmeida/bacannot@sha256:726e085f1bd71b47c2d8a38fd46d812aab7eb8978bab7bf3cde3aa2b7b3e0f2c +docker pull fmalmeida/bacannot@sha256:0648797837cd8e11b6abd40745cafc0db81647953921ec54ce0ceef9ecef6450 ``` > You would need to do it for each image. diff --git a/markdown/CHANGELOG.md b/markdown/CHANGELOG.md index c5bbbec5..74a34f9f 100644 --- a/markdown/CHANGELOG.md +++ b/markdown/CHANGELOG.md @@ -2,6 +2,10 @@ The tracking for changes started in v2.1 +## v3.3.1 [TBD] + +* [[#111](https://github.com/fmalmeida/bacannot/issues/111)] -- Updated `falmeida-py` package version to fix problem with missing key for Summary. + ## v3.3 [01-October-2023] * [[#50](https://github.com/fmalmeida/bacannot/issues/50)] -- Add `Integron Finder` tool to the pipeline From 6975eacaf63ef5235a13ff34c1df4f0c16317ab2 Mon Sep 17 00:00:00 2001 From: fmalmeida Date: Tue, 20 Feb 2024 21:57:15 +0100 Subject: [PATCH 31/50] update version --- .zenodo.json | 2 +- markdown/CHANGELOG.md | 6 ++++++ nextflow.config | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index 67a52e61..cade7500 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -2,7 +2,7 @@ "description": "
The pipeline\n\nbacannot is a customisable, easy-to-use pipeline that uses state-of-the-art software for comprehensively annotating prokaryotic genomes, having only Docker and Nextflow as dependencies. It is able to annotate and detect virulence and resistance genes, plasmids, secondary metabolites, genomic islands, prophages, ICEs, KO, and more, while providing nice and beautiful interactive documents for results exploration.
", "license": "other-open", "title": "fmalmeida/bacannot: A generic but comprehensive bacterial annotation pipeline", - "version": "v3.3.2", + "version": "v3.3.3", "upload_type": "software", "creators": [ { diff --git a/markdown/CHANGELOG.md b/markdown/CHANGELOG.md index 8b7d06e0..b6593da6 100644 --- a/markdown/CHANGELOG.md +++ b/markdown/CHANGELOG.md @@ -2,6 +2,12 @@ The tracking for changes started in v2.1 +## v3.3.3 [TBD] + +* [[#118](https://github.com/fmalmeida/bacannot/issues/116)] + * Add a parameter to allow user to skip `INTEGRON_FINDER` execution. + * Add a parameter to allow user to skip `CIRCOS` execution. + ## v3.3.2 [09-February-2024] * [[#116](https://github.com/fmalmeida/bacannot/issues/116)] -- Small update to avoid having `integron_finder` gbks with start position as 0, since it breaks conversion to gff. diff --git a/nextflow.config b/nextflow.config index 0f57904a..2432cac8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -108,7 +108,7 @@ manifest { homePage = "https://github.com/fmalmeida/bacannot" mainScript = "main.nf" nextflowVersion = "!>=22.10.1" - version = '3.3.2' + version = '3.3.3' } // Function to ensure that resource requirements don't go beyond From 15f94a2ae51e0b0117ecf9e1cc3c8afe5b5dbb55 Mon Sep 17 00:00:00 2001 From: fmalmeida Date: Tue, 20 Feb 2024 21:57:37 +0100 Subject: [PATCH 32/50] add new params add parameters to skip the INTEGRON_FINDER and CIRCOS modules when desired. --- conf/defaults.config | 6 ++++++ nextflow_schema.json | 12 ++++++++++++ workflows/bacannot.nf | 45 ++++++++++++++++++++++++------------------- 3 files changed, 43 insertions(+), 20 deletions(-) diff --git a/conf/defaults.config b/conf/defaults.config index 0d43fe48..5a0d0e7d 100644 --- a/conf/defaults.config +++ b/conf/defaults.config @@ -122,6 +122,12 @@ params { // (NOT RUN?) antiSMASH (secondary metabolite) annotation skip_antismash = false +// (NOT RUN?) integron finder tool + skip_integron_finder = false + +// (NOT RUN?) CIRCOS tool + skip_icircos = false + /* * Custom databases can be used to annotate additional genes in the genome. 
* It runs a BLAST alignment against the genome, therefore, the custom database diff --git a/nextflow_schema.json b/nextflow_schema.json index e95d3c34..8dc7aefa 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -132,6 +132,12 @@ "help_text": "If true, the process will be skipped!", "hidden": true }, + "skip_circos": { + "type": "boolean", + "description": "Skip (do not run) circos?", + "help_text": "If true, the process will be skipped!", + "hidden": true + }, "skip_plasmid_search": { "type": "boolean", "description": "Skip (do not run) plasmidfinder?", @@ -156,6 +162,12 @@ "help_text": "If true, the process will be skipped!", "hidden": true }, + "skip_integron_finder": { + "type": "boolean", + "description": "Skip (do not run) integron finder?", + "help_text": "If true, the process will be skipped!", + "hidden": true + }, "skip_prophage_search": { "type": "boolean", "description": "Skip (do not run) prophage annotation?", diff --git a/workflows/bacannot.nf b/workflows/bacannot.nf index 93087adf..344556ac 100644 --- a/workflows/bacannot.nf +++ b/workflows/bacannot.nf @@ -140,8 +140,13 @@ workflow BACANNOT { ISLANDPATH( annotation_out_ch.gbk ) // Integron_finder software - INTEGRON_FINDER( annotation_out_ch.genome ) - INTEGRON_FINDER_2GFF( INTEGRON_FINDER.out.gbk ) + if (!params.skip_integron_finder) { + INTEGRON_FINDER( annotation_out_ch.genome ) + INTEGRON_FINDER_2GFF( INTEGRON_FINDER.out.gbk ) + ch_integron_finder_gff = INTEGRON_FINDER_2GFF.out.gff + } else { + ch_integron_finder_gff = Channel.empty() + } // Virulence search if (params.skip_virulence_search == false) { @@ -300,7 +305,7 @@ workflow BACANNOT { .join(phast_output_ch, remainder: true) .join(DIGIS.out.gff, remainder: true) .join(ch_custom_annotations, remainder: true) - .join(INTEGRON_FINDER_2GFF.out.gff, remainder: true) + .join(ch_integron_finder_gff, remainder: true) ) /* @@ -340,7 +345,7 @@ workflow BACANNOT { .join( MERGE_ANNOTATIONS.out.digis_gff ) .join( antismash_output_ch, remainder: true ) .join( MERGE_ANNOTATIONS.out.customdb_gff.groupTuple(), remainder: true ) - .join( INTEGRON_FINDER_2GFF.out.gff, remainder: true ) + .join( ch_integron_finder_gff, remainder: true ) ) // Render reports @@ -376,7 +381,7 @@ workflow BACANNOT { .join( DRAW_GIS.out.example, remainder: true ) .join( phast_output_ch, remainder: true ) .join( MERGE_ANNOTATIONS.out.digis_gff ) - .join( INTEGRON_FINDER_2GFF.out.gff, remainder: true ) + .join( ch_integron_finder_gff, remainder: true ) ) // @@ -405,7 +410,7 @@ workflow BACANNOT { .join( DIGIS.out.all , remainder: true ) .join( antismash_all_ch , remainder: true ) .join( MERGE_ANNOTATIONS.out.all , remainder: true ) - .join( INTEGRON_FINDER_2GFF.out.gff, remainder: true ) + .join( ch_integron_finder_gff , remainder: true ) .join( mobsuite_output_ch , remainder: true ) ) MERGE_SUMMARIES( @@ -413,19 +418,19 @@ workflow BACANNOT { ) // Render circos plots - circos_input_ch = - annotation_out_ch.genome - .join( annotation_out_ch.gff , remainder: true ) - .join( MERGE_ANNOTATIONS.out.gff, remainder: true ) - .join( PHISPY.out.gff , remainder: true ) - .map{ - it -> - sample = it[0] - it.remove(0) - [ sample, it ] - } - CIRCOS( - circos_input_ch - ) + if (!params.skip_circos) { + circos_input_ch = + annotation_out_ch.genome + .join( annotation_out_ch.gff , remainder: true ) + .join( MERGE_ANNOTATIONS.out.gff, remainder: true ) + .join( PHISPY.out.gff , remainder: true ) + .map{ + it -> + sample = it[0] + it.remove(0) + [ sample, it ] + } + CIRCOS( circos_input_ch ) + } } From 
2670dd97225735f2ce11ba1cf52b32004235cbb8 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Thu, 22 Feb 2024 15:40:19 +0000 Subject: [PATCH 33/50] add documentation on new parameters --- docs/manual.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/manual.md b/docs/manual.md index e42c1227..1a2b1df3 100644 --- a/docs/manual.md +++ b/docs/manual.md @@ -96,6 +96,8 @@ The use of this parameter sets a default value for input samples. If a sample ha | `--skip_prophage_search` | :material-close: | false | Tells whether not to run prophage annotation modules | | `--skip_kofamscan` | :material-close: | false | Tells whether not to run KEGG orthology (KO) annotation with KofamScan | | `--skip_antismash` | :material-close: | false | Tells whether or not to run antiSMASH (secondary metabolite) annotation. AntiSMASH is executed using only its core annotation modules in order to keep it fast. | +| `--skip_circos` | :material-close: | false | Tells whether or not to run the final `CIRCOS` module. When the input genome has many contigs, its results are not meaningful. | +| `--skip_integron_finder` | :material-close: | false | Tells whether or not to run the integron finder tool. | ## Custom databases From 89c5cbe873a3de425c56c5aac80b38a6cd6fdba2 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Thu, 22 Feb 2024 15:41:18 +0000 Subject: [PATCH 34/50] fix typo --- conf/defaults.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/defaults.config b/conf/defaults.config index 5a0d0e7d..4bce1ca8 100644 --- a/conf/defaults.config +++ b/conf/defaults.config @@ -126,7 +126,7 @@ params { skip_integron_finder = false // (NOT RUN?) CIRCOS tool - skip_icircos = false + skip_circos = false /* * Custom databases can be used to annotate additional genes in the genome. 
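For context, a quick sketch of how the two new skip parameters documented above might be combined in a single run; apart from the two skip flags, the profile and database path shown are illustrative placeholders:

```bash
# hypothetical invocation: disable the integron_finder and circos modules
# (input/samplesheet options omitted for brevity)
nextflow run fmalmeida/bacannot \
    -profile docker \
    --bacannot_db ./bacannot_dbs \
    --skip_integron_finder \
    --skip_circos
```

Because both parameters default to false, passing the bare flags is enough to turn the skips on.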
From d0741d60e8532c9991fe5d811be818b86bef0cd5 Mon Sep 17 00:00:00 2001 From: Felipe Almeida Date: Wed, 6 Mar 2024 12:37:39 -0300 Subject: [PATCH 35/50] fix code so that it also runs for singularity --- conf/docker.config | 1 + conf/singularity.config | 12 ++++++++---- modules/bacannot_dbs/antismash.nf | 26 +++++++++++++++++++++++++- nextflow.config | 2 +- 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/conf/docker.config b/conf/docker.config index 8ad97a72..507d23dc 100644 --- a/conf/docker.config +++ b/conf/docker.config @@ -4,6 +4,7 @@ docker { enabled = true runOptions = '--platform linux/amd64 -u root:$(id -g)' } +params.running_engine = 'docker' /* diff --git a/conf/singularity.config b/conf/singularity.config index 57f949cf..802662dd 100644 --- a/conf/singularity.config +++ b/conf/singularity.config @@ -1,9 +1,13 @@ // Container usage and permission -docker.enabled = false -singularity.enabled = true -singularity.runOptions = '--writable-tmpfs -e --no-home -B $PWD' -singularity.autoMounts = true +docker.enabled = false env.SINGULARITY_DISABLE_CACHE = 1 +singularity { + enabled = true + envWhitelist = ['SINGULARITY_TMPDIR'] + autoMounts = true +} +params.running_engine = 'singularity' +// singularity.runOptions = '--writable-tmpfs -e --no-home -B $PWD' /* diff --git a/modules/bacannot_dbs/antismash.nf b/modules/bacannot_dbs/antismash.nf index 5e9b8962..b5b73700 100644 --- a/modules/bacannot_dbs/antismash.nf +++ b/modules/bacannot_dbs/antismash.nf @@ -6,9 +6,33 @@ process ANTISMASH_DB { file("*") script: + def antismash_version='6.1.1' + + if (params.running_engine == 'singularity') + """ + mkdir local-install + export PYTHONUSERBASE=./local-install + export PATH=/opt/conda/envs/antismash/bin:\$PATH + + # install locally so it can download dbs + # singularity's read-only filesystem causes permission issues for this tool + wget https://dl.secondarymetabolites.org/releases/${antismash_version}/antismash-${antismash_version}.tar.gz + tar zxvf antismash-${antismash_version}.tar.gz + python -m pip install --user ./antismash-${antismash_version} + export PYTHONPATH=\$(realpath \$( find ./local-install -name 'site-packages' )) + + # now download the antismash database + # using the local installation just created + ./local-install/bin/download-antismash-databases --database-dir ./ + + # delete it + rm -rf ./local-install ./antismash-${antismash_version} + """ + + else """ # download antismash database export PATH=/opt/conda/envs/antismash/bin:\$PATH - download-antismash-databases --database-dir \$(pwd) + download-antismash-databases --database-dir ./ """ } diff --git a/nextflow.config b/nextflow.config index 2432cac8..b0654f31 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,7 +22,7 @@ params { get_samplesheet = false validate_params = true show_hidden_params = false - schema_ignore_params = 'enable_conda,monochrome_logs,plaintext_email' + schema_ignore_params = 'enable_conda,monochrome_logs,plaintext_email,running_engine' enable_conda = false monochrome_logs = false From 3fbfc91f0cce6488da6caed9fa9592f976caf85d Mon Sep 17 00:00:00 2001 From: Felipe Almeida Date: Thu, 7 Mar 2024 17:54:41 -0300 Subject: [PATCH 36/50] fixed installation problem for singularity --- docker/misc/Dockerfile | 11 ++--------- modules/resistance/resfinder.nf | 1 + 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/docker/misc/Dockerfile b/docker/misc/Dockerfile index 844526a9..29b35a6d 100644 --- a/docker/misc/Dockerfile +++ b/docker/misc/Dockerfile @@ -47,7 +47,7 @@ RUN mamba create -y \ -c bioconda -c defaults -c conda-forge -c 
anaconda -c falmeida \ -n digIS \ --no-channel-priority \ - 'hmmer==3.1b2' 'biopython==1.77' nomkl && \ + 'hmmer==3.1b2' 'biopython==1.75' nomkl && \ mamba clean -afy # Install pip packages @@ -72,6 +72,7 @@ COPY victors_bkp/victors_06-2022.fasta /work/victors.fasta RUN mamba create -y -n resfinder \ -c bioconda -c defaults -c conda-forge -c anaconda -c falmeida \ 'resfinder>=4.1' docopt pandas && \ + chmod 777 -R /opt/conda/envs/resfinder && \ mamba clean -afy # get a copy of digis @@ -79,12 +80,6 @@ RUN git clone -b master https://github.com/janka2012/digIS.git COPY custom_fix_grange_digis.py /work/digIS/src/common/grange.py ENV PATH=/work/digIS:$PATH -# Create env for antismash -RUN mamba create -y -n antismash -c bioconda -c conda-forge \ - 'bioconda::antismash-lite>=6' 'anaconda::flask' 'anaconda::jinja2' 'anaconda::markupsafe' nomkl && \ - rm -rf /opt/conda/envs/antismash/lib/*/site-packages/antismash/databases && \ - mamba clean -afy - # fix bioperl RUN mamba create -n perl -y \ -c bioconda -c conda-forge -c anaconda -c defaults \ @@ -99,5 +94,3 @@ RUN pip3 install zenodo_get # fix permissions RUN chmod 777 -R /work -RUN chmod 777 -R /opt/conda/envs/antismash/lib/**/site-packages/antismash -RUN chmod 777 -R /opt/conda/envs/resfinder \ No newline at end of file diff --git a/modules/resistance/resfinder.nf b/modules/resistance/resfinder.nf index 9b7105e9..017efffa 100644 --- a/modules/resistance/resfinder.nf +++ b/modules/resistance/resfinder.nf @@ -24,6 +24,7 @@ process RESFINDER { """ # activate env source activate resfinder + export PATH=/opt/conda/envs/resfinder/lib/python3.12/site-packages/resfinder/:\$PATH # Run resfinder acquired resistance run_resfinder.py \\ From 39c3d1722892d86c45989d684e85fe68d1e7ffb2 Mon Sep 17 00:00:00 2001 From: Felipe Almeida Date: Fri, 8 Mar 2024 08:45:30 -0300 Subject: [PATCH 37/50] split antismash docker image and modify module to have custom snippet for singularity in order to avoid problems related to read-only filesystem that happens with this tool --- docker/antismash/Dockerfile | 29 +++++++++++++++ docker/antismash/build.sh | 1 + modules/generic/antismash.nf | 70 +++++++++++++++++++++++++++++++++--- 3 files changed, 96 insertions(+), 4 deletions(-) create mode 100644 docker/antismash/Dockerfile create mode 100644 docker/antismash/build.sh diff --git a/docker/antismash/Dockerfile b/docker/antismash/Dockerfile new file mode 100644 index 00000000..3fc621fe --- /dev/null +++ b/docker/antismash/Dockerfile @@ -0,0 +1,29 @@ +FROM nfcore/base +LABEL authors="Felipe Almeida" \ + description="Docker image containing antismash for bacannot" + +# install mamba +RUN conda install \ + -n base -c conda-forge 'mamba=1.5' --yes && \ + conda clean -afy + +# set CONDA_PREFIX +ENV CONDA_PREFIX=/opt/conda + +# install antismash +RUN mamba create -y \ + -n antismash \ + -c bioconda -c conda-forge \ + 'bioconda::antismash-lite==6.1.1' 'anaconda::flask' 'anaconda::jinja2' 'anaconda::markupsafe' emboss nomkl && \ + chmod 777 -R /opt/conda/envs/antismash/lib/**/site-packages/antismash && \ + mamba clean -afy + +# update PATH variable +ENV PATH=/opt/conda/envs/antismash/bin:$PATH + +# install ubuntu packages +RUN apt-get update && apt-get install -y build-essential libtinfo5 libtiff5 libopenjp2-7 + +# fix permissions +WORKDIR /work +RUN chmod 777 -R /work diff --git a/docker/antismash/build.sh b/docker/antismash/build.sh new file mode 100644 index 00000000..51153a5c --- /dev/null +++ b/docker/antismash/build.sh @@ -0,0 +1 @@ +../../bin/build_image.sh $1 diff --git 
a/modules/generic/antismash.nf b/modules/generic/antismash.nf index 207c12d4..b56df0a5 100644 --- a/modules/generic/antismash.nf +++ b/modules/generic/antismash.nf @@ -6,6 +6,8 @@ process ANTISMASH { tag "${prefix}" label = [ 'misc', 'process_medium' ] + // if (params.running_engine = 'singularity') { runOptions = '--writable-tmpfs -e --no-home -B $PWD' } + input: tuple val(prefix), file(genbank) file(bacannot_db) @@ -18,10 +20,69 @@ process ANTISMASH { script: def gbk_suffix = (params.bakta_db) ? "gbff" : "gbk" def gbk_prefix = "${genbank.baseName}" - "${gbk_suffix}" - """ - # Activate env + def antismash_version='6.1.1' + + if (params.running_engine == 'singularity') + """ + # Get tool version + antismash --version > antismash_version.txt ; + + # set up local install dir and activate env + mkdir local-install + export PYTHONUSERBASE=./local-install export PATH=/opt/conda/envs/antismash/bin:\$PATH + + # singularity's read-only filesystem causes permission issues for this tool + wget https://dl.secondarymetabolites.org/releases/${antismash_version}/antismash-${antismash_version}.tar.gz + tar zxvf antismash-${antismash_version}.tar.gz + python -m pip install --user ./antismash-${antismash_version} + export PYTHONPATH=\$(realpath \$( find ./local-install -name 'site-packages' )) + + # Run tool + ./local-install/bin/antismash \\ + --output-dir antiSMASH \\ + --genefinding-tool none \\ + --databases ${bacannot_db}/antismash_db \\ + -c $task.cpus \\ + $genbank ; + + # enter results dir + cd antiSMASH ; + + # produce gff from main results + seqret \\ + -sequence ${gbk_prefix}.gbk \\ + -feature \\ + -fformat genbank \\ + -fopenfile ${gbk_prefix}.gbk \\ + -osformat gff \\ + -osname_outseq ${gbk_prefix} \\ + -auto ; + + # get the locus tags annotated as list + # only when results exist + if ls *region*gbk 1> /dev/null 2>&1; then + + grep \\ + "locus_tag" \\ + *region*gbk | \\ + cut \\ + -f 2 \\ + -d "=" | \\ + tr -d '"' | \\ + sort -u > gene_ids.lst ; + + # subset regions GFF from main GFF for JBrowse + grep \\ + -w \\ + -f gene_ids.lst \\ + ${gbk_prefix}.gff > regions.gff ; + fi + """ + + else + """ # Get tool version antismash --version > antismash_version.txt ; @@ -29,10 +90,10 @@ process ANTISMASH { antismash \\ --output-dir antiSMASH \\ --genefinding-tool none \\ - -c $task.cpus \\ --databases ${bacannot_db}/antismash_db \\ + -c $task.cpus \\ $genbank ; - + # enter results dir cd antiSMASH ; @@ -67,4 +128,5 @@ process ANTISMASH { fi """ + } From 25ad59f558eb23c4f5a487232b6a1e188f72b375 Mon Sep 17 00:00:00 2001 From: Felipe Almeida Date: Fri, 8 Mar 2024 08:51:25 -0300 Subject: [PATCH 38/50] also remove the tar.gz --- modules/bacannot_dbs/antismash.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/bacannot_dbs/antismash.nf b/modules/bacannot_dbs/antismash.nf index b5b73700..e554f9ff 100644 --- a/modules/bacannot_dbs/antismash.nf +++ b/modules/bacannot_dbs/antismash.nf @@ -26,7 +26,7 @@ process ANTISMASH_DB { ./local-install/bin/download-antismash-databases --database-dir ./ # delete it - rm -rf ./local-install ./antismash-${antismash_version} + rm -rf ./local-install ./antismash-${antismash_version}* """ From a91d4e8957ad43e94072c1645b00f352aa2f4cea Mon Sep 17 00:00:00 2001 From: Felipe Almeida Date: Fri, 8 Mar 2024 17:57:07 -0300 Subject: [PATCH 39/50] fix misc image --- docker/misc/Dockerfile | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/docker/misc/Dockerfile b/docker/misc/Dockerfile index 29b35a6d..2d6f10f4 100644 --- a/docker/misc/Dockerfile +++ 
b/docker/misc/Dockerfile @@ -7,17 +7,20 @@ RUN conda install -n base -c conda-forge 'mamba=1.5' --yes && \ conda clean -afy RUN pip install --upgrade pip +# Install ubuntu packages +RUN apt-get update -y && apt-get install -y samtools libarchive13 build-essential + # Install the conda environment RUN mamba install -y \ -c bioconda -c defaults -c conda-forge -c anaconda -c falmeida \ --no-channel-priority \ - 'python=3.9' \ + 'python>=3.9' \ 'blast>=2.12' \ 'diamond>=2.0.15' \ 'bedtools>=2.30' \ 'kma' \ 'nanopolish' \ - 'biopython==1.78' \ + 'biopython==1.83' \ seqkit \ bioawk \ 'easy_circos==0.4' \ @@ -27,18 +30,16 @@ RUN mamba install -y \ libtiff \ jq && \ mamba clean -afy + +# install my custom scripts RUN git clone https://github.com/fmalmeida/pythonScripts.git && \ cd pythonScripts && \ pip install . && \ falmeida-py --help -# Install samtools -RUN apt-get update -y && apt-get install -y samtools - # Install gff-toolbox RUN git clone https://github.com/fmalmeida/gff-toolbox.git RUN cd gff-toolbox && \ - python3 -m pip install --upgrade pip 'matplotlib==3.7.3' && \ python3 setup.py install && \ gff-toolbox -h @@ -47,7 +48,7 @@ RUN mamba create -y \ -c bioconda -c defaults -c conda-forge -c anaconda -c falmeida \ -n digIS \ --no-channel-priority \ - 'hmmer==3.1b2' 'biopython==1.75' nomkl && \ + 'hmmer==3.1b2' 'biopython==1.77' nomkl && \ mamba clean -afy # Install pip packages @@ -87,7 +88,7 @@ RUN mamba create -n perl -y \ RUN mamba run -n perl PERL5LIB= PERL_LOCAL_LIB_ROOT= cpanm Bio::Root::RootI # fix python -RUN python3 -m pip install cryptography==38.0.4 +RUN python3 -m pip install cryptography==38.0.4 'biopython==1.83' 'matplotlib==3.7.3' # install get zenodo RUN pip3 install zenodo_get From b7564089d76894b9336ff4c63021411c11222465 Mon Sep 17 00:00:00 2001 From: Felipe Almeida Date: Fri, 8 Mar 2024 17:57:48 -0300 Subject: [PATCH 40/50] update used docker images --- conf/docker.config | 10 ++++++++-- conf/singularity.config | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/conf/docker.config b/conf/docker.config index 507d23dc..fe8600f3 100644 --- a/conf/docker.config +++ b/conf/docker.config @@ -19,7 +19,7 @@ process { // Custom pipeline's containers with various tools for general purposes // withLabel: 'db_download|db_tools|misc' { - container = 'fmalmeida/bacannot@sha256:0648797837cd8e11b6abd40745cafc0db81647953921ec54ce0ceef9ecef6450' + container = 'fmalmeida/bacannot@sha256:bdb31637cacf99736656ab3b69f1f01ba1b5eb026771d5c266b4c84e96153057' } // container for R tools @@ -32,10 +32,16 @@ process { container = 'fmalmeida/bacannot@sha256:0ec3b289d6e0c624556d125b2ed9b63499178e266a315175fd87cf020a402898' } + // container for jbrowser withLabel: 'jbrowse' { container = 'fmalmeida/bacannot@sha256:6afdca17b561bf212c1f976422aee3fe047563c32a15112a6262556d1f75201e' } + // container for antismash + withName: 'ANTISMASH|ANTISMASH_DB' { + container = 'fmalmeida/bacannot@sha256:fe42fbbfb7d4a026dafb146cb533ee7f1d9a97b25ec6df64840796c343707ebb' + } + // // Public containers used within the pipeline // @@ -96,7 +102,7 @@ process { } withName: PHIGARO { - container = "quay.io/biocontainers/phigaro:2.3.0--pyh7b7c402_0" + container = "quay.io/biocontainers/phigaro:2.4.0--pyhdfd78af_0" } withName: PHISPY { diff --git a/conf/singularity.config b/conf/singularity.config index 802662dd..e57ebeac 100644 --- a/conf/singularity.config +++ b/conf/singularity.config @@ -22,7 +22,7 @@ process { // Custom pipeline's containers with various tools for general purposes // withLabel: 
'db_download|db_tools|misc' { - container = 'docker://fmalmeida/bacannot@sha256:0648797837cd8e11b6abd40745cafc0db81647953921ec54ce0ceef9ecef6450' + container = 'docker://fmalmeida/bacannot@sha256:bdb31637cacf99736656ab3b69f1f01ba1b5eb026771d5c266b4c84e96153057' } // container for R tools @@ -35,10 +35,16 @@ process { container = 'docker://fmalmeida/bacannot@sha256:0ec3b289d6e0c624556d125b2ed9b63499178e266a315175fd87cf020a402898' } + // container for jbrowser withLabel: 'jbrowse' { container = 'docker://fmalmeida/bacannot@sha256:6afdca17b561bf212c1f976422aee3fe047563c32a15112a6262556d1f75201e' } + // container for antismash + withName: 'ANTISMASH|ANTISMASH_DB' { + container = 'docker://fmalmeida/bacannot@sha256:fe42fbbfb7d4a026dafb146cb533ee7f1d9a97b25ec6df64840796c343707ebb' + } + // // Public containers used within the pipeline // @@ -100,7 +106,7 @@ process { } withName: PHIGARO { - container = "https://depot.galaxyproject.org/singularity/phigaro:2.3.0--pyh7b7c402_0" + container = "https://depot.galaxyproject.org/singularity/phigaro:2.4.0--pyhdfd78af_0" } withName: PHISPY { From b9b9b69b1566417b70aca888b56a84adfc373922 Mon Sep 17 00:00:00 2001 From: Felipe Almeida Date: Fri, 8 Mar 2024 17:58:39 -0300 Subject: [PATCH 41/50] update docker image --- docs/installation.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/installation.md b/docs/installation.md index 81453f9d..753ed60b 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -29,7 +29,7 @@ The images are defined like the following: ```bash ... withLabel: 'db_download|db_tools|misc' { - container = 'fmalmeida/bacannot@sha256:0648797837cd8e11b6abd40745cafc0db81647953921ec54ce0ceef9ecef6450' + container = 'fmalmeida/bacannot@sha256:bdb31637cacf99736656ab3b69f1f01ba1b5eb026771d5c266b4c84e96153057' } ... ``` @@ -37,7 +37,7 @@ withLabel: 'db_download|db_tools|misc' { And could be downloaded like this: ```bash -docker pull fmalmeida/bacannot@sha256:0648797837cd8e11b6abd40745cafc0db81647953921ec54ce0ceef9ecef6450 +docker pull fmalmeida/bacannot@sha256:bdb31637cacf99736656ab3b69f1f01ba1b5eb026771d5c266b4c84e96153057 ``` > You would need to do it for each image. 
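As a usage sketch of the digest pinning shown in the configs above, the updated image could be pre-fetched before a run; the local image file name is an assumption, and the cache directory must match the NXF_SINGULARITY_LIBRARYDIR setup described in these docs:

```bash
# docker: pull the misc image by the digest pinned in conf/docker.config
docker pull fmalmeida/bacannot@sha256:bdb31637cacf99736656ab3b69f1f01ba1b5eb026771d5c266b4c84e96153057

# singularity: cache the same digest-pinned image locally
# (the .img file name below is an illustrative placeholder)
singularity pull --dir $NXF_SINGULARITY_LIBRARYDIR \
    fmalmeida-bacannot-misc.img \
    docker://fmalmeida/bacannot@sha256:bdb31637cacf99736656ab3b69f1f01ba1b5eb026771d5c266b4c84e96153057
```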
From a68d260f5fd7e384deea6ee2ff5c101da5c60321 Mon Sep 17 00:00:00 2001 From: Felipe Almeida Date: Fri, 8 Mar 2024 17:58:50 -0300 Subject: [PATCH 42/50] update module --- modules/MGEs/digIS.nf | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/modules/MGEs/digIS.nf b/modules/MGEs/digIS.nf index 665abf60..a1ad29aa 100644 --- a/modules/MGEs/digIS.nf +++ b/modules/MGEs/digIS.nf @@ -18,14 +18,8 @@ process DIGIS { script: """ - # activate env - source activate digIS - # run digIS - python3 \$(which digIS_search.py) -i $genome -g $genbank -o digIS - - # deactivate env - conda deactivate + conda run -n digIS python3 \$(which digIS_search.py) -i $genome -g $genbank -o digIS # parse digIS to get nucleotide and aminoacide # also put ids in uppercase From 19dd1fcef4d5909bad83da563b066a53bc37740a Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Sat, 9 Mar 2024 10:32:52 +0000 Subject: [PATCH 43/50] try again to use auto-tests --- .github/workflows/build_kofamscan.yml | 45 ---------------- .github/workflows/test_pr_docker.yml | 65 ++++++++++++----------- .github/workflows/test_pr_singularity.yml | 41 -------------- 3 files changed, 35 insertions(+), 116 deletions(-) delete mode 100644 .github/workflows/build_kofamscan.yml delete mode 100644 .github/workflows/test_pr_singularity.yml diff --git a/.github/workflows/build_kofamscan.yml b/.github/workflows/build_kofamscan.yml deleted file mode 100644 index a7cf5947..00000000 --- a/.github/workflows/build_kofamscan.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: build-kofamscan - -on: - workflow_dispatch: - schedule: - - cron: '0 0 1 */6 *' - -jobs: - - build: - runs-on: ubuntu-latest - env: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} - steps: - - id: keydb - uses: pozetroninc/github-action-get-latest-release@master - with: - owner: fmalmeida - repo: bacannot - excludes: prerelease, draft - - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: download github repo - run: | - git clone https://github.com/fmalmeida/bacannot.git - - - name: Build and push docker image - id: buildx - run: | - # get more space - sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android - sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET - - # enter docker dir - cd bacannot/docker - - # login to docker - docker login -u "$DOCKERHUB_USERNAME" -p "$DOCKERHUB_PASS" - - # create image - docker build -t fmalmeida/bacannot:kofamscan_teste -f Dockerfile_kofamscan . 
- docker push fmalmeida/bacannot:kofamscan_teste diff --git a/.github/workflows/test_pr_docker.yml b/.github/workflows/test_pr_docker.yml index c6ebe0ae..9a643ffa 100644 --- a/.github/workflows/test_pr_docker.yml +++ b/.github/workflows/test_pr_docker.yml @@ -1,39 +1,44 @@ name: Testing new PR with docker on: pull_request: - branches: master - types: [ ready_for_review, synchronize, reopened ] + branches: [master, dev] + types: [ ready_for_review, opened, synchronize, reopened ] jobs: run_nextflow: name: Run pipeline for the upcoming PR runs-on: ubuntu-latest - env: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} - + steps: - - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Clean environment - run: | - sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android - sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET - - - name: Build bacannot database - run: | - nextflow run main.nf -profile docker --get_dbs --output bacannot_dbs --max_cpus 2 --max_memory '6.GB' --max_time '6.h' - rm -rf bacannot_dbs/antismash_db bacannot_dbs/kofamscan_db bacannot_dbs/prokka_db/PGAP_NCBI.hmm # remove unused in quicktest to diminish size - - - name: Run the pipeline - run: | - nextflow run main.nf -profile docker,quicktest --bacannot_db bacannot_dbs + + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Clean environment + run: | + sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android + sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET + + - name: Get database + run: | + nextflow run main.nf -profile docker --get_zenodo_db ./zenodo_db --max_memory '6.GB' --max_cpus 2 + sudo rm -r work .nextflow* + yes | docker system prune + + - name: Run quicktest profile + run: | + nextflow run main.nf -profile docker,quicktest --bacannot_db $( realpath ./zenodo_db/bac* ) --output ./results --max_memory '6.GB' --max_cpus 2 + sudo rm -r work .nextflow* + yes | docker system prune + + - name: View results + run: | + sudo apt-get install -y tree + tree ./results diff --git a/.github/workflows/test_pr_singularity.yml b/.github/workflows/test_pr_singularity.yml deleted file mode 100644 index b8bd776c..00000000 --- a/.github/workflows/test_pr_singularity.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: Testing new PR with singularity -on: - pull_request: - branches: [ master, dev, develop ] - types: [ ready_for_review, synchronize, reopened ] - -jobs: - run_nextflow: - name: Run pipeline for the upcoming PR - runs-on: ubuntu-latest - - steps: - - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Install Singularity - uses: eWaterCycle/setup-singularity@v7 - with: - singularity-version: 3.8.3 - - - name: Clean environment - run: | - sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android - sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET - - - name: Build bacannot database - run: | - nextflow run main.nf -profile singularity --get_dbs 
--output bacannot_dbs --max_cpus 2 --max_memory '6.GB' --max_time '6.h' - rm -rf bacannot_dbs/antismash_db bacannot_dbs/kofamscan_db bacannot_dbs/prokka_db/PGAP_NCBI.hmm # remove unused in quicktest to diminish size - - - name: Run the pipeline - run: | - nextflow run main.nf -profile singularity,quicktest --bacannot_db bacannot_dbs From dd0c344de704998aa7f5da573646754258251a3a Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Sat, 9 Mar 2024 10:34:08 +0000 Subject: [PATCH 44/50] simplify types --- .github/workflows/test_pr_docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_pr_docker.yml b/.github/workflows/test_pr_docker.yml index 9a643ffa..f7863fdd 100644 --- a/.github/workflows/test_pr_docker.yml +++ b/.github/workflows/test_pr_docker.yml @@ -2,7 +2,7 @@ name: Testing new PR with docker on: pull_request: branches: [master, dev] - types: [ ready_for_review, opened, synchronize, reopened ] + types: [ opened, synchronize, reopened ] jobs: run_nextflow: From 126d37f76b1009ce09edb8cafeb5e34499d6b95b Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Sat, 9 Mar 2024 10:40:28 +0000 Subject: [PATCH 45/50] update changelog --- markdown/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/markdown/CHANGELOG.md b/markdown/CHANGELOG.md index b6593da6..efb8ed72 100644 --- a/markdown/CHANGELOG.md +++ b/markdown/CHANGELOG.md @@ -7,6 +7,7 @@ The tracking for changes started in v2.1 * [[#118](https://github.com/fmalmeida/bacannot/issues/116)] * Add a parameter to allow user to skip `INTEGRON_FINDER` execution. * Add a parameter to allow user to skip `CIRCOS` execution. +* Split the antismash docker image, and added workaround snippets to allow the pipeline to run with singularity ## v3.3.2 [09-February-2024] From 696ab3004d8d3c7dbf7934d35d72c3081e239a34 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Sat, 9 Mar 2024 11:10:18 +0000 Subject: [PATCH 46/50] fix cli --- .github/workflows/test_pr_docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_pr_docker.yml b/.github/workflows/test_pr_docker.yml index f7863fdd..6eb23f99 100644 --- a/.github/workflows/test_pr_docker.yml +++ b/.github/workflows/test_pr_docker.yml @@ -28,7 +28,7 @@ jobs: - name: Get database run: | - nextflow run main.nf -profile docker --get_zenodo_db ./zenodo_db --max_memory '6.GB' --max_cpus 2 + nextflow run main.nf -profile docker --get_zenodo_db --output ./zenodo_db --max_memory '6.GB' --max_cpus 2 sudo rm -r work .nextflow* yes | docker system prune From 80d34abbc70e833e347a15de210ab7d42f97f516 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Sat, 9 Mar 2024 12:20:30 +0000 Subject: [PATCH 47/50] it fails to publish, let's resume --- .github/workflows/test_pr_docker.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_pr_docker.yml b/.github/workflows/test_pr_docker.yml index 6eb23f99..24f9f90d 100644 --- a/.github/workflows/test_pr_docker.yml +++ b/.github/workflows/test_pr_docker.yml @@ -28,13 +28,15 @@ jobs: - name: Get database run: | - nextflow run main.nf -profile docker --get_zenodo_db --output ./zenodo_db --max_memory '6.GB' --max_cpus 2 + nextflow run main.nf -profile docker --get_zenodo_db --output ./ --max_memory '6.GB' --max_cpus 2 + nextflow run main.nf -profile docker --get_zenodo_db --output ./ --max_memory '6.GB' --max_cpus 2 -resume + nextflow run main.nf -profile docker --get_zenodo_db --output ./ 
--max_memory '6.GB' --max_cpus 2 -resume sudo rm -r work .nextflow* yes | docker system prune - name: Run quicktest profile run: | - nextflow run main.nf -profile docker,quicktest --bacannot_db $( realpath ./zenodo_db/bac* ) --output ./results --max_memory '6.GB' --max_cpus 2 + nextflow run main.nf -profile docker,quicktest --bacannot_db $( realpath ./bac* ) --output ./results --max_memory '6.GB' --max_cpus 2 sudo rm -r work .nextflow* yes | docker system prune From 74f5100d99b157ccbb922731b2aa9b898c58d642 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 11 Mar 2024 07:56:54 +0000 Subject: [PATCH 48/50] add better information on singularity variables --- docs/installation.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/docs/installation.md b/docs/installation.md index 753ed60b..2bc4a5fa 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -52,7 +52,16 @@ docker pull fmalmeida/bacannot@sha256:bdb31637cacf99736656ab3b69f1f01ba1b5eb0267 # apply this command to each image # just change the "/" and ":" for "-". # E.g. Image fmalmeida/bacannot:v3.3_misc becomes fmalmeida-bacannot-v3.3_misc.img - singularity pull --dir $NXF_SINGULARITY_LIBRARYDIR fmalmeida-bacannot-v3.3_misc.img docker://fmalmeida/bacannot:v3.3_misc + # for singularity --> prepare env variables + # remember to properly set NXF_SINGULARITY_LIBRARYDIR + # read more at https://www.nextflow.io/docs/latest/singularity.html#singularity-docker-hub + export NXF_SINGULARITY_LIBRARYDIR= # Set a path to your singularity storage dir + export NXF_SINGULARITY_CACHEDIR= # Set a path to your singularity cache dir + export SINGULARITY_CACHEDIR= # Set a path to your singularity cache dir + + singularity pull \ + --dir $NXF_SINGULARITY_LIBRARYDIR \ + fmalmeida-bacannot-v3.3_misc.img docker://fmalmeida/bacannot:v3.3_misc ``` ## Bacannot databases @@ -90,9 +99,9 @@ nextflow run fmalmeida/bacannot \ After that, you can run the pipeline with a testing dataset by selecting one of the available profiles: 1. Docker - * `nextflow run fmalmeida/mpgap -profile docker,test` --bacannot_db ./bacannot_dbs + * `nextflow run fmalmeida/mpgap -profile docker,test --bacannot_db ./bacannot_dbs` 2. Singularity - * `nextflow run fmalmeida/mpgap -profile singularity,test` --bacannot_db ./bacannot_dbs + * `nextflow run fmalmeida/mpgap -profile singularity,test --bacannot_db ./bacannot_dbs` !!! 
note "About NF profiles" From 0cb1b00018e9f42199564af8c0cc93c98563fd2c Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 11 Mar 2024 10:02:34 +0000 Subject: [PATCH 49/50] fix paths of where versions are stored --- modules/MGEs/integron_finder.nf | 4 ++-- modules/MGEs/mob_suite.nf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/MGEs/integron_finder.nf b/modules/MGEs/integron_finder.nf index b06fccdf..1cbfc14e 100644 --- a/modules/MGEs/integron_finder.nf +++ b/modules/MGEs/integron_finder.nf @@ -1,7 +1,7 @@ process INTEGRON_FINDER { - publishDir "${params.output}", mode: 'copy', saveAs: { filename -> + publishDir "${params.output}/${prefix}", mode: 'copy', saveAs: { filename -> if (filename.indexOf("_version.txt") > 0) "tools_versioning/$filename" - else "${prefix}/integron_finder/$filename" + else "integron_finder/$filename" } tag "${prefix}" label = [ 'process_medium' ] diff --git a/modules/MGEs/mob_suite.nf b/modules/MGEs/mob_suite.nf index 14256e92..0ae1c3d3 100644 --- a/modules/MGEs/mob_suite.nf +++ b/modules/MGEs/mob_suite.nf @@ -1,7 +1,7 @@ process MOBSUITE { - publishDir "${params.output}", mode: 'copy', saveAs: { filename -> + publishDir "${params.output}/${prefix}", mode: 'copy', saveAs: { filename -> if (filename.indexOf("_version.txt") > 0) "tools_versioning/$filename" - else "${prefix}/plasmids/mob_suite/$filename" + else "plasmids/mob_suite/$filename" } tag "${prefix}" label = [ 'process_medium' ] From 2ff9e2dec70e8d1adf97b13fb88b079f37dd5813 Mon Sep 17 00:00:00 2001 From: Felipe Almeida Date: Mon, 11 Mar 2024 14:25:49 -0300 Subject: [PATCH 50/50] update changelog with date --- markdown/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markdown/CHANGELOG.md b/markdown/CHANGELOG.md index efb8ed72..03654f50 100644 --- a/markdown/CHANGELOG.md +++ b/markdown/CHANGELOG.md @@ -2,7 +2,7 @@ The tracking for changes started in v2.1 -## v3.3.3 [TBD] +## v3.3.3 [11-March-2024] * [[#118](https://github.com/fmalmeida/bacannot/issues/116)] * Add a parameter to allow user to skip `INTEGRON_FINDER` execution.
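With the publishDir fixes in this final pair of patches, each module's results are nested under the sample's own directory instead of being assembled from interpolated paths; a rough sketch of the expected layout, assuming a hypothetical sample named `ecoli` (sample name and top-level `results/` directory are illustrative):

```
results/
└── ecoli/
    ├── integron_finder/
    ├── plasmids/
    │   └── mob_suite/
    └── tools_versioning/
```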