From 4b918c5ba595027effb666857cef075ec34980c5 Mon Sep 17 00:00:00 2001
From: Lili Andersson-Li <64467552+LilyAnderssonLee@users.noreply.github.com>
Date: Mon, 29 Apr 2024 10:23:06 +0200
Subject: [PATCH 01/35] bump version
---
CHANGELOG.md | 10 ++++++++++
assets/multiqc_config.yml | 4 ++--
nextflow.config | 2 +-
3 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d607106e..b1435a66 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,16 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## v1.1.8dev - Augmented Akita Patch []
+
+### `Added`
+
+### `Fixed`
+
+### `Dependencies`
+
+### `Deprecated`
+
## v1.1.7 - Augmented Akita Patch [2024-04-25]
### `Added`
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index b9c37c75..e2fffe8a 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,7 +1,7 @@
report_comment: >
- This report has been generated by the nf-core/taxprofiler
+ This report has been generated by the nf-core/taxprofiler
analysis pipeline. For information about how to interpret these results, please see the
- documentation.
+ documentation.
report_section_order:
"nf-core-taxprofiler-methods-description":
diff --git a/nextflow.config b/nextflow.config
index 0176e77a..4a125626 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -378,7 +378,7 @@ manifest {
description = """Taxonomic classification and profiling of shotgun short- and long-read metagenomic data"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
- version = '1.1.7'
+ version = '1.1.8'
doi = '10.1101/2023.10.20.563221'
}
From 24ec75b2944063466c94d9001cce0f717c8cc01a Mon Sep 17 00:00:00 2001
From: nf-core-bot
Date: Wed, 8 May 2024 14:58:14 +0000
Subject: [PATCH 02/35] Template update for nf-core/tools version 2.14.0
---
.editorconfig | 6 +-
.github/PULL_REQUEST_TEMPLATE.md | 2 +-
.github/workflows/awsfulltest.yml | 10 +-
.github/workflows/awstest.yml | 12 +-
.github/workflows/ci.yml | 4 +-
.github/workflows/download_pipeline.yml | 22 ++-
.github/workflows/fix-linting.yml | 6 +-
.github/workflows/linting.yml | 18 +-
.github/workflows/linting_comment.yml | 2 +-
.github/workflows/release-announcements.yml | 6 +-
.nf-core.yml | 1 +
.pre-commit-config.yaml | 3 +
CHANGELOG.md | 2 +-
README.md | 2 +-
assets/multiqc_config.yml | 6 +-
conf/base.config | 3 -
conf/modules.config | 8 -
conf/test.config | 2 +-
conf/test_full.config | 2 +-
docs/usage.md | 2 +
modules.json | 4 +-
modules/nf-core/fastqc/main.nf | 6 +
nextflow.config | 178 +++++++++---------
nextflow_schema.json | 7 +
pyproject.toml | 15 --
.../utils_nfcore_taxprofiler_pipeline/main.nf | 16 +-
.../nf-core/utils_nfcore_pipeline/main.nf | 8 +-
workflows/taxprofiler.nf | 46 +++--
28 files changed, 226 insertions(+), 173 deletions(-)
delete mode 100644 pyproject.toml
diff --git a/.editorconfig b/.editorconfig
index dd9ffa53..72dda289 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -28,10 +28,6 @@ indent_style = unset
[/assets/email*]
indent_size = unset
-# ignore Readme
-[README.md]
-indent_style = unset
-
-# ignore python
+# ignore python and markdown
[*.{py,md}]
indent_style = unset
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 888970b6..69a2dd8e 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/taxp
- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/taxprofiler/tree/master/.github/CONTRIBUTING.md)
- [ ] If necessary, also make a PR on the nf-core/taxprofiler _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
- [ ] Make sure your code lints (`nf-core lint`).
-- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`).
+- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `).
- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `).
- [ ] Usage Documentation in `docs/usage.md` is updated.
- [ ] Output Documentation in `docs/output.md` is updated.
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
index 47fda249..da8c95dd 100644
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -8,12 +8,12 @@ on:
types: [published]
workflow_dispatch:
jobs:
- run-tower:
+ run-platform:
name: Run AWS full tests
if: github.repository == 'nf-core/taxprofiler'
runs-on: ubuntu-latest
steps:
- - name: Launch workflow via tower
+ - name: Launch workflow via Seqera Platform
uses: seqeralabs/action-tower-launch@v2
# TODO nf-core: You can customise AWS full pipeline tests as required
# Add full size test data (but still relatively small datasets for few samples)
@@ -33,7 +33,7 @@ jobs:
- uses: actions/upload-artifact@v4
with:
- name: Tower debug log file
+ name: Seqera Platform debug log file
path: |
- tower_action_*.log
- tower_action_*.json
+ seqera_platform_action_*.log
+ seqera_platform_action_*.json
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
index 7b722e77..08f95b7e 100644
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -5,13 +5,13 @@ name: nf-core AWS test
on:
workflow_dispatch:
jobs:
- run-tower:
+ run-platform:
name: Run AWS tests
if: github.repository == 'nf-core/taxprofiler'
runs-on: ubuntu-latest
steps:
- # Launch workflow using Tower CLI tool action
- - name: Launch workflow via tower
+ # Launch workflow using Seqera Platform CLI tool action
+ - name: Launch workflow via Seqera Platform
uses: seqeralabs/action-tower-launch@v2
with:
workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
@@ -27,7 +27,7 @@ jobs:
- uses: actions/upload-artifact@v4
with:
- name: Tower debug log file
+ name: Seqera Platform debug log file
path: |
- tower_action_*.log
- tower_action_*.json
+ seqera_platform_action_*.log
+ seqera_platform_action_*.json
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 48699335..72f908a1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,10 +28,10 @@ jobs:
- "latest-everything"
steps:
- name: Check out pipeline code
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
+ uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
- name: Install Nextflow
- uses: nf-core/setup-nextflow@v1
+ uses: nf-core/setup-nextflow@v2
with:
version: "${{ matrix.NXF_VER }}"
diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml
index 08622fd5..2d20d644 100644
--- a/.github/workflows/download_pipeline.yml
+++ b/.github/workflows/download_pipeline.yml
@@ -14,6 +14,8 @@ on:
pull_request:
types:
- opened
+ - edited
+ - synchronize
branches:
- master
pull_request_target:
@@ -28,11 +30,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Install Nextflow
- uses: nf-core/setup-nextflow@v1
+ uses: nf-core/setup-nextflow@v2
- - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
+ - name: Disk space cleanup
+ uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+
+ - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
with:
- python-version: "3.11"
+ python-version: "3.12"
architecture: "x64"
- uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7
with:
@@ -65,8 +70,17 @@ jobs:
- name: Inspect download
run: tree ./${{ env.REPOTITLE_LOWERCASE }}
- - name: Run the downloaded pipeline
+ - name: Run the downloaded pipeline (stub)
+ id: stub_run_pipeline
+ continue-on-error: true
env:
NXF_SINGULARITY_CACHEDIR: ./
NXF_SINGULARITY_HOME_MOUNT: true
run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results
+ - name: Run the downloaded pipeline (stub run not supported)
+ id: run_pipeline
+ if: ${{ job.steps.stub_run_pipeline.status == failure() }}
+ env:
+ NXF_SINGULARITY_CACHEDIR: ./
+ NXF_SINGULARITY_HOME_MOUNT: true
+ run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results
diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml
index 74c1ce02..7a068de2 100644
--- a/.github/workflows/fix-linting.yml
+++ b/.github/workflows/fix-linting.yml
@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
steps:
# Use the @nf-core-bot token to check out so we can push later
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
+ - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
with:
token: ${{ secrets.nf_core_bot_auth_token }}
@@ -32,9 +32,9 @@ jobs:
GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }}
# Install and run pre-commit
- - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
+ - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
with:
- python-version: 3.11
+ python-version: "3.12"
- name: Install pre-commit
run: pip install pre-commit
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index 073e1876..a3fb2541 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -14,12 +14,12 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
+ - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
- - name: Set up Python 3.11
- uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
+ - name: Set up Python 3.12
+ uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
with:
- python-version: 3.11
+ python-version: "3.12"
cache: "pip"
- name: Install pre-commit
@@ -32,14 +32,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out pipeline code
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
+ uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
- name: Install Nextflow
- uses: nf-core/setup-nextflow@v1
+ uses: nf-core/setup-nextflow@v2
- - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
+ - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
with:
- python-version: "3.11"
+ python-version: "3.12"
architecture: "x64"
- name: Install dependencies
@@ -60,7 +60,7 @@ jobs:
- name: Upload linting log file artifact
if: ${{ always() }}
- uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4
+ uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4
with:
name: linting-logs
path: |
diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml
index b706875f..40acc23f 100644
--- a/.github/workflows/linting_comment.yml
+++ b/.github/workflows/linting_comment.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Download lint results
- uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3
+ uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3
with:
workflow: linting.yml
workflow_conclusion: completed
diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml
index d468aeaa..03ecfcf7 100644
--- a/.github/workflows/release-announcements.yml
+++ b/.github/workflows/release-announcements.yml
@@ -12,7 +12,7 @@ jobs:
- name: get topics and convert to hashtags
id: get_topics
run: |
- curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' >> $GITHUB_OUTPUT
+ echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT
- uses: rzr/fediverse-action@master
with:
@@ -25,13 +25,13 @@ jobs:
Please see the changelog: ${{ github.event.release.html_url }}
- ${{ steps.get_topics.outputs.GITHUB_OUTPUT }} #nfcore #openscience #nextflow #bioinformatics
+ ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics
send-tweet:
runs-on: ubuntu-latest
steps:
- - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
+ - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
with:
python-version: "3.10"
- name: Install dependencies
diff --git a/.nf-core.yml b/.nf-core.yml
index 3805dc81..d6daa403 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -1 +1,2 @@
repository_type: pipeline
+nf_core_version: "2.14.0"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index af57081f..4dc0f1dc 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,6 +3,9 @@ repos:
rev: "v3.1.0"
hooks:
- id: prettier
+ additional_dependencies:
+ - prettier@3.2.5
+
- repo: https://github.com/editorconfig-checker/editorconfig-checker.python
rev: "2.7.3"
hooks:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 313368e1..9147a2ff 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## v1.1.6dev - [date]
+## v1.1.8 - [date]
Initial release of nf-core/taxprofiler, created with the [nf-core](https://nf-co.re/) template.
diff --git a/README.md b/README.md
index 925afc70..51ee4946 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@
[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
-[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/taxprofiler)
+[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/taxprofiler)
[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23taxprofiler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/taxprofiler)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 8e75884e..a94ccdf3 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,7 +1,9 @@
report_comment: >
- This report has been generated by the nf-core/taxprofiler
+
+ This report has been generated by the nf-core/taxprofiler
analysis pipeline. For information about how to interpret these results, please see the
- documentation.
+ documentation.
+
report_section_order:
"nf-core-taxprofiler-methods-description":
order: -1000
diff --git a/conf/base.config b/conf/base.config
index 372f0798..05b8bb74 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -59,7 +59,4 @@ process {
errorStrategy = 'retry'
maxRetries = 2
}
- withName:CUSTOM_DUMPSOFTWAREVERSIONS {
- cache = false
- }
}
diff --git a/conf/modules.config b/conf/modules.config
index e3ea8fa6..d203d2b6 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -22,14 +22,6 @@ process {
ext.args = '--quiet'
}
- withName: CUSTOM_DUMPSOFTWAREVERSIONS {
- publishDir = [
- path: { "${params.outdir}/pipeline_info" },
- mode: params.publish_dir_mode,
- pattern: '*_versions.yml'
- ]
- }
-
withName: 'MULTIQC' {
ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
publishDir = [
diff --git a/conf/test.config b/conf/test.config
index 42772cfe..c60bebf8 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -22,7 +22,7 @@ params {
// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
+ input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
// Genome references
genome = 'R64-1-1'
diff --git a/conf/test_full.config b/conf/test_full.config
index 49a10a0f..8ad76741 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -17,7 +17,7 @@ params {
// Input data for full size test
// TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
// TODO nf-core: Give any required params for the test so that command line flags are not needed
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
+ input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
// Genome references
genome = 'R64-1-1'
diff --git a/docs/usage.md b/docs/usage.md
index 286da89c..cfd7f3e2 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -156,6 +156,8 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof
- A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/)
- `apptainer`
- A generic configuration profile to be used with [Apptainer](https://apptainer.org/)
+- `wave`
+ - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later).
- `conda`
- A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer.
diff --git a/modules.json b/modules.json
index be780a86..e0e3d5e9 100644
--- a/modules.json
+++ b/modules.json
@@ -7,7 +7,7 @@
"nf-core": {
"fastqc": {
"branch": "master",
- "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c",
+ "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd",
"installed_by": ["modules"]
},
"multiqc": {
@@ -26,7 +26,7 @@
},
"utils_nfcore_pipeline": {
"branch": "master",
- "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
+ "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3",
"installed_by": ["subworkflows"]
},
"utils_nfvalidation_plugin": {
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
index 9e19a74c..d79f1c86 100644
--- a/modules/nf-core/fastqc/main.nf
+++ b/modules/nf-core/fastqc/main.nf
@@ -25,6 +25,11 @@ process FASTQC {
def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] }
def rename_to = old_new_pairs*.join(' ').join(' ')
def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ')
+
+ def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB')
+ // FastQC memory value allowed range (100 - 10000)
+ def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb)
+
"""
printf "%s %s\\n" $rename_to | while read old_name new_name; do
[ -f "\${new_name}" ] || ln -s \$old_name \$new_name
@@ -33,6 +38,7 @@ process FASTQC {
fastqc \\
$args \\
--threads $task.cpus \\
+ --memory $fastqc_memory \\
$renamed_files
cat <<-END_VERSIONS > versions.yml
diff --git a/nextflow.config b/nextflow.config
index c8212265..5f1880d8 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -16,7 +16,8 @@ params {
genome = null
igenomes_base = 's3://ngi-igenomes/igenomes/'
igenomes_ignore = false
- fasta = null// MultiQC options
+
+ // MultiQC options
multiqc_config = null
multiqc_title = null
multiqc_logo = null
@@ -24,15 +25,16 @@ params {
multiqc_methods_description = null
// Boilerplate options
- outdir = null
- publish_dir_mode = 'copy'
- email = null
- email_on_fail = null
- plaintext_email = false
- monochrome_logs = false
- hook_url = null
- help = false
- version = false
+ outdir = null
+ publish_dir_mode = 'copy'
+ email = null
+ email_on_fail = null
+ plaintext_email = false
+ monochrome_logs = false
+ hook_url = null
+ help = false
+ version = false
+ pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/'
// Config options
config_profile_name = null
@@ -68,103 +70,109 @@ try {
}
// Load nf-core/taxprofiler custom profiles from different institutions.
-// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs!
-// try {
-// includeConfig "${params.custom_config_base}/pipeline/taxprofiler.config"
-// } catch (Exception e) {
-// System.err.println("WARNING: Could not load nf-core/config/taxprofiler profiles: ${params.custom_config_base}/pipeline/taxprofiler.config")
-// }
+try {
+ includeConfig "${params.custom_config_base}/pipeline/taxprofiler.config"
+} catch (Exception e) {
+ System.err.println("WARNING: Could not load nf-core/config/taxprofiler profiles: ${params.custom_config_base}/pipeline/taxprofiler.config")
+}
profiles {
debug {
- dumpHashes = true
- process.beforeScript = 'echo $HOSTNAME'
- cleanup = false
+ dumpHashes = true
+ process.beforeScript = 'echo $HOSTNAME'
+ cleanup = false
nextflow.enable.configProcessNamesValidation = true
}
conda {
- conda.enabled = true
- docker.enabled = false
- singularity.enabled = false
- podman.enabled = false
- shifter.enabled = false
- charliecloud.enabled = false
- channels = ['conda-forge', 'bioconda', 'defaults']
- apptainer.enabled = false
+ conda.enabled = true
+ docker.enabled = false
+ singularity.enabled = false
+ podman.enabled = false
+ shifter.enabled = false
+ charliecloud.enabled = false
+ conda.channels = ['conda-forge', 'bioconda', 'defaults']
+ apptainer.enabled = false
}
mamba {
- conda.enabled = true
- conda.useMamba = true
- docker.enabled = false
- singularity.enabled = false
- podman.enabled = false
- shifter.enabled = false
- charliecloud.enabled = false
- apptainer.enabled = false
+ conda.enabled = true
+ conda.useMamba = true
+ docker.enabled = false
+ singularity.enabled = false
+ podman.enabled = false
+ shifter.enabled = false
+ charliecloud.enabled = false
+ apptainer.enabled = false
}
docker {
- docker.enabled = true
- conda.enabled = false
- singularity.enabled = false
- podman.enabled = false
- shifter.enabled = false
- charliecloud.enabled = false
- apptainer.enabled = false
- docker.runOptions = '-u $(id -u):$(id -g)'
+ docker.enabled = true
+ conda.enabled = false
+ singularity.enabled = false
+ podman.enabled = false
+ shifter.enabled = false
+ charliecloud.enabled = false
+ apptainer.enabled = false
+ docker.runOptions = '-u $(id -u):$(id -g)'
}
arm {
- docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
+ docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
}
singularity {
- singularity.enabled = true
- singularity.autoMounts = true
- conda.enabled = false
- docker.enabled = false
- podman.enabled = false
- shifter.enabled = false
- charliecloud.enabled = false
- apptainer.enabled = false
+ singularity.enabled = true
+ singularity.autoMounts = true
+ conda.enabled = false
+ docker.enabled = false
+ podman.enabled = false
+ shifter.enabled = false
+ charliecloud.enabled = false
+ apptainer.enabled = false
}
podman {
- podman.enabled = true
- conda.enabled = false
- docker.enabled = false
- singularity.enabled = false
- shifter.enabled = false
- charliecloud.enabled = false
- apptainer.enabled = false
+ podman.enabled = true
+ conda.enabled = false
+ docker.enabled = false
+ singularity.enabled = false
+ shifter.enabled = false
+ charliecloud.enabled = false
+ apptainer.enabled = false
}
shifter {
- shifter.enabled = true
- conda.enabled = false
- docker.enabled = false
- singularity.enabled = false
- podman.enabled = false
- charliecloud.enabled = false
- apptainer.enabled = false
+ shifter.enabled = true
+ conda.enabled = false
+ docker.enabled = false
+ singularity.enabled = false
+ podman.enabled = false
+ charliecloud.enabled = false
+ apptainer.enabled = false
}
charliecloud {
- charliecloud.enabled = true
- conda.enabled = false
- docker.enabled = false
- singularity.enabled = false
- podman.enabled = false
- shifter.enabled = false
- apptainer.enabled = false
+ charliecloud.enabled = true
+ conda.enabled = false
+ docker.enabled = false
+ singularity.enabled = false
+ podman.enabled = false
+ shifter.enabled = false
+ apptainer.enabled = false
}
apptainer {
- apptainer.enabled = true
- apptainer.autoMounts = true
- conda.enabled = false
- docker.enabled = false
- singularity.enabled = false
- podman.enabled = false
- shifter.enabled = false
- charliecloud.enabled = false
+ apptainer.enabled = true
+ apptainer.autoMounts = true
+ conda.enabled = false
+ docker.enabled = false
+ singularity.enabled = false
+ podman.enabled = false
+ shifter.enabled = false
+ charliecloud.enabled = false
+ }
+ wave {
+ apptainer.ociAutoPull = true
+ singularity.ociAutoPull = true
+ wave.enabled = true
+ wave.freeze = true
+ wave.strategy = 'conda,container'
}
gitpod {
- executor.name = 'local'
- executor.cpus = 4
- executor.memory = 8.GB
+ executor.name = 'local'
+ executor.cpus = 4
+ executor.memory = 8.GB
}
test { includeConfig 'conf/test.config' }
test_full { includeConfig 'conf/test_full.config' }
@@ -231,7 +239,7 @@ manifest {
description = """Taxonomic classification and profiling of shotgun short- and long-read metagenomic data"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
- version = '1.1.6dev'
+ version = '1.1.8'
doi = ''
}
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b7fd15d7..5f7b2f63 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -265,6 +265,13 @@
"description": "Validation of parameters in lenient more.",
"hidden": true,
"help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
+ },
+ "pipelines_testdata_base_path": {
+ "type": "string",
+ "fa_icon": "far fa-check-circle",
+ "description": "Base URL or local path to location of pipeline test dataset files",
+ "default": "https://raw.githubusercontent.com/nf-core/test-datasets/",
+ "hidden": true
}
}
}
diff --git a/pyproject.toml b/pyproject.toml
deleted file mode 100644
index 56110621..00000000
--- a/pyproject.toml
+++ /dev/null
@@ -1,15 +0,0 @@
-# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff.
-# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
-[tool.ruff]
-line-length = 120
-target-version = "py38"
-cache-dir = "~/.cache/ruff"
-
-[tool.ruff.lint]
-select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"]
-
-[tool.ruff.lint.isort]
-known-first-party = ["nf_core"]
-
-[tool.ruff.lint.per-file-ignores]
-"__init__.py" = ["E402", "F401"]
diff --git a/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf b/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf
index 312db322..e4b9018c 100644
--- a/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf
@@ -140,6 +140,10 @@ workflow PIPELINE_COMPLETION {
imNotification(summary_params, hook_url)
}
}
+
+ workflow.onError {
+ log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting"
+ }
}
/*
@@ -230,8 +234,16 @@ def methodsDescriptionText(mqc_methods_yaml) {
meta["manifest_map"] = workflow.manifest.toMap()
// Pipeline DOI
- meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
- meta["nodoi_text"] = meta.manifest_map.doi ? "": "
If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
"
+ if (meta.manifest_map.doi) {
+ // Using a loop to handle multiple DOIs
+ // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
+ // Removing ` ` since the manifest.doi is a string and not a proper list
+ def temp_doi_ref = ""
+ String[] manifest_doi = meta.manifest_map.doi.tokenize(",")
+ for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), "
+ meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2)
+ } else meta["doi_text"] = ""
+ meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
"
// Tool references
meta["tool_citations"] = ""
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
index a8b55d6f..14558c39 100644
--- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
@@ -65,9 +65,15 @@ def checkProfileProvided(nextflow_cli_args) {
// Citation string for pipeline
//
def workflowCitation() {
+ def temp_doi_ref = ""
+ String[] manifest_doi = workflow.manifest.doi.tokenize(",")
+ // Using a loop to handle multiple DOIs
+ // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
+ // Removing ` ` since the manifest.doi is a string and not a proper list
+ for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n"
return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
"* The pipeline\n" +
- " ${workflow.manifest.doi}\n\n" +
+ temp_doi_ref + "\n" +
"* The nf-core framework\n" +
" https://doi.org/10.1038/s41587-020-0439-x\n\n" +
"* Software dependencies\n" +
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index e722ebaa..ae4f6268 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -40,22 +40,44 @@ workflow TAXPROFILER {
// Collate and save software versions
//
softwareVersionsToYAML(ch_versions)
- .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true)
- .set { ch_collated_versions }
+ .collectFile(
+ storeDir: "${params.outdir}/pipeline_info",
+ name: 'nf_core_pipeline_software_mqc_versions.yml',
+ sort: true,
+ newLine: true
+ ).set { ch_collated_versions }
//
// MODULE: MultiQC
//
- ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
- ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty()
- ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty()
- summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
- ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
- ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
- ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description))
- ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
- ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)
- ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false))
+ ch_multiqc_config = Channel.fromPath(
+ "$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+ ch_multiqc_custom_config = params.multiqc_config ?
+ Channel.fromPath(params.multiqc_config, checkIfExists: true) :
+ Channel.empty()
+ ch_multiqc_logo = params.multiqc_logo ?
+ Channel.fromPath(params.multiqc_logo, checkIfExists: true) :
+ Channel.empty()
+
+ summary_params = paramsSummaryMap(
+ workflow, parameters_schema: "nextflow_schema.json")
+ ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
+
+ ch_multiqc_custom_methods_description = params.multiqc_methods_description ?
+ file(params.multiqc_methods_description, checkIfExists: true) :
+ file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
+ ch_methods_description = Channel.value(
+ methodsDescriptionText(ch_multiqc_custom_methods_description))
+
+ ch_multiqc_files = ch_multiqc_files.mix(
+ ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
+ ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)
+ ch_multiqc_files = ch_multiqc_files.mix(
+ ch_methods_description.collectFile(
+ name: 'methods_description_mqc.yaml',
+ sort: true
+ )
+ )
MULTIQC (
ch_multiqc_files.collect(),
From 4726da5f186c7c577b139c9d9dadf6038d3ea3eb Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Wed, 8 May 2024 21:06:16 +0200
Subject: [PATCH 03/35] Make input validation stricter to ensure run_accessions
are unique within each sample
---
assets/schema_input.json | 1 +
1 file changed, 1 insertion(+)
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 6acc00f7..cc335436 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -14,6 +14,7 @@
},
"run_accession": {
"type": "string",
+ "unique": ["sample"],
"errorMessage": "Run accession must be provided and cannot contain spaces."
},
"instrument_platform": {
From 5ffcedf6ff36c83b907c43b8b6b6f56c12a0fe52 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Wed, 8 May 2024 21:08:47 +0200
Subject: [PATCH 04/35] Update CHANGELOG
---
CHANGELOG.md | 2 ++
1 file changed, 2 insertions(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b1435a66..15660910 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Fixed`
+- [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133)
+
### `Dependencies`
### `Deprecated`
From 71328f85b2140d6c2f6a2ff04fee51a127844339 Mon Sep 17 00:00:00 2001
From: nf-core-bot
Date: Wed, 8 May 2024 19:09:56 +0000
Subject: [PATCH 05/35] [automated] Fix code linting
---
CHANGELOG.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 15660910..a2f9d25a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Fixed`
-- [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133)
+- [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133)
### `Dependencies`
From c19d1c75fe4e6b7be6b5b62ae63bb4816735ce97 Mon Sep 17 00:00:00 2001
From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>
Date: Wed, 8 May 2024 23:07:58 +0200
Subject: [PATCH 06/35] Update documentation
---
docs/usage.md | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/docs/usage.md b/docs/usage.md
index edeca74d..00cdb564 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -46,11 +46,13 @@ The `sample` identifiers have to be the same when you have re-sequenced the same
```csv title="samplesheet.csv"
sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
-2612,run1,ILLUMINA,2612_run1_R1.fq.gz,,
-2612,run2,ILLUMINA,2612_run2_R1.fq.gz,,
-2612,run3,ILLUMINA,2612_run3_R1.fq.gz,2612_run3_R2.fq.gz,
+2612,lane1,ILLUMINA,2612_lane1_R1.fq.gz,2612_lane1_R2.fq.gz,
+2612,lane2,ILLUMINA,2612_lane2_R1.fq.gz,2612_lane2_R2.fq.gz,
+2612,lane3,ILLUMINA,2612_lane3_R1.fq.gz,,
```
+Please note that the column name `run_accession` is following the ENA terms and is a single or paired-end set of demultiplexed FASTQs. Given that demultiplexing happens per lane, each sequencing pair from each lane is a 'run' and therefore you get multiple 'runs' per sample, which can span across both lanes and sequencing libraries.
+
:::warning
Runs of the same sample sequenced on Illumina platforms with a combination of single and paired-end data will **not** be run-wise concatenated, unless pair-merging is specified. In the example above, `run3` will be profiled independently of `run1` and `run2` if pairs are not merged.
:::
From 8699a98736f65a49b380c4b8320caf34198faebd Mon Sep 17 00:00:00 2001
From: nf-core-bot
Date: Thu, 9 May 2024 11:43:36 +0000
Subject: [PATCH 07/35] Template update for nf-core/tools version 2.14.1
---
.github/workflows/linting.yml | 1 -
.nf-core.yml | 2 +-
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index a3fb2541..1fcafe88 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -20,7 +20,6 @@ jobs:
uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
with:
python-version: "3.12"
- cache: "pip"
- name: Install pre-commit
run: pip install pre-commit
diff --git a/.nf-core.yml b/.nf-core.yml
index d6daa403..e0b85a77 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -1,2 +1,2 @@
repository_type: pipeline
-nf_core_version: "2.14.0"
+nf_core_version: "2.14.1"
From 6ded3101f7668fe874dd2fabb56c3cf1f6de2431 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 16 May 2024 08:40:44 +0200
Subject: [PATCH 08/35] Fix linting
---
conf/modules.config | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/conf/modules.config b/conf/modules.config
index d8e704e5..1956605e 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -788,7 +788,7 @@ process {
pattern: '*.{tsv,csv,arrow,parquet,biom}'
]
}
-
+
withName: 'MULTIQC' {
ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
publishDir = [
From aa3e5528a6c25ba0afd70591e3fd143c4fb5ad0d Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 16 May 2024 08:43:23 +0200
Subject: [PATCH 09/35] Wholesale replacement of hard link with param for test
data
---
conf/test.config | 2 +-
conf/test_adapterremoval.config | 6 +++---
conf/test_bbduk.config | 6 +++---
conf/test_falco.config | 6 +++---
conf/test_fastp.config | 6 +++---
conf/test_krakenuniq.config | 8 ++++----
conf/test_malt.config | 6 +++---
conf/test_motus.config | 4 ++--
conf/test_nopreprocessing.config | 6 +++---
conf/test_noprofiling.config | 6 +++---
conf/test_nothing.config | 6 +++---
conf/test_prinseqplusplus.config | 6 +++---
12 files changed, 34 insertions(+), 34 deletions(-)
diff --git a/conf/test.config b/conf/test.config
index 90b4a2bd..e8cd48da 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -21,7 +21,7 @@ params {
// Input data
input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv'
- databases = params.pipelines_testdata_base_path + '/taxprofiler/database_v1.1.csv'
+ databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_mergepairs = true
diff --git a/conf/test_adapterremoval.config b/conf/test_adapterremoval.config
index c3422d02..9f9d5b43 100644
--- a/conf/test_adapterremoval.config
+++ b/conf/test_adapterremoval.config
@@ -20,8 +20,8 @@ params {
max_time = '6.h'
// Input data
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
- databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
+ input = params.pipelines_testdata_base_path + 'samplesheet.csv'
+ databases = params.pipelines_testdata_base_path + 'database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_tool = 'adapterremoval'
@@ -29,7 +29,7 @@ params {
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
- hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+ hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = false
diff --git a/conf/test_bbduk.config b/conf/test_bbduk.config
index 623fe191..e92fea8c 100644
--- a/conf/test_bbduk.config
+++ b/conf/test_bbduk.config
@@ -20,8 +20,8 @@ params {
max_time = '6.h'
// Input data
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
- databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
+ input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv'
+ databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
perform_shortread_complexityfilter = true
@@ -29,7 +29,7 @@ params {
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
- hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+ hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = false
diff --git a/conf/test_falco.config b/conf/test_falco.config
index 3fb77c03..03d80593 100644
--- a/conf/test_falco.config
+++ b/conf/test_falco.config
@@ -20,8 +20,8 @@ params {
max_time = '6.h'
// Input data
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
- databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
+ input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv'
+ databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv'
preprocessing_qc_tool = 'falco'
perform_shortread_qc = true
perform_longread_qc = true
@@ -29,7 +29,7 @@ params {
perform_shortread_hostremoval = false
perform_longread_hostremoval = false
perform_runmerging = false
- hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+ hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = false
run_kraken2 = false
run_bracken = false
diff --git a/conf/test_fastp.config b/conf/test_fastp.config
index 3feeae7a..52767546 100644
--- a/conf/test_fastp.config
+++ b/conf/test_fastp.config
@@ -20,8 +20,8 @@ params {
max_time = '6.h'
// Input data
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
- databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
+ input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv'
+ databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_tool = 'fastp'
@@ -30,7 +30,7 @@ params {
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
- hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+ hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = false
diff --git a/conf/test_krakenuniq.config b/conf/test_krakenuniq.config
index e93de158..f116af38 100644
--- a/conf/test_krakenuniq.config
+++ b/conf/test_krakenuniq.config
@@ -24,8 +24,8 @@ params {
max_time = '6.h'
// Input data
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
- databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_krakenuniq.csv'
+ input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv'
+ databases = params.pipelines_testdata_base_path + 'taxprofiler/database_krakenuniq.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_mergepairs = true
@@ -33,7 +33,7 @@ params {
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
- hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+ hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = false
run_kraken2 = false
run_bracken = false
@@ -47,7 +47,7 @@ params {
kmcp_mode = 0
run_ganon = false
run_krona = true
- krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
+ krona_taxonomy_directory = params.pipelines_testdata_base_path + 'modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
malt_save_reads = false
kraken2_save_reads = false
centrifuge_save_reads = false
diff --git a/conf/test_malt.config b/conf/test_malt.config
index 7e5f2df3..d4a91771 100644
--- a/conf/test_malt.config
+++ b/conf/test_malt.config
@@ -24,15 +24,15 @@ params {
max_time = '6.h'
// Input data
- input = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/samplesheet_malt.csv'
- databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
+ input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet_malt.csv'
+ databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv'
perform_shortread_qc = false
perform_longread_qc = false
perform_shortread_complexityfilter = false
perform_shortread_hostremoval = false
perform_longread_hostremoval = false
perform_runmerging = false
- hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+ hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = false
run_kraken2 = false
run_bracken = false
diff --git a/conf/test_motus.config b/conf/test_motus.config
index ef1a2276..44cc36ff 100644
--- a/conf/test_motus.config
+++ b/conf/test_motus.config
@@ -24,7 +24,7 @@ params {
max_time = '6.h'
// Input data
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
+ input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv'
databases = 'database_motus.csv'
perform_shortread_qc = false
perform_longread_qc = false
@@ -32,7 +32,7 @@ params {
perform_shortread_hostremoval = false
perform_longread_hostremoval = false
perform_runmerging = false
- hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+ hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = false
run_kraken2 = false
run_bracken = false
diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config
index 004a49e8..98f34b07 100644
--- a/conf/test_nopreprocessing.config
+++ b/conf/test_nopreprocessing.config
@@ -20,15 +20,15 @@ params {
max_time = '6.h'
// Input data
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
- databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
+ input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv'
+ databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv'
perform_shortread_qc = false
perform_longread_qc = false
perform_shortread_complexityfilter = false
perform_shortread_hostremoval = false
perform_longread_hostremoval = false
perform_runmerging = false
- hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+ hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = true
diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config
index 7cf2317d..b1a52922 100644
--- a/conf/test_noprofiling.config
+++ b/conf/test_noprofiling.config
@@ -20,8 +20,8 @@ params {
max_time = '6.h'
// Input data
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
- databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
+ input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv'
+ databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_mergepairs = true
@@ -29,7 +29,7 @@ params {
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
- hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+ hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = false
run_kraken2 = false
run_bracken = false
diff --git a/conf/test_nothing.config b/conf/test_nothing.config
index ed247ef4..504fe4ed 100644
--- a/conf/test_nothing.config
+++ b/conf/test_nothing.config
@@ -20,15 +20,15 @@ params {
max_time = '6.h'
// Input data
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
- databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
+ input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv'
+ databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv'
perform_shortread_qc = false
perform_longread_qc = false
perform_shortread_complexityfilter = false
perform_shortread_hostremoval = false
perform_longread_hostremoval = false
perform_runmerging = false
- hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+ hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = false
run_kraken2 = false
run_bracken = false
diff --git a/conf/test_prinseqplusplus.config b/conf/test_prinseqplusplus.config
index acc23aa8..19fb2a60 100644
--- a/conf/test_prinseqplusplus.config
+++ b/conf/test_prinseqplusplus.config
@@ -20,8 +20,8 @@ params {
max_time = '6.h'
// Input data
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
- databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
+ input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv'
+ databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
perform_shortread_complexityfilter = true
@@ -29,7 +29,7 @@ params {
perform_shortread_hostremoval = false
perform_longread_hostremoval = false
perform_runmerging = false
- hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+ hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = false
From 306d96a83e69e0aa04db2ec05becf879c72c5e32 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 16 May 2024 08:47:19 +0200
Subject: [PATCH 10/35] Fix versions
---
assets/multiqc_config.yml | 8 ++++----
nextflow.config | 2 +-
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index af0223f3..0d892103 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,9 +1,9 @@
report_comment: >
-
- This report has been generated by the nf-core/taxprofiler
+
+ This report has been generated by the nf-core/taxprofiler
analysis pipeline. For information about how to interpret these results, please see the
- documentation.
-
+ documentation.
+
report_section_order:
"nf-core-taxprofiler-methods-description":
order: -1000
diff --git a/nextflow.config b/nextflow.config
index b442638c..b8a9ba8f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -385,7 +385,7 @@ manifest {
description = """Taxonomic classification and profiling of shotgun short- and long-read metagenomic data"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
- version = '1.1.8'
+ version = '1.1.8dev'
doi = '10.1101/2023.10.20.563221'
}
From bd2207f3619fa879900f128f892f1492b4a64510 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 16 May 2024 08:55:24 +0200
Subject: [PATCH 11/35] Fix adapter removal and test_full paths
---
conf/test_adapterremoval.config | 4 ++--
conf/test_full.config | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/conf/test_adapterremoval.config b/conf/test_adapterremoval.config
index 9f9d5b43..5fb6d362 100644
--- a/conf/test_adapterremoval.config
+++ b/conf/test_adapterremoval.config
@@ -20,8 +20,8 @@ params {
max_time = '6.h'
// Input data
- input = params.pipelines_testdata_base_path + 'samplesheet.csv'
- databases = params.pipelines_testdata_base_path + 'database_v1.1.csv'
+ input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv'
+ databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_tool = 'adapterremoval'
diff --git a/conf/test_full.config b/conf/test_full.config
index 99af49e6..067940bb 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -13,8 +13,8 @@ params {
config_profile_description = 'Full test dataset to check pipeline function'
// Input data for full size test
- input = params.pipelines_testdata_base_path + 'test-datasets/taxprofiler/samplesheet_full.csv'
- databases = params.pipelines_testdata_base_path + 'test-datasets/taxprofiler/database_full_v1.1.csv'
+ input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet_full.csv'
+ databases = params.pipelines_testdata_base_path + 'taxprofiler/database_full_v1.1.csv'
// Genome references
hostremoval_reference = 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/819/615/GCA_000819615.1_ViralProj14015/GCA_000819615.1_ViralProj14015_genomic.fna.gz'
From ab74be6cedd93c19fafcc49a9e95d6c8f8f3c1f7 Mon Sep 17 00:00:00 2001
From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>
Date: Thu, 16 May 2024 09:31:37 +0200
Subject: [PATCH 12/35] Update docs/usage.md
Co-authored-by: James A. Fellows Yates
---
docs/usage.md | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/docs/usage.md b/docs/usage.md
index 00cdb564..37c24178 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -51,7 +51,13 @@ sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
2612,lane3,ILLUMINA,2612_lane3_R1.fq.gz,,
```
-Please note that the column name `run_accession` is following the ENA terms and is a single or paired-end set of demultiplexed FASTQs. Given that demultiplexing happens per lane, each sequencing pair from each lane is a 'run' and therefore you get multiple 'runs' per sample, which can span across both lanes and sequencing libraries.
+:::info
+Please note that the column name `run_accession` follows the definition of an ENA 'run'.
+A 'run' corresponds to a single or paired-end set of demultiplexed FASTQs.
+Given that demultiplexing of a given library happens per lane, each sequencing pair from each lane is a 'run'.
+Therefore, for each sample, you may get multiple 'runs' consisting of _both_ lanes (of the same library) _and_ sequencing libraries.
+Therefore ensure that each `run_accession` ID is unique, even if from the same sample!
+:::
:::warning
Runs of the same sample sequenced on Illumina platforms with a combination of single and paired-end data will **not** be run-wise concatenated, unless pair-merging is specified. In the example above, `run3` will be profiled independently of `run1` and `run2` if pairs are not merged.
From c3b624abb039bb5995a308f64f1b4d1342e90e95 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 16 May 2024 12:56:22 +0200
Subject: [PATCH 13/35] Install new module and start extracting seq type
---
modules.json | 2 +-
.../krakenuniq/preloadedkrakenuniq/main.nf | 59 +--
.../krakenuniq/preloadedkrakenuniq/meta.yml | 30 +-
.../preloadedkrakenuniq/tests/main.nf.test | 211 +++++++++-
.../tests/main.nf.test.snap | 374 ++++++++++--------
subworkflows/local/profiling.nf | 8 +-
6 files changed, 464 insertions(+), 220 deletions(-)
diff --git a/modules.json b/modules.json
index f8c101e0..26dc2317 100644
--- a/modules.json
+++ b/modules.json
@@ -137,7 +137,7 @@
},
"krakenuniq/preloadedkrakenuniq": {
"branch": "master",
- "git_sha": "8bbaa881ab9e59f3e18680550d65d52339640630",
+ "git_sha": "9de9365c3ca6071ec01705919f6667c718ef47b4",
"installed_by": ["modules"]
},
"krona/ktimporttaxonomy": {
diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf
index 59055bdb..78b2f3ab 100644
--- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf
+++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf
@@ -8,34 +8,37 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
'biocontainers/krakenuniq:1.0.4--pl5321h19e8d03_0' }"
input:
- tuple val(meta), path(fastqs)
- path db
+ tuple val(meta), path(sequences)
+ val sequence_type
+ path db
val ram_chunk_size
val save_output_reads
val report_file
val save_output
output:
- tuple val(meta), path('*.classified.fasta.gz') , optional:true, emit: classified_reads_fasta
- tuple val(meta), path('*.unclassified.fasta.gz') , optional:true, emit: unclassified_reads_fasta
- tuple val(meta), path('*.krakenuniq.classified.txt'), optional:true, emit: classified_assignment
- tuple val(meta), path('*.krakenuniq.report.txt') , emit: report
- path "versions.yml" , emit: versions
+ tuple val(meta), path("*.classified.${sequence_type}.gz") , optional:true, emit: classified_reads
+ tuple val(meta), path("*.unclassified.${sequence_type}.gz"), optional:true, emit: unclassified_reads
+ tuple val(meta), path('*.krakenuniq.classified.txt') , optional:true, emit: classified_assignment
+ tuple val(meta), path('*.krakenuniq.report.txt') , emit: report
+ path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
+ assert sequence_type in ['fasta', 'fastq']
+
def args = task.ext.args ?: ''
def args2 = task.ext.args ?: ''
- def classified = meta.single_end ? '"\${PREFIX}.classified.fasta"' : '"\${PREFIX}.merged.classified.fasta"'
- def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fasta"' : '"\${PREFIX}.merged.unclassified.fasta"'
- def classified_option = save_output_reads ? "--classified-out ${classified}" : ''
- def unclassified_option = save_output_reads ? "--unclassified-out ${unclassified}" : ''
+ classified = meta.single_end ? "\${PREFIX}.classified.${sequence_type}" : "\${PREFIX}.merged.classified.${sequence_type}"
+ unclassified = meta.single_end ? "\${PREFIX}.unclassified.${sequence_type}" : "\${PREFIX}.merged.unclassified.${sequence_type}"
+ classified_option = save_output_reads ? "--classified-out \"${classified}\"" : ''
+ unclassified_option = save_output_reads ? "--unclassified-out \"${unclassified}\"" : ''
def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : ''
def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : ''
- def compress_reads_command = save_output_reads ? 'gzip --no-name *.fasta' : ''
+ compress_reads_command = save_output_reads ? "find . -name '*.${sequence_type}' -print0 | xargs -0 -t -P ${task.cpus} -I % gzip --no-name %" : ''
if (meta.single_end) {
"""
krakenuniq \\
@@ -51,7 +54,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
echo "\${result%%.*}"
}
- printf "%s\\n" ${fastqs} | while read FASTQ; do \\
+ printf "%s\\n" ${sequences} | while read FASTQ; do \\
PREFIX="\$(strip_suffix "\${FASTQ}")"
krakenuniq \\
@@ -89,7 +92,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
echo "\${result%.}"
}
- printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\
+ printf "%s %s\\n" ${sequences} | while read FASTQ; do \\
read -r -a FASTQ <<< "\${FASTQ}"
PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)"
@@ -115,16 +118,18 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
}
stub:
+ assert sequence_type in ['fasta', 'fastq']
+
def args = task.ext.args ?: ''
def args2 = task.ext.args ?: ''
- def classified = meta.single_end ? '"\${PREFIX}.classified.fasta"' : '"\${PREFIX}.merged.classified.fasta"'
- def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fasta"' : '"\${PREFIX}.merged.unclassified.fasta"'
- def classified_option = save_output_reads ? "--classified-out ${classified}" : ''
- def unclassified_option = save_output_reads ? "--unclassified-out ${unclassified}" : ''
+ classified = meta.single_end ? "\${PREFIX}.classified.${sequence_type}" : "\${PREFIX}.merged.classified.${sequence_type}"
+ unclassified = meta.single_end ? "\${PREFIX}.unclassified.${sequence_type}" : "\${PREFIX}.merged.unclassified.${sequence_type}"
+ classified_option = save_output_reads ? "--classified-out \"${classified}\"" : ''
+ unclassified_option = save_output_reads ? "--unclassified-out \"${unclassified}\"" : ''
def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : ''
def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : ''
- def compress_reads_command = save_output_reads ? 'gzip --no-name *.fasta' : ''
+ compress_reads_command = save_output_reads ? "find . -name '*.${sequence_type}' -print0 | xargs -0 -t -P ${task.cpus} -I % gzip --no-name %" : ''
if (meta.single_end) {
"""
echo krakenuniq \\
@@ -148,7 +153,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
echo '<3 nf-core' | gzip -n > "\$1"
}
- printf "%s\\n" ${fastqs} | while read FASTQ; do \\
+ printf "%s\\n" ${sequences} | while read FASTQ; do \\
echo "\${FASTQ}"
PREFIX="\$(strip_suffix "\${FASTQ}")"
echo "\${PREFIX}"
@@ -165,11 +170,11 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
create_file "\${PREFIX}.krakenuniq.classified.txt"
create_file "\${PREFIX}.krakenuniq.report.txt"
- create_gzip_file "\${PREFIX}.classified.fasta.gz"
- create_gzip_file "\${PREFIX}.unclassified.fasta.gz"
+ create_gzip_file "\${PREFIX}.classified.${sequence_type}.gz"
+ create_gzip_file "\${PREFIX}.unclassified.${sequence_type}.gz"
done
- echo $compress_reads_command
+ echo "$compress_reads_command"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
@@ -201,7 +206,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
echo '<3 nf-core' | gzip -n > "\$1"
}
- printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\
+ printf "%s %s\\n" ${sequences} | while read FASTQ; do \\
read -r -a FASTQ <<< "\${FASTQ}"
echo "\${FASTQ[@]}"
PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)"
@@ -220,11 +225,11 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
create_file "\${PREFIX}.krakenuniq.classified.txt"
create_file "\${PREFIX}.krakenuniq.report.txt"
- create_gzip_file "\${PREFIX}.merged.classified.fasta.gz"
- create_gzip_file "\${PREFIX}.merged.unclassified.fasta.gz"
+ create_gzip_file "\${PREFIX}.merged.classified.${sequence_type}.gz"
+ create_gzip_file "\${PREFIX}.merged.unclassified.${sequence_type}.gz"
done
- echo $compress_reads_command
+ echo "$compress_reads_command"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml
index 4a6dffee..bb6409a6 100644
--- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml
+++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml
@@ -19,9 +19,13 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- - fastqs:
+ - sequences:
type: file
- description: List of input FastQ files
+ description: List of input files containing sequences. All of them must be either in FASTA or FASTQ format.
+ - sequence_type:
+ type: string
+ description: Format of all given sequencing files as literal string, either 'fasta' or 'fastq'.
+ pattern: "{fasta,fastq}"
- db:
type: directory
description: KrakenUniq database
@@ -32,31 +36,33 @@ input:
- save_output_reads:
type: boolean
description: |
- Optionally commands are added to save classified and unclassified reads as FASTA files.
- When the input is paired-end, the single output FASTA contains merged reads.
- - save_reads_assignment:
+ Optionally, commands are added to save classified and unclassified reads
+ as FASTQ or FASTA files depending on the input format. When the input
+ is paired-end, the single output FASTQ contains merged reads.
+ - report_file:
type: boolean
- description: |
- If true, an optional command is added to save a file reporting the taxonomic
- classification of each input read
+ description: Whether to generate a report of relative abundances.
+ - save_output:
+ type: boolean
+ description: Whether to save a file reporting the taxonomic classification of each input read.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- - classified_reads_fasta:
+ - classified_reads:
type: file
description: |
Reads classified as belonging to any of the taxa
in the KrakenUniq reference database.
- pattern: "*.classified.fasta.gz"
- - unclassified_reads_fasta:
+ pattern: "*.classified.{fastq,fasta}.gz"
+ - unclassified_reads:
type: file
description: |
Reads not classified to any of the taxa
in the KrakenUniq reference database.
- pattern: "*.unclassified.fasta.gz"
+ pattern: "*.unclassified.{fastq,fasta}.gz"
- classified_assignment:
type: file
description: |
diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test
index a7c44707..9e1d6700 100644
--- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test
+++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test
@@ -7,10 +7,23 @@ nextflow_process {
tag "modules_nfcore"
tag "krakenuniq"
tag "krakenuniq/preloadedkrakenuniq"
+ tag "untar"
- test("sarscov2 - Illumina FASTQ single - stub-run") {
- options "-stub-run"
+ setup {
+ run("UNTAR") {
+ script "modules/nf-core/untar/main.nf"
+ process {
+ """
+ input[0] = [
+ [id: 'krakenuniq'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/krakenuniq.tar.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+ test("sarscov2 - FASTA") {
when {
params {
outdir = "$outputDir"
@@ -19,15 +32,170 @@ nextflow_process {
"""
input[0] = [
[id:'test', single_end:true],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ input[1] = 'fasta'
+ input[2] = UNTAR.out.untar.map { it[1] }
+ input[3] = '1GB'
+ input[4] = true
+ input[5] = true
+ input[6] = true
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ // Report contains a timestamp.
+ { assert file(process.out.report.get(0).get(1)).name == 'genome.krakenuniq.report.txt' },
+ { assert file(process.out.unclassified_reads.get(0).get(1)).name == 'genome.unclassified.fasta.gz' },
+ { assert snapshot(
+ process.out.classified_reads,
+ process.out.classified_assignment,
+ process.out.versions
+ ).match('fasta') },
+ )
+ }
+
+ }
+
+ test("sarscov2 - Illumina FASTQ single") {
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id:'test', single_end:true],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true)
+ ]
+ input[1] = 'fastq'
+ input[2] = UNTAR.out.untar.map { it[1] }
+ input[3] = '1GB'
+ input[4] = true
+ input[5] = true
+ input[6] = true
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ // Report contains a timestamp.
+ { assert file(process.out.report.get(0).get(1)).name == 'test_interleaved.krakenuniq.report.txt' },
+ { assert snapshot(
+ process.out.classified_reads,
+ process.out.unclassified_reads,
+ process.out.classified_assignment,
+ process.out.versions
+ ).match('fastq-single') },
+ )
+ }
+
+ }
+
+ test("sarscov2 - Illumina FASTQ paired-end") {
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id:'test', single_end:false],
[
- file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
]
]
- input[1] = []
- input[2] = '8GB'
- input[3] = true
+ input[1] = 'fastq'
+ input[2] = UNTAR.out.untar.map { it[1] }
+ input[3] = '1GB'
+ input[4] = true
+ input[5] = true
+ input[6] = true
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ // Report contains a timestamp.
+ { assert file(process.out.report.get(0).get(1)).name == 'test.krakenuniq.report.txt' },
+ { assert file(process.out.unclassified_reads.get(0).get(1)).name == 'test.merged.unclassified.fastq.gz' },
+ { assert snapshot(
+ process.out.classified_reads,
+ process.out.classified_assignment,
+ process.out.versions
+ ).match('fastq-paired') },
+ )
+ }
+
+ }
+
+ test("sarscov2 - FASTA - stub") {
+ options "-stub-run"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id:'test', single_end:true],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ input[1] = 'fasta'
+ input[2] = UNTAR.out.untar.map { it[1] }
+ input[3] = '1GB'
+ input[4] = true
+ input[5] = true
+ input[6] = true
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ // Report contains a timestamp.
+ { assert file(process.out.report.get(0).get(1)).name == 'genome.krakenuniq.report.txt' },
+ { assert file(process.out.unclassified_reads.get(0).get(1)).name == 'genome.unclassified.fasta.gz' },
+ { assert snapshot(
+ process.out.classified_reads,
+ process.out.unclassified_reads,
+ process.out.classified_assignment,
+ process.out.versions
+ ).match('fasta-stub') },
+ )
+ }
+
+ }
+
+ test("sarscov2 - Illumina FASTQ single - stub") {
+ options "-stub-run"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id:'test', single_end:true],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true)
+ ]
+ input[1] = 'fastq'
+ input[2] = UNTAR.out.untar.map { it[1] }
+ input[3] = '1GB'
input[4] = true
input[5] = true
+ input[6] = true
"""
}
}
@@ -35,13 +203,20 @@ nextflow_process {
then {
assertAll (
{ assert process.success },
- { assert snapshot(process.out).match() },
+ // Report contains a timestamp.
+ { assert file(process.out.report.get(0).get(1)).name == 'test_interleaved.krakenuniq.report.txt' },
+ { assert snapshot(
+ process.out.classified_reads,
+ process.out.unclassified_reads,
+ process.out.classified_assignment,
+ process.out.versions
+ ).match('fastq-single-stub') },
)
}
}
- test("sarscov2 - Illumina FASTQ paired-end - stub-run") {
+ test("sarscov2 - Illumina FASTQ paired-end - stub") {
options "-stub-run"
when {
@@ -53,15 +228,16 @@ nextflow_process {
input[0] = [
[id:'test', single_end:false],
[
- file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
]
]
- input[1] = []
- input[2] = '8GB'
- input[3] = true
+ input[1] = 'fastq'
+ input[2] = UNTAR.out.untar.map { it[1] }
+ input[3] = '1GB'
input[4] = true
input[5] = true
+ input[6] = true
"""
}
}
@@ -69,7 +245,14 @@ nextflow_process {
then {
assertAll (
{ assert process.success },
- { assert snapshot(process.out).match() },
+ // Report contains a timestamp.
+ { assert file(process.out.report.get(0).get(1)).name == 'test.krakenuniq.report.txt' },
+ { assert snapshot(
+ process.out.classified_reads,
+ process.out.unclassified_reads,
+ process.out.classified_assignment,
+ process.out.versions
+ ).match('fastq-paired-stub') },
)
}
diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap
index 970865bd..2a431be8 100644
--- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap
+++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap
@@ -1,172 +1,218 @@
{
- "sarscov2 - Illumina FASTQ paired-end - stub-run": {
+ "fastq-single-stub": {
"content": [
- {
- "0": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.merged.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20"
- ]
- ],
- "1": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.merged.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20"
- ]
- ],
- "2": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975"
- ]
- ],
- "3": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975"
- ]
- ],
- "4": [
- "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c"
- ],
- "classified_assignment": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975"
- ]
- ],
- "classified_reads_fasta": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.merged.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20"
- ]
- ],
- "report": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975"
- ]
- ],
- "unclassified_reads_fasta": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.merged.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20"
- ]
- ],
- "versions": [
- "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c"
- ]
- }
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_interleaved.classified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_interleaved.unclassified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_interleaved.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975"
+ ]
+ ],
+ [
+ "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-06T11:21:36.338887437"
+ },
+ "fastq-single": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_interleaved.classified.fastq.gz:md5,3bd95021a8fbced1be8039b990b28176"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_interleaved.unclassified.fastq.gz:md5,143c7eb70ca93cc2d5ea98767c370424"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_interleaved.krakenuniq.classified.txt:md5,88a734a9a9216cb0770a77f36c9f4e78"
+ ]
+ ],
+ [
+ "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-06T11:17:43.586414914"
+ },
+ "fastq-paired": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.merged.classified.fastq.gz:md5,dd7651837cce63e6108e28f4f019aedb"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.krakenuniq.classified.txt:md5,ed5e19c7a88312cc04e483ac5f2579cd"
+ ]
+ ],
+ [
+ "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c"
+ ]
],
- "timestamp": "2023-11-21T15:38:47.810576872"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-06T11:37:46.718293365"
},
- "sarscov2 - Illumina FASTQ single - stub-run": {
+ "fasta-stub": {
"content": [
- {
- "0": [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test_1.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20"
- ]
- ],
- "1": [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test_1.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20"
- ]
- ],
- "2": [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test_1.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975"
- ]
- ],
- "3": [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test_1.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975"
- ]
- ],
- "4": [
- "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c"
- ],
- "classified_assignment": [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test_1.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975"
- ]
- ],
- "classified_reads_fasta": [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test_1.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20"
- ]
- ],
- "report": [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test_1.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975"
- ]
- ],
- "unclassified_reads_fasta": [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test_1.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20"
- ]
- ],
- "versions": [
- "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c"
- ]
- }
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "genome.classified.fasta.gz:md5,a5704c35e6b573a45e3a344768fe6975"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "genome.unclassified.fasta.gz:md5,a5704c35e6b573a45e3a344768fe6975"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "genome.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975"
+ ]
+ ],
+ [
+ "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-06T11:28:27.729550991"
+ },
+ "fastq-paired-stub": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.merged.classified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.merged.unclassified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975"
+ ]
+ ],
+ [
+ "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-05T20:06:20.262529457"
+ },
+ "fasta": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "genome.classified.fasta.gz:md5,e73599798195a519ba2565c3f0275b93"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "genome.krakenuniq.classified.txt:md5,8aafacd89a6aac98aaf512df0a7493d1"
+ ]
+ ],
+ [
+ "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c"
+ ]
],
- "timestamp": "2023-11-21T15:38:42.894597091"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-06T11:36:00.24752418"
}
}
\ No newline at end of file
diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf
index e306f1de..233d867f 100644
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@@ -362,20 +362,24 @@ workflow PROFILING {
ch_input_for_krakenuniq = ch_input_for_profiling.krakenuniq
.map {
meta, reads, db_meta, db ->
- [[id: db_meta.db_name, single_end: meta.single_end], reads, db_meta, db]
+ def seqtype = reads[0].matches('*a.gz|*a') ? 'fasta' : 'fastq'
+ [[id: db_meta.db_name, single_end: meta.single_end, seqtype: seqtype], reads, db_meta, db]
}
+ .dump(tag: 'ch_input_for_krakenuniq_pregrouptuple')
.groupTuple(by: [0,2,3])
.flatMap { single_meta, reads, db_meta, db ->
def batches = reads.collate(params.krakenuniq_batch_size)
return batches.collect { batch -> [ single_meta + db_meta, batch.flatten(), db ]}
}
+ .dump(tag: 'ch_input_for_krakenuniq_premultimap')
.multiMap {
meta, reads, db ->
reads: [ meta, reads ]
db: db
+ seqtype: meta.seqtype
}
// Hardcode to _always_ produce the report file (which is our basic output, and goes into)
- KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications )
+ KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.seqtype, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications )
ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report )
ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment )
From 035ac0ca06a5dba7f8b5cc7560729293c73f5de8 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 16 May 2024 13:05:55 +0200
Subject: [PATCH 14/35] Missing changelog, put logo back in MQC report
---
CHANGELOG.md | 2 ++
workflows/taxprofiler.nf | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b1435a66..ae312e0f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Added`
+- [#487](https://github.com/nf-core/taxprofiler/pull/487) Updated to nf-core pipeline template v2.14.1 (added by jfy133)
+
### `Fixed`
### `Dependencies`
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index 6c6e8f5c..fdb4266c 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -333,7 +333,7 @@ workflow TAXPROFILER {
Channel.empty()
ch_multiqc_logo = params.multiqc_logo ?
Channel.fromPath(params.multiqc_logo, checkIfExists: true) :
- Channel.empty()
+ Channel.fromPath("${workflow.projectDir}/docs/images/nf-core-taxprofiler_logo_custom_light.png", checkIfExists: true)
summary_params = paramsSummaryMap(
workflow, parameters_schema: "nextflow_schema.json")
From 558fcbb6844e98e1b54f8698601fb4c70899cfdf Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 16 May 2024 11:21:29 +0000
Subject: [PATCH 15/35] Add updated docs
---
docs/usage.md | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/docs/usage.md b/docs/usage.md
index 37c24178..6534b820 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -53,9 +53,9 @@ sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
::: info
Please note that the column name `run_accession` follows the definition of an ENA 'run'.
-A 'run' corresponds to a single or paired-end set of demultiplexed FASTQs.
+A 'run' corresponds to a single or paired-end set of demultiplexed FASTQs.
Given that demultiplexing of a given library happens per lane, each sequencing pair from each lane is a 'run'.
-Therefore, for each sample, you may get multiple 'runs' consisting of _both_ lanes (of the same library) _and_ sequencing libraries.
+Therefore, for each sample, you may get multiple 'runs' consisting of _both_ lanes (of the same library) _and_ sequencing libraries.
Therefore ensure that each `run_accession` ID is unique, even if from the same sample!
:::
@@ -460,6 +460,8 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof
- A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/)
- `apptainer`
- A generic configuration profile to be used with [Apptainer](https://apptainer.org/)
+- `wave`
+ - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later).
- `conda`
- A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer.
From faa002dd5c5c3383cb2f9497426e0a9828720286 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 16 May 2024 14:09:59 +0200
Subject: [PATCH 16/35] Fix metadata
---
subworkflows/local/longread_preprocessing.nf | 4 ++--
subworkflows/local/profiling.nf | 8 ++++----
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf
index 30963ec6..72261013 100644
--- a/subworkflows/local/longread_preprocessing.nf
+++ b/subworkflows/local/longread_preprocessing.nf
@@ -20,7 +20,7 @@ workflow LONGREAD_PREPROCESSING {
PORECHOP_PORECHOP ( reads )
ch_processed_reads = PORECHOP_PORECHOP.out.reads
- .map { meta, reads -> [ meta + [single_end: 1], reads ] }
+ .map { meta, reads -> [ meta + [single_end: true], reads ] }
ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_PORECHOP.out.log )
@@ -34,7 +34,7 @@ workflow LONGREAD_PREPROCESSING {
} else {
PORECHOP_PORECHOP ( reads )
ch_clipped_reads = PORECHOP_PORECHOP.out.reads
- .map { meta, reads -> [ meta + [single_end: 1], reads ] }
+ .map { meta, reads -> [ meta + [single_end: true], reads ] }
ch_processed_reads = FILTLONG ( ch_clipped_reads.map { meta, reads -> [ meta, [], reads ] } ).reads
diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf
index 233d867f..e64c3b61 100644
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@@ -60,7 +60,7 @@ workflow PROFILING {
COMBINE READS WITH POSSIBLE DATABASES
*/
- // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90]
+ // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':true], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90]
ch_input_for_profiling = reads
.map {
meta, reads ->
@@ -362,7 +362,7 @@ workflow PROFILING {
ch_input_for_krakenuniq = ch_input_for_profiling.krakenuniq
.map {
meta, reads, db_meta, db ->
- def seqtype = reads[0].matches('*a.gz|*a') ? 'fasta' : 'fastq'
+ def seqtype = reads[0].name.matches(".*a.gz\$|.*a\$") ? 'fasta' : 'fastq'
[[id: db_meta.db_name, single_end: meta.single_end, seqtype: seqtype], reads, db_meta, db]
}
.dump(tag: 'ch_input_for_krakenuniq_pregrouptuple')
@@ -382,8 +382,8 @@ workflow PROFILING {
KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.seqtype, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications )
ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report )
ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() )
- ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment )
- ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report )
+ ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]}.dump(tag: 'post-ku-classifications') )
+ ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]}.dump(tag: 'post-ku-reports') )
}
From ad008c8a1242a91028a3b29dd90210888cb9adda Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 16 May 2024 14:15:24 +0200
Subject: [PATCH 17/35] Ensure to published FASTQ files too
---
conf/modules.config | 2 +-
docs/output.md | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/conf/modules.config b/conf/modules.config
index 1956605e..dce43d1c 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -517,7 +517,7 @@ process {
publishDir = [
path: { "${params.outdir}/krakenuniq/${meta.db_name}/" },
mode: params.publish_dir_mode,
- pattern: '*.{txt,fasta.gz}'
+ pattern: '*.{txt,fastq.gz,fasta.gz}'
]
}
diff --git a/docs/output.md b/docs/output.md
index 2cebd463..8fbb22f9 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -398,8 +398,8 @@ You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply
- `krakenuniq/`
- `/`
- - `_[.merged].classified.fasta.gz`: Optional FASTA file containing all reads that had a hit against a reference in the database for a given sample. Paired-end input reads are merged in this output.
- - `_[.merged].unclassified.fasta.gz`: Optional FASTA file containing all reads that did not have a hit in the database for a given sample. Paired-end input reads are merged in this output.
+ - `_[.merged].classified.fast{a,q}.gz`: Optional FASTA or FASTQ file (matching the input format) containing all reads that had a hit against a reference in the database for a given sample. Paired-end input reads are merged in this output.
+ - `_[.merged].unclassified.fast{a,q}.gz`: Optional FASTA or FASTQ file (matching the input format) containing all reads that did not have a hit in the database for a given sample. Paired-end input reads are merged in this output.
- `_.krakenuniq.report.txt`: A Kraken2-style report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits, with an additional column for k-mer coverage, that allows for more accurate distinguishing between false-positive/true-postitive hits.
- `_.krakenuniq.classified.txt`: An optional list of read IDs and the hits each read had against each database for a given sample.
From d5d82b13e210bbc8ea8bae476c9a2de80d50ac0a Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 16 May 2024 16:06:24 +0200
Subject: [PATCH 18/35] Apply suggestions from code review
Co-authored-by: Moritz E. Beber
---
subworkflows/local/profiling.nf | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf
index e64c3b61..2bd04564 100644
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@@ -362,16 +362,14 @@ workflow PROFILING {
ch_input_for_krakenuniq = ch_input_for_profiling.krakenuniq
.map {
meta, reads, db_meta, db ->
- def seqtype = reads[0].name.matches(".*a.gz\$|.*a\$") ? 'fasta' : 'fastq'
+ def seqtype = (reads[0].name ==~ /.+?\.f\w{0,3}a(\.gz)?$/) ? 'fasta' : 'fastq'
[[id: db_meta.db_name, single_end: meta.single_end, seqtype: seqtype], reads, db_meta, db]
}
- .dump(tag: 'ch_input_for_krakenuniq_pregrouptuple')
.groupTuple(by: [0,2,3])
.flatMap { single_meta, reads, db_meta, db ->
def batches = reads.collate(params.krakenuniq_batch_size)
return batches.collect { batch -> [ single_meta + db_meta, batch.flatten(), db ]}
}
- .dump(tag: 'ch_input_for_krakenuniq_premultimap')
.multiMap {
meta, reads, db ->
reads: [ meta, reads ]
@@ -382,8 +380,8 @@ workflow PROFILING {
KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.seqtype, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications )
ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report )
ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() )
- ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]}.dump(tag: 'post-ku-classifications') )
- ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]}.dump(tag: 'post-ku-reports') )
+ ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]} )
+ ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]} )
}
From 888886632f9db02d51d217f1bfb895ed6c9b504b Mon Sep 17 00:00:00 2001
From: Sofia Stamouli
Date: Thu, 23 May 2024 16:06:17 +0200
Subject: [PATCH 19/35] Add flag to publish intermediate bracken files
---
conf/modules.config | 3 +-
docs/output.md | 2 ++
nextflow.config | 3 +-
nextflow_schema.json | 76 ++++++++++++++++++++++++++++++++++++++------
4 files changed, 72 insertions(+), 12 deletions(-)
diff --git a/conf/modules.config b/conf/modules.config
index dce43d1c..693f1e2e 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -467,7 +467,8 @@ process {
publishDir = [
path: { "${params.outdir}/kraken2/${meta.db_name}/" },
mode: params.publish_dir_mode,
- pattern: '*.{txt,fastq.gz}'
+ pattern: '*.{txt,fastq.gz}',
+ saveAs: { params.bracken_save_intermediatekraken2 == false && meta.tool == 'bracken' ? null : it }
]
}
diff --git a/docs/output.md b/docs/output.md
index 8fbb22f9..dc5f99d4 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -389,6 +389,8 @@ The main taxonomic classification file from Kraken2 is the `_combined_reports.tx
You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply `--kraken2_save_reads` and/or `--kraken2_save_readclassifications` parameters to the pipeline.
+When running Kraken2 together with bracken, you can use the `--bracken_save_intermediatekraken2` to save the intermediate/upstream files generated during the bracken run.
+
### KrakenUniq
[KrakenUniq](https://github.com/fbreitwieser/krakenuniq) (formerly KrakenHLL) is an extension to the fast k-mer-based classification performed by [Kraken](https://github.com/DerrickWood/kraken) with an efficient algorithm for additionally assessing the coverage of unique k-mers found in each species in a dataset.
diff --git a/nextflow.config b/nextflow.config
index b8a9ba8f..1b105692 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -135,7 +135,8 @@ params {
krakenuniq_batch_size = 20
// Bracken
- run_bracken = false
+ run_bracken = false
+ bracken_save_intermediatekraken2 = false
// centrifuge
run_centrifuge = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 3f7d9eec..5d004189 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -10,7 +10,11 @@
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
- "required": ["input", "databases", "outdir"],
+ "required": [
+ "input",
+ "databases",
+ "outdir"
+ ],
"properties": {
"input": {
"type": "string",
@@ -75,7 +79,10 @@
"preprocessing_qc_tool": {
"type": "string",
"default": "fastqc",
- "enum": ["fastqc", "falco"],
+ "enum": [
+ "fastqc",
+ "falco"
+ ],
"help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.",
"description": "Specify the tool used for quality control of raw sequencing reads",
"fa_icon": "fas fa-tools"
@@ -110,7 +117,10 @@
"shortread_qc_tool": {
"type": "string",
"default": "fastp",
- "enum": ["fastp", "adapterremoval"],
+ "enum": [
+ "fastp",
+ "adapterremoval"
+ ],
"fa_icon": "fas fa-tools",
"description": "Specify which tool to use for short-read QC"
},
@@ -172,7 +182,11 @@
"shortread_complexityfilter_tool": {
"type": "string",
"default": "bbduk",
- "enum": ["bbduk", "prinseqplusplus", "fastp"],
+ "enum": [
+ "bbduk",
+ "prinseqplusplus",
+ "fastp"
+ ],
"fa_icon": "fas fa-hammer",
"description": "Specify which tool to use for complexity filtering"
},
@@ -206,7 +220,10 @@
"shortread_complexityfilter_prinseqplusplus_mode": {
"type": "string",
"default": "entropy",
- "enum": ["entropy", "dust"],
+ "enum": [
+ "entropy",
+ "dust"
+ ],
"fa_icon": "fas fa-check-square",
"description": "Specify the complexity filter mode for PRINSEQ++"
},
@@ -377,7 +394,15 @@
"diamond_output_format": {
"type": "string",
"default": "tsv",
- "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"],
+ "enum": [
+ "blast",
+ "xml",
+ "txt",
+ "daa",
+ "sam",
+ "tsv",
+ "paf"
+ ],
"fa_icon": "fas fa-file",
"description": "Specify output format from DIAMOND profiling.",
"help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`"
@@ -402,7 +427,14 @@
"kaiju_taxon_rank": {
"type": "string",
"default": "species",
- "enum": ["phylum", "class", "order", "family", "genus", "species"],
+ "enum": [
+ "phylum",
+ "class",
+ "order",
+ "family",
+ "genus",
+ "species"
+ ],
"fa_icon": "fas fa-tag",
"description": "Specify taxonomic rank to be displayed in Kaiju taxon table",
"help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be only be a single level (e.g. `species`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`"
@@ -466,6 +498,11 @@
"description": "Turn on Bracken (and the required Kraken2 prerequisite step).",
"fa_icon": "fas fa-toggle-on"
},
+ "bracken_save_intermediatekraken2": {
+ "type": "boolean",
+ "fa_icon": "fas fa-save",
+ "description": "Turn on saving the upstream Bracken files"
+ },
"run_malt": {
"type": "boolean",
"fa_icon": "fas fa-toggle-on",
@@ -552,7 +589,13 @@
"default": "reads",
"description": "Specify the type of ganon report to save.",
"help_text": "Specify the type of taxonomic report to produce from ganon report. This mainly refers to which form of 'value' to print: raw read counts, abundance estimates, genome-size normalised etc. \n\nSee the [ganon documentation](https://pirovc.github.io/ganon/outputfiles/#ganon-report) for more information of each option.\n\n> Modifies tool parameter(s):\n- ganon report: `--report-type`\n",
- "enum": ["abundance", "reads", "matches", "dist", "corr"],
+ "enum": [
+ "abundance",
+ "reads",
+ "matches",
+ "dist",
+ "corr"
+ ],
"fa_icon": "fas fa-file"
},
"ganon_report_rank": {
@@ -620,7 +663,13 @@
"default": "tsv",
"fa_icon": "fas fa-pastafarianism",
"description": "The desired output format.",
- "enum": ["tsv", "csv", "arrow", "parquet", "biom"]
+ "enum": [
+ "tsv",
+ "csv",
+ "arrow",
+ "parquet",
+ "biom"
+ ]
},
"taxpasta_taxonomy_dir": {
"type": "string",
@@ -775,7 +824,14 @@
"description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
- "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+ "enum": [
+ "symlink",
+ "rellink",
+ "link",
+ "copy",
+ "copyNoFollow",
+ "move"
+ ],
"hidden": true
},
"email_on_fail": {
From 11ee2747678ae7ed18bdf4867a419549a90f4544 Mon Sep 17 00:00:00 2001
From: Sofia Stamouli
Date: Thu, 23 May 2024 16:40:00 +0200
Subject: [PATCH 20/35] prettier
---
nextflow_schema.json | 71 +++++++-------------------------------------
1 file changed, 10 insertions(+), 61 deletions(-)
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 5d004189..999f25d8 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -10,11 +10,7 @@
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
- "required": [
- "input",
- "databases",
- "outdir"
- ],
+ "required": ["input", "databases", "outdir"],
"properties": {
"input": {
"type": "string",
@@ -79,10 +75,7 @@
"preprocessing_qc_tool": {
"type": "string",
"default": "fastqc",
- "enum": [
- "fastqc",
- "falco"
- ],
+ "enum": ["fastqc", "falco"],
"help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.",
"description": "Specify the tool used for quality control of raw sequencing reads",
"fa_icon": "fas fa-tools"
@@ -117,10 +110,7 @@
"shortread_qc_tool": {
"type": "string",
"default": "fastp",
- "enum": [
- "fastp",
- "adapterremoval"
- ],
+ "enum": ["fastp", "adapterremoval"],
"fa_icon": "fas fa-tools",
"description": "Specify which tool to use for short-read QC"
},
@@ -182,11 +172,7 @@
"shortread_complexityfilter_tool": {
"type": "string",
"default": "bbduk",
- "enum": [
- "bbduk",
- "prinseqplusplus",
- "fastp"
- ],
+ "enum": ["bbduk", "prinseqplusplus", "fastp"],
"fa_icon": "fas fa-hammer",
"description": "Specify which tool to use for complexity filtering"
},
@@ -220,10 +206,7 @@
"shortread_complexityfilter_prinseqplusplus_mode": {
"type": "string",
"default": "entropy",
- "enum": [
- "entropy",
- "dust"
- ],
+ "enum": ["entropy", "dust"],
"fa_icon": "fas fa-check-square",
"description": "Specify the complexity filter mode for PRINSEQ++"
},
@@ -394,15 +377,7 @@
"diamond_output_format": {
"type": "string",
"default": "tsv",
- "enum": [
- "blast",
- "xml",
- "txt",
- "daa",
- "sam",
- "tsv",
- "paf"
- ],
+ "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"],
"fa_icon": "fas fa-file",
"description": "Specify output format from DIAMOND profiling.",
"help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`"
@@ -427,14 +402,7 @@
"kaiju_taxon_rank": {
"type": "string",
"default": "species",
- "enum": [
- "phylum",
- "class",
- "order",
- "family",
- "genus",
- "species"
- ],
+ "enum": ["phylum", "class", "order", "family", "genus", "species"],
"fa_icon": "fas fa-tag",
"description": "Specify taxonomic rank to be displayed in Kaiju taxon table",
"help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be only be a single level (e.g. `species`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`"
@@ -589,13 +557,7 @@
"default": "reads",
"description": "Specify the type of ganon report to save.",
"help_text": "Specify the type of taxonomic report to produce from ganon report. This mainly refers to which form of 'value' to print: raw read counts, abundance estimates, genome-size normalised etc. \n\nSee the [ganon documentation](https://pirovc.github.io/ganon/outputfiles/#ganon-report) for more information of each option.\n\n> Modifies tool parameter(s):\n- ganon report: `--report-type`\n",
- "enum": [
- "abundance",
- "reads",
- "matches",
- "dist",
- "corr"
- ],
+ "enum": ["abundance", "reads", "matches", "dist", "corr"],
"fa_icon": "fas fa-file"
},
"ganon_report_rank": {
@@ -663,13 +625,7 @@
"default": "tsv",
"fa_icon": "fas fa-pastafarianism",
"description": "The desired output format.",
- "enum": [
- "tsv",
- "csv",
- "arrow",
- "parquet",
- "biom"
- ]
+ "enum": ["tsv", "csv", "arrow", "parquet", "biom"]
},
"taxpasta_taxonomy_dir": {
"type": "string",
@@ -824,14 +780,7 @@
"description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
- "enum": [
- "symlink",
- "rellink",
- "link",
- "copy",
- "copyNoFollow",
- "move"
- ],
+ "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
"hidden": true
},
"email_on_fail": {
From 0064d0d02092bd4308aaea671f87bba3293e38ad Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Fri, 24 May 2024 05:33:43 +0200
Subject: [PATCH 21/35] Update conf/modules.config
---
conf/modules.config | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/conf/modules.config b/conf/modules.config
index 693f1e2e..163f0b00 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -468,7 +468,7 @@ process {
path: { "${params.outdir}/kraken2/${meta.db_name}/" },
mode: params.publish_dir_mode,
pattern: '*.{txt,fastq.gz}',
- saveAs: { params.bracken_save_intermediatekraken2 == false && meta.tool == bracken ? false : true }
+ saveAs: { params.bracken_save_intermediatekraken2 == false && meta.tool == bracken ? null : it }
]
}
From 3c433a6452318e1a6deb6b34080259faecbadf2e Mon Sep 17 00:00:00 2001
From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>
Date: Thu, 30 May 2024 10:35:19 +0200
Subject: [PATCH 22/35] Correct condition
---
conf/modules.config | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/conf/modules.config b/conf/modules.config
index 163f0b00..2148b69f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -468,7 +468,7 @@ process {
path: { "${params.outdir}/kraken2/${meta.db_name}/" },
mode: params.publish_dir_mode,
pattern: '*.{txt,fastq.gz}',
- saveAs: { params.bracken_save_intermediatekraken2 == false && meta.tool == bracken ? null : it }
+ saveAs: { !params.bracken_save_intermediatekraken2 && meta.tool == "bracken" ? null : it }
]
}
From e14894832d86acb0cb4bb51b010b012fd1b1991f Mon Sep 17 00:00:00 2001
From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>
Date: Thu, 30 May 2024 10:45:29 +0200
Subject: [PATCH 23/35] Update CHANGELOG.md
---
CHANGELOG.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 043a62c7..5134ae3f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Fixed`
- [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133)
+- [491](https://github.com/nf-core/taxprofiler/pull/491) Added flag to publish intermediate bracken files (❤️ to @ewissel for reporting, fixed by @sofstam and @jfy133)
### `Dependencies`
From 892782f851343b6e3df996bbef08c4c84ee171dc Mon Sep 17 00:00:00 2001
From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>
Date: Thu, 30 May 2024 10:47:39 +0200
Subject: [PATCH 24/35] Update nextflow_schema.json
Co-authored-by: James A. Fellows Yates
---
nextflow_schema.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 999f25d8..0e4185b5 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -469,7 +469,7 @@
"bracken_save_intermediatekraken2": {
"type": "boolean",
"fa_icon": "fas fa-save",
- "description": "Turn on saving the upstream Bracken files"
+ "description": "Turn on the saving of the intermediate Kraken2 files used as input to Bracken itself into Kraken2 results folder"
},
"run_malt": {
"type": "boolean",
From fffe8e180093c9fe13baf22551ce514626ffa157 Mon Sep 17 00:00:00 2001
From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>
Date: Thu, 30 May 2024 10:47:45 +0200
Subject: [PATCH 25/35] Update docs/output.md
Co-authored-by: James A. Fellows Yates
---
docs/output.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/output.md b/docs/output.md
index dc5f99d4..9528514b 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -389,7 +389,7 @@ The main taxonomic classification file from Kraken2 is the `_combined_reports.tx
You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply `--kraken2_save_reads` and/or `--kraken2_save_readclassifications` parameters to the pipeline.
-When running Kraken2 together with bracken, you can use the `--bracken_save_intermediatekraken2` to save the intermediate/upstream files generated during the bracken run.
+When running Bracken, you will only get the 'intermediate' Kraken2 report files in this directory if you supply `--bracken_save_intermediatekraken2` to the run.
### KrakenUniq
From ca29906d6ed2ec43980810d81f1d8161429b8a1f Mon Sep 17 00:00:00 2001
From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>
Date: Thu, 30 May 2024 10:52:40 +0200
Subject: [PATCH 26/35] Update output.md
---
docs/output.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/output.md b/docs/output.md
index 9528514b..732d5964 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -376,11 +376,11 @@ The main taxonomic profiling file from Bracken is the `*.tsv` file. This provide
- `kraken2/`
- `_combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by `krakentools`)
- - If you have also run Bracken, the original Kraken report (i.e., _before_ read re-assignment) will also be included in this directory with `-bracken` suffixed to your Bracken database name. For example: `kraken2--bracken.tsv`. However in most cases you want to use the actual Bracken file (i.e., `bracken_.tsv`).
+ - If you have also run Bracken, the original Kraken report (i.e., _before_ read re-assignment) will also be included in this directory with `-bracken` suffixed to your Bracken database name if you supply `--bracken_save_intermediatekraken2` to the run. For example: `kraken2--bracken.tsv`. However in most cases you want to use the actual Bracken file (i.e., `bracken_.tsv`).
- `/`
- `_.classified.fastq.gz`: FASTQ file containing all reads that had a hit against a reference in the database for a given sample
- `_.unclassified.fastq.gz`: FASTQ file containing all reads that did not have a hit in the database for a given sample
- - `_.report.txt`: A Kraken2 report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits in the Kraken2 run for a given sample. Will be 6 column rather than 8 if `--save_minimizers` specified.
+ - `_.report.txt`: A Kraken2 report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits in the Kraken2 run for a given sample. Will be 6 column rather than 8 if `--save_minimizers` specified. This report will **only** be included if you supply `--bracken_save_intermediatekraken2` to the run.
- `_.classifiedreads.txt`: A list of read IDs and the hits each read had against each database for a given sample
From b409e56d0523cabd40afb8c2a3e1fe449ab4bc7e Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 30 May 2024 10:56:17 +0200
Subject: [PATCH 27/35] Update CHANGELOG.md
---
CHANGELOG.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5134ae3f..51c3c88d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Fixed`
- [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133)
-- [491](https://github.com/nf-core/taxprofiler/pull/491) Added flag to publish intermediate bracken files (❤️ to @ewissel for reporting, fixed by @sofstam and @jfy133)
+- [#491](https://github.com/nf-core/taxprofiler/pull/491) Added flag to publish intermediate bracken files (❤️ to @ewissel for reporting, fixed by @sofstam and @jfy133)
### `Dependencies`
From ee31ff0ad0f38e15b0dd83eda5f71a85a8c7a955 Mon Sep 17 00:00:00 2001
From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>
Date: Thu, 30 May 2024 10:59:22 +0200
Subject: [PATCH 28/35] Update CHANGELOG.md
---
CHANGELOG.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 51c3c88d..347d0604 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133)
- [#491](https://github.com/nf-core/taxprofiler/pull/491) Added flag to publish intermediate bracken files (❤️ to @ewissel for reporting, fixed by @sofstam and @jfy133)
+- [489](https://github.com/nf-core/taxprofiler/pull/489) Fix KrakenUniq classified reads output format mismatch (❤️ to @SannaAb for reporting, fixed by @jfy133)
### `Dependencies`
From 94812b3378bf03431311ae93ddeaa81f75ee895d Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Mon, 10 Jun 2024 14:36:21 +0200
Subject: [PATCH 29/35] Remove --kmcp_mode and bump KMCP version
---
CHANGELOG.md | 6 +
conf/test.config | 1 -
conf/test_adapterremoval.config | 1 -
conf/test_bbduk.config | 1 -
conf/test_falco.config | 1 -
conf/test_fastp.config | 1 -
conf/test_krakenuniq.config | 1 -
conf/test_malt.config | 1 -
conf/test_motus.config | 1 -
conf/test_nopreprocessing.config | 1 -
conf/test_noprofiling.config | 1 -
conf/test_nothing.config | 1 -
conf/test_prinseqplusplus.config | 1 -
.../nf-core-taxprofiler_logo_generic.svg | 2309 +++++++++++++++++
modules.json | 2 +-
modules/nf-core/kmcp/search/environment.yml | 7 +
modules/nf-core/kmcp/search/main.nf | 10 +-
modules/nf-core/kmcp/search/meta.yml | 15 +-
.../nf-core/kmcp/search/tests/main.nf.test | 88 +
.../kmcp/search/tests/main.nf.test.snap | 72 +
.../nf-core/kmcp/search/tests/nextflow.config | 5 +
modules/nf-core/kmcp/search/tests/tags.yml | 2 +
nextflow.config | 1 -
nextflow_schema.json | 7 -
subworkflows/local/profiling.nf | 4 +-
25 files changed, 2504 insertions(+), 36 deletions(-)
create mode 100644 docs/images/nf-core-taxprofiler_logo_generic.svg
create mode 100644 modules/nf-core/kmcp/search/environment.yml
create mode 100644 modules/nf-core/kmcp/search/tests/main.nf.test
create mode 100644 modules/nf-core/kmcp/search/tests/main.nf.test.snap
create mode 100644 modules/nf-core/kmcp/search/tests/nextflow.config
create mode 100644 modules/nf-core/kmcp/search/tests/tags.yml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 347d0604..e98482ef 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,8 +17,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Dependencies`
+| Tool | Previous version | New version |
+| ---- | ---------------- | ----------- |
+| KMCP | 0.9.1 | 0.9.4 |
+
### `Deprecated`
+- [#492](https://github.com/nf-core/taxprofiler/pull/492) Removed `--kmcp_mode` parameter from KMCP to allow per database specification by setting in db_params in database sheet (fixed by @jfy133)
+
## v1.1.7 - Augmented Akita Patch [2024-04-25]
### `Added`
diff --git a/conf/test.config b/conf/test.config
index e8cd48da..042dc2fa 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -42,7 +42,6 @@ params {
run_ganon = true
run_krona = true
run_kmcp = true
- kmcp_mode = 0
krona_taxonomy_directory = params.pipelines_testdata_base_path + 'modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
malt_save_reads = true
kraken2_save_reads = true
diff --git a/conf/test_adapterremoval.config b/conf/test_adapterremoval.config
index 5fb6d362..ee55ba55 100644
--- a/conf/test_adapterremoval.config
+++ b/conf/test_adapterremoval.config
@@ -41,7 +41,6 @@ params {
run_motus = false
run_ganon = false
run_kmcp = false
- kmcp_mode = 0
}
process {
diff --git a/conf/test_bbduk.config b/conf/test_bbduk.config
index e92fea8c..d0ff530a 100644
--- a/conf/test_bbduk.config
+++ b/conf/test_bbduk.config
@@ -41,7 +41,6 @@ params {
run_motus = false
run_ganon = false
run_kmcp = false
- kmcp_mode = 0
}
process {
diff --git a/conf/test_falco.config b/conf/test_falco.config
index 03d80593..8bcd9889 100644
--- a/conf/test_falco.config
+++ b/conf/test_falco.config
@@ -41,7 +41,6 @@ params {
run_motus = false
run_ganon = false
run_kmcp = false
- kmcp_mode = 0
}
process {
diff --git a/conf/test_fastp.config b/conf/test_fastp.config
index 52767546..81bec14c 100644
--- a/conf/test_fastp.config
+++ b/conf/test_fastp.config
@@ -42,7 +42,6 @@ params {
run_motus = false
run_ganon = false
run_kmcp = false
- kmcp_mode = 0
}
process {
diff --git a/conf/test_krakenuniq.config b/conf/test_krakenuniq.config
index f116af38..fc6305de 100644
--- a/conf/test_krakenuniq.config
+++ b/conf/test_krakenuniq.config
@@ -44,7 +44,6 @@ params {
run_krakenuniq = true
run_motus = false
run_kmcp = false
- kmcp_mode = 0
run_ganon = false
run_krona = true
krona_taxonomy_directory = params.pipelines_testdata_base_path + 'modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
diff --git a/conf/test_malt.config b/conf/test_malt.config
index d4a91771..7d9bd2b6 100644
--- a/conf/test_malt.config
+++ b/conf/test_malt.config
@@ -44,7 +44,6 @@ params {
run_motus = false
run_ganon = false
run_kmcp = false
- kmcp_mode = 0
}
process {
diff --git a/conf/test_motus.config b/conf/test_motus.config
index 44cc36ff..c2d4ac22 100644
--- a/conf/test_motus.config
+++ b/conf/test_motus.config
@@ -43,7 +43,6 @@ params {
run_krakenuniq = false
run_motus = true
run_kmcp = false
- kmcp_mode = 0
run_ganon = false
motus_save_mgc_read_counts = false
motus_remove_ncbi_ids = false
diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config
index 98f34b07..42014303 100644
--- a/conf/test_nopreprocessing.config
+++ b/conf/test_nopreprocessing.config
@@ -39,7 +39,6 @@ params {
run_krakenuniq = true
run_motus = false
run_kmcp = true
- kmcp_mode = 0
run_ganon = true
run_krona = true
}
diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config
index b1a52922..4e917fb9 100644
--- a/conf/test_noprofiling.config
+++ b/conf/test_noprofiling.config
@@ -40,7 +40,6 @@ params {
run_krakenuniq = false
run_motus = false
run_kmcp = false
- kmcp_mode = 0
run_ganon = false
}
diff --git a/conf/test_nothing.config b/conf/test_nothing.config
index 504fe4ed..d36c76d4 100644
--- a/conf/test_nothing.config
+++ b/conf/test_nothing.config
@@ -39,7 +39,6 @@ params {
run_krakenuniq = false
run_motus = false
run_kmcp = false
- kmcp_mode = 0
run_ganon = false
}
diff --git a/conf/test_prinseqplusplus.config b/conf/test_prinseqplusplus.config
index 19fb2a60..c7ce2259 100644
--- a/conf/test_prinseqplusplus.config
+++ b/conf/test_prinseqplusplus.config
@@ -41,7 +41,6 @@ params {
run_motus = false
run_ganon = false
run_kmcp = false
- kmcp_mode = 0
}
process {
diff --git a/docs/images/nf-core-taxprofiler_logo_generic.svg b/docs/images/nf-core-taxprofiler_logo_generic.svg
new file mode 100644
index 00000000..632997ca
--- /dev/null
+++ b/docs/images/nf-core-taxprofiler_logo_generic.svg
@@ -0,0 +1,2309 @@
+
+
+
+
diff --git a/modules.json b/modules.json
index ecb96290..46fd242c 100644
--- a/modules.json
+++ b/modules.json
@@ -117,7 +117,7 @@
},
"kmcp/search": {
"branch": "master",
- "git_sha": "e198734cc3be18af5f64f6d7734c7f1a7c3af5a6",
+ "git_sha": "64cd3f418b191a008b9d362b8ccf0216ae0302d5",
"installed_by": ["modules"]
},
"kraken2/kraken2": {
diff --git a/modules/nf-core/kmcp/search/environment.yml b/modules/nf-core/kmcp/search/environment.yml
new file mode 100644
index 00000000..397fcb8a
--- /dev/null
+++ b/modules/nf-core/kmcp/search/environment.yml
@@ -0,0 +1,7 @@
+name: kmcp_search
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::kmcp=0.9.4
diff --git a/modules/nf-core/kmcp/search/main.nf b/modules/nf-core/kmcp/search/main.nf
index cb2d6843..62f74aeb 100644
--- a/modules/nf-core/kmcp/search/main.nf
+++ b/modules/nf-core/kmcp/search/main.nf
@@ -2,14 +2,14 @@ process KMCP_SEARCH {
tag "$meta.id"
label 'process_medium'
- conda "bioconda::kmcp=0.9.1"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/kmcp:0.9.1--h9ee0642_0':
- 'biocontainers/kmcp:0.9.1--h9ee0642_0' }"
+ 'https://depot.galaxyproject.org/singularity/kmcp:0.9.4--h9ee0642_0':
+ 'biocontainers/kmcp:0.9.4--h9ee0642_0' }"
input:
- path(db)
tuple val(meta), path(reads)
+ path(db)
output:
tuple val(meta), path("*.gz") , emit: result
@@ -40,7 +40,7 @@ process KMCP_SEARCH {
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
"""
- touch ${prefix}.gz
+ echo "" | gzip > ${prefix}.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/nf-core/kmcp/search/meta.yml b/modules/nf-core/kmcp/search/meta.yml
index 5526a179..6d003b04 100644
--- a/modules/nf-core/kmcp/search/meta.yml
+++ b/modules/nf-core/kmcp/search/meta.yml
@@ -14,23 +14,21 @@ tools:
documentation: "https://github.com/shenwei356/kmcp#documents"
tool_dev_url: "https://github.com/shenwei356/kmcp"
doi: "10.1093/bioinformatics/btac845"
- licence: "['MIT']"
-
+ licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- - db:
- type: directory
- description: Database directory created by "kmcp index"
- pattern: "*"
- reads:
type: file
description: gzipped fasta or fastq files
pattern: "*.{fq.gz,fastq.gz,fa.gz}"
-
+ - db:
+ type: directory
+ description: Database directory created by "kmcp index"
+ pattern: "*"
output:
- meta:
type: map
@@ -45,6 +43,7 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
-
authors:
- "@sofstam"
+maintainers:
+ - "@sofstam"
diff --git a/modules/nf-core/kmcp/search/tests/main.nf.test b/modules/nf-core/kmcp/search/tests/main.nf.test
new file mode 100644
index 00000000..adc693e6
--- /dev/null
+++ b/modules/nf-core/kmcp/search/tests/main.nf.test
@@ -0,0 +1,88 @@
+nextflow_process {
+
+ name "Test Process KMCP_SEARCH"
+ script "../main.nf"
+ process "KMCP_SEARCH"
+ config "./nextflow.config"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "kmcp"
+ tag "kmcp/search"
+ tag "kmcp/compute"
+ tag "kmcp/index"
+
+ setup {
+ run("KMCP_COMPUTE") {
+ script "../../../kmcp/compute/main.nf"
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ run("KMCP_INDEX") {
+ script "../../../kmcp/index/main.nf"
+ process {
+ """
+ input[0] = KMCP_COMPUTE.out.outdir
+ """
+ }
+ }
+ }
+
+ test("sarscov2 - fasta") {
+ when {
+ process {
+ """
+ input[0] = [
+ [id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ input[1] = KMCP_INDEX.out.kmcp.map{it[1]}
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out,
+ ).match()
+ }
+ )
+ }
+
+ }
+
+ test("sarscov2 - fasta - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ input[1] = KMCP_INDEX.out.kmcp.map{it[1]}
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/kmcp/search/tests/main.nf.test.snap b/modules/nf-core/kmcp/search/tests/main.nf.test.snap
new file mode 100644
index 00000000..e245e2e9
--- /dev/null
+++ b/modules/nf-core/kmcp/search/tests/main.nf.test.snap
@@ -0,0 +1,72 @@
+{
+ "sarscov2 - fasta - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,69d488bec087e13e13bef0482633b6c3"
+ ],
+ "result": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,69d488bec087e13e13bef0482633b6c3"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-07T14:41:54.308010562"
+ },
+ "sarscov2 - fasta": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.gz:md5,84953d3d517f65722d43a2c3fdd04935"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,69d488bec087e13e13bef0482633b6c3"
+ ],
+ "result": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.gz:md5,84953d3d517f65722d43a2c3fdd04935"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,69d488bec087e13e13bef0482633b6c3"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-06T21:35:20.053225742"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/kmcp/search/tests/nextflow.config b/modules/nf-core/kmcp/search/tests/nextflow.config
new file mode 100644
index 00000000..9366eab4
--- /dev/null
+++ b/modules/nf-core/kmcp/search/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: KMCP_INDEX {
+ ext.prefix = { "${meta.id}_kmcp" }
+ }
+}
diff --git a/modules/nf-core/kmcp/search/tests/tags.yml b/modules/nf-core/kmcp/search/tests/tags.yml
new file mode 100644
index 00000000..e281416c
--- /dev/null
+++ b/modules/nf-core/kmcp/search/tests/tags.yml
@@ -0,0 +1,2 @@
+kmcp/search:
+ - "modules/nf-core/kmcp/search/**"
diff --git a/nextflow.config b/nextflow.config
index 1b105692..83cbf39f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -163,7 +163,6 @@ params {
// kmcp
run_kmcp = false
- kmcp_mode = 3 // default kmcp profiling value
kmcp_save_search = false
// ganon
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 0e4185b5..deef481a 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -528,13 +528,6 @@
"description": "Turn on classification with KMCP.",
"fa_icon": "fas fa-toggle-on"
},
- "kmcp_mode": {
- "type": "integer",
- "default": 3,
- "description": "Specify which KMCP profiling mode to use.",
- "help_text": "Available values: \n0 (for pathogen detection)\n1 (higherrecall)\n2 (high recall)\n3 (default)\n4 (high precision)\n5 (higher precision).\nFor more information about the different profiling modes, please see the [kmcp documentation](https://bioinf.shenwei.me/kmcp/usage/#profile)\n\n> Modifies tool parameter(s):\n- kmcp profile: `--mode`\n\n",
- "fa_icon": "fas fa-check-square"
- },
"kmcp_save_search": {
"type": "boolean",
"fa_icon": "fas fa-save",
diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf
index 2bd04564..09b8ac19 100644
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@@ -416,7 +416,7 @@ workflow PROFILING {
db: it[3]
}
- KMCP_SEARCH ( ch_input_for_kmcp.db, ch_input_for_kmcp.reads )
+ KMCP_SEARCH ( ch_input_for_kmcp.reads, ch_input_for_kmcp.db )
ch_versions = ch_versions.mix( KMCP_SEARCH.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix(KMCP_SEARCH.out.result)
@@ -453,7 +453,7 @@ workflow PROFILING {
}
//Generate kmcp profile
- KMCP_PROFILE( ch_input_for_kmcp_profile.report, ch_input_for_kmcp.db, params.kmcp_mode )
+ KMCP_PROFILE( ch_input_for_kmcp_profile.report, ch_input_for_kmcp.db )
ch_versions = ch_versions.mix( KMCP_PROFILE.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( KMCP_PROFILE.out.profile )
ch_multiqc_files = ch_multiqc_files.mix( KMCP_PROFILE.out.profile )
From 124a9c937d356af0608370254776a30074358fea Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Mon, 10 Jun 2024 14:46:14 +0200
Subject: [PATCH 30/35] Actually include KMCP Profile
---
modules.json | 2 +-
modules/nf-core/kmcp/profile/environment.yml | 7 ++
modules/nf-core/kmcp/profile/main.nf | 8 +-
modules/nf-core/kmcp/profile/meta.yml | 22 ++--
.../nf-core/kmcp/profile/tests/main.nf.test | 109 ++++++++++++++++++
.../kmcp/profile/tests/main.nf.test.snap | 72 ++++++++++++
.../kmcp/profile/tests/nextflow.config | 13 +++
modules/nf-core/kmcp/profile/tests/tags.yml | 2 +
8 files changed, 214 insertions(+), 21 deletions(-)
create mode 100644 modules/nf-core/kmcp/profile/environment.yml
create mode 100644 modules/nf-core/kmcp/profile/tests/main.nf.test
create mode 100644 modules/nf-core/kmcp/profile/tests/main.nf.test.snap
create mode 100644 modules/nf-core/kmcp/profile/tests/nextflow.config
create mode 100644 modules/nf-core/kmcp/profile/tests/tags.yml
diff --git a/modules.json b/modules.json
index 46fd242c..04001af6 100644
--- a/modules.json
+++ b/modules.json
@@ -112,7 +112,7 @@
},
"kmcp/profile": {
"branch": "master",
- "git_sha": "e198734cc3be18af5f64f6d7734c7f1a7c3af5a6",
+ "git_sha": "6f56948d0674ad5870035e80c7af209a51d8e243",
"installed_by": ["modules"]
},
"kmcp/search": {
diff --git a/modules/nf-core/kmcp/profile/environment.yml b/modules/nf-core/kmcp/profile/environment.yml
new file mode 100644
index 00000000..43de2a64
--- /dev/null
+++ b/modules/nf-core/kmcp/profile/environment.yml
@@ -0,0 +1,7 @@
+name: kmcp_profile
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::kmcp=0.9.4
diff --git a/modules/nf-core/kmcp/profile/main.nf b/modules/nf-core/kmcp/profile/main.nf
index a4672122..3de9fdb7 100644
--- a/modules/nf-core/kmcp/profile/main.nf
+++ b/modules/nf-core/kmcp/profile/main.nf
@@ -2,15 +2,14 @@ process KMCP_PROFILE {
tag "$meta.id"
label 'process_medium'
- conda "bioconda::kmcp=0.9.1"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/kmcp:0.9.1--h9ee0642_0':
- 'biocontainers/kmcp:0.9.1--h9ee0642_0' }"
+ 'https://depot.galaxyproject.org/singularity/kmcp:0.9.4--h9ee0642_0':
+ 'biocontainers/kmcp:0.9.4--h9ee0642_0' }"
input:
tuple val(meta), path(search_results)
path (db)
- val mode
output:
tuple val(meta), path("*.profile"), emit: profile
@@ -30,7 +29,6 @@ process KMCP_PROFILE {
$args \\
-X \$taxdump \\
-T \$taxid \\
- -m $mode \\
-j $task.cpus \\
-o ${prefix}.profile \\
$search_results
diff --git a/modules/nf-core/kmcp/profile/meta.yml b/modules/nf-core/kmcp/profile/meta.yml
index 14f292c7..ba1ca2a2 100644
--- a/modules/nf-core/kmcp/profile/meta.yml
+++ b/modules/nf-core/kmcp/profile/meta.yml
@@ -15,30 +15,21 @@ tools:
documentation: "https://bioinf.shenwei.me/kmcp/usage/#profile"
tool_dev_url: "https://github.com/shenwei356/kmcp"
doi: "10.1093/bioinformatics/btac845"
- licence: "['MIT']"
-
+ licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- - db:
- type: directory
- description: Database directory containing taxdump files and taxid file
- search_results:
type: file
description: Gzipped file output from kmcp search module
pattern: "*.gz"
- - mode:
- type: integer
- description: Profiling mode.
- 0-pathogen detection
- 1-higher recall
- 2-high recall
- 3-default
- 4-high precision
- 5-higher precision
+ - db:
+ type: directory
+ description: Database directory containing taxdump files and taxid file
+
output:
- meta:
type: map
@@ -53,6 +44,7 @@ output:
type: file
description: Tab-delimited format file with 17 columns.
pattern: "*.profile"
-
authors:
- "@sofstam"
+maintainers:
+ - "@sofstam"
diff --git a/modules/nf-core/kmcp/profile/tests/main.nf.test b/modules/nf-core/kmcp/profile/tests/main.nf.test
new file mode 100644
index 00000000..20b303ed
--- /dev/null
+++ b/modules/nf-core/kmcp/profile/tests/main.nf.test
@@ -0,0 +1,109 @@
+nextflow_process {
+
+ name "Test Process KMCP_PROFILE"
+ script "../main.nf"
+ process "KMCP_PROFILE"
+ config "./nextflow.config"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "kmcp"
+ tag "kmcp/profile"
+ tag "untar"
+ tag "kmcp/compute"
+ tag "kmcp/index"
+ tag "kmcp/search"
+
+ setup {
+ run("UNTAR") {
+ script "../../../untar/main.nf"
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + 'delete_me/kmcp/kmcp_profile.tar.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ run("KMCP_COMPUTE") {
+ script "../../../kmcp/compute/main.nf"
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ run("KMCP_INDEX") {
+ script "../../../kmcp/index/main.nf"
+ process {
+ """
+ input[0] = KMCP_COMPUTE.out.outdir
+ """
+ }
+ }
+
+ run("KMCP_SEARCH") {
+ script "../../../kmcp/search/main.nf"
+ process {
+ """
+ input[0] = [
+ [id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ input[1] = KMCP_INDEX.out.kmcp.map{it[1]}
+ """
+ }
+ }
+ }
+
+ test("sarscov2 - fasta") {
+ when {
+ process {
+ """
+ input[0] = KMCP_SEARCH.out.result
+ input[1] = UNTAR.out.untar.map{it[1]}
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out,
+ ).match()
+ }
+ )
+ }
+
+ }
+
+ test("sarscov2 - fasta - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = KMCP_SEARCH.out.result
+ input[1] = UNTAR.out.untar.map{it[1]}
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/kmcp/profile/tests/main.nf.test.snap b/modules/nf-core/kmcp/profile/tests/main.nf.test.snap
new file mode 100644
index 00000000..72b41ce8
--- /dev/null
+++ b/modules/nf-core/kmcp/profile/tests/main.nf.test.snap
@@ -0,0 +1,72 @@
+{
+ "sarscov2 - fasta - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_kmcp.profile:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,71f3499117cd6d006be15365b761d38b"
+ ],
+ "profile": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_kmcp.profile:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,71f3499117cd6d006be15365b761d38b"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-07T15:36:20.331533599"
+ },
+ "sarscov2 - fasta": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_kmcp.profile:md5,d7318c8f2d578ea2e6355f05789db4f3"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,71f3499117cd6d006be15365b761d38b"
+ ],
+ "profile": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_kmcp.profile:md5,d7318c8f2d578ea2e6355f05789db4f3"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,71f3499117cd6d006be15365b761d38b"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-07T15:36:13.138318234"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/kmcp/profile/tests/nextflow.config b/modules/nf-core/kmcp/profile/tests/nextflow.config
new file mode 100644
index 00000000..bee9ca0c
--- /dev/null
+++ b/modules/nf-core/kmcp/profile/tests/nextflow.config
@@ -0,0 +1,13 @@
+process {
+ withName: UNTAR {
+ ext.args2 = {"--no-same-owner"}
+ }
+
+ withName: KMCP_COMPUTE {
+ ext.prefix = { "${meta.id}_kmcp" }
+ }
+
+ withName: KMCP_PROFILE {
+ ext.prefix = { "${meta.id}_kmcp" }
+ }
+}
diff --git a/modules/nf-core/kmcp/profile/tests/tags.yml b/modules/nf-core/kmcp/profile/tests/tags.yml
new file mode 100644
index 00000000..7af489b0
--- /dev/null
+++ b/modules/nf-core/kmcp/profile/tests/tags.yml
@@ -0,0 +1,2 @@
+kmcp/profile:
+ - "modules/nf-core/kmcp/profile/**"
From d7497690ddf42e15b12da46ed1ff94cb8b28600b Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Wed, 12 Jun 2024 20:43:54 +0200
Subject: [PATCH 31/35] Update Taxpasta, which stops overly strict failures
---
CHANGELOG.md | 10 +-
conf/modules.config | 18 +--
modules.json | 4 +-
.../nf-core/taxpasta/merge/environment.yml | 7 ++
modules/nf-core/taxpasta/merge/main.nf | 30 +++--
modules/nf-core/taxpasta/merge/meta.yml | 27 +++--
.../nf-core/taxpasta/merge/tests/main.nf.test | 111 ++++++++++++++++++
.../taxpasta/merge/tests/main.nf.test.snap | 83 +++++++++++++
modules/nf-core/taxpasta/merge/tests/tags.yml | 2 +
.../taxpasta/standardise/environment.yml | 7 ++
modules/nf-core/taxpasta/standardise/main.nf | 29 +++--
modules/nf-core/taxpasta/standardise/meta.yml | 16 ++-
.../taxpasta/standardise/tests/main.nf.test | 58 +++++++++
.../standardise/tests/main.nf.test.snap | 68 +++++++++++
.../taxpasta/standardise/tests/tags.yml | 2 +
.../local/standardisation_profiles.nf | 18 ++-
16 files changed, 438 insertions(+), 52 deletions(-)
create mode 100644 modules/nf-core/taxpasta/merge/environment.yml
create mode 100644 modules/nf-core/taxpasta/merge/tests/main.nf.test
create mode 100644 modules/nf-core/taxpasta/merge/tests/main.nf.test.snap
create mode 100644 modules/nf-core/taxpasta/merge/tests/tags.yml
create mode 100644 modules/nf-core/taxpasta/standardise/environment.yml
create mode 100644 modules/nf-core/taxpasta/standardise/tests/main.nf.test
create mode 100644 modules/nf-core/taxpasta/standardise/tests/main.nf.test.snap
create mode 100644 modules/nf-core/taxpasta/standardise/tests/tags.yml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e98482ef..75d506df 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,13 +13,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133)
- [#491](https://github.com/nf-core/taxprofiler/pull/491) Added flag to publish intermediate bracken files (❤️ to @ewissel for reporting, fixed by @sofstam and @jfy133)
-- [489](https://github.com/nf-core/taxprofiler/pull/489) Fix KrakenUniq classified reads output format mismatch (❤️ to @SannaAb for reporting, fixed by @jfy133)
+- [#489](https://github.com/nf-core/taxprofiler/pull/489) Fix KrakenUniq classified reads output format mismatch (❤️ to @SannaAb for reporting, fixed by @jfy133)
+- [#493](https://github.com/nf-core/taxprofiler/pull/493) Stop TAXPASTA failures when profiles do not have exact compositionality (fixes by @Midnighter, @jfy133)
### `Dependencies`
-| Tool | Previous version | New version |
-| ---- | ---------------- | ----------- |
-| KMCP | 0.9.1 | 0.9.4 |
+| Tool | Previous version | New version |
+| -------- | ---------------- | ----------- |
+| KMCP | 0.9.1 | 0.9.4 |
+| TAXPASTA | 0.6.1 | 0.7.0 |
### `Deprecated`
diff --git a/conf/modules.config b/conf/modules.config
index 2148b69f..ee0bd5b5 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -754,15 +754,14 @@ process {
withName: TAXPASTA_MERGE {
tag = { "${meta.tool}|${meta.id}" }
+ ext.prefix = { "${meta.tool}_${meta.id}" }
ext.args = {
[
- "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}",
params.taxpasta_add_name ? "--add-name" : "",
params.taxpasta_add_rank ? "--add-rank" : "",
params.taxpasta_add_lineage ? "--add-lineage" : "",
params.taxpasta_add_idlineage ? "--add-id-lineage" : "",
params.taxpasta_add_ranklineage ? "--add-rank-lineage" : "",
- params.taxpasta_ignore_errors ? "--ignore-errors" : ""
].join(' ').trim()
}
publishDir = [
@@ -774,14 +773,15 @@ process {
withName: TAXPASTA_STANDARDISE {
tag = { "${meta.tool}|${meta.id}" }
+ ext.prefix = { "${meta.tool}_${meta.id}" }
ext.args = {
- [
- "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}",
- params.taxpasta_add_name ? "--add-name" : "",
- params.taxpasta_add_rank ? "--add-rank" : "",
- params.taxpasta_add_lineage ? "--add-lineage" : "",
- params.taxpasta_add_idlineage ? "--add-id-lineage" : ""
- ].join(' ').trim()
+ [
+ params.taxpasta_add_name ? "--add-name" : "",
+ params.taxpasta_add_rank ? "--add-rank" : "",
+ params.taxpasta_add_lineage ? "--add-lineage" : "",
+ params.taxpasta_add_idlineage ? "--add-id-lineage" : "",
+ params.taxpasta_add_ranklineage ? "--add-rank-lineage" : ""
+ ].join(' ').trim()
}
publishDir = [
path: { "${params.outdir}/taxpasta/" },
diff --git a/modules.json b/modules.json
index 04001af6..b7fc0290 100644
--- a/modules.json
+++ b/modules.json
@@ -228,12 +228,12 @@
},
"taxpasta/merge": {
"branch": "master",
- "git_sha": "48019785051ba491e82dce910273c2eca61bd5b7",
+ "git_sha": "4fd9089d3cf904e0b870d5a6a7ab903ee5e1004d",
"installed_by": ["modules"]
},
"taxpasta/standardise": {
"branch": "master",
- "git_sha": "48019785051ba491e82dce910273c2eca61bd5b7",
+ "git_sha": "4fd9089d3cf904e0b870d5a6a7ab903ee5e1004d",
"installed_by": ["modules"]
},
"untar": {
diff --git a/modules/nf-core/taxpasta/merge/environment.yml b/modules/nf-core/taxpasta/merge/environment.yml
new file mode 100644
index 00000000..ca1a10b7
--- /dev/null
+++ b/modules/nf-core/taxpasta/merge/environment.yml
@@ -0,0 +1,7 @@
+name: taxpasta_merge
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::taxpasta=0.7.0
diff --git a/modules/nf-core/taxpasta/merge/main.nf b/modules/nf-core/taxpasta/merge/main.nf
index de135221..662f6e79 100644
--- a/modules/nf-core/taxpasta/merge/main.nf
+++ b/modules/nf-core/taxpasta/merge/main.nf
@@ -2,14 +2,16 @@ process TAXPASTA_MERGE {
tag "$meta.id"
label 'process_single'
- conda "bioconda::taxpasta=0.6.1"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/taxpasta:0.6.1--pyhdfd78af_0':
- 'biocontainers/taxpasta:0.6.1--pyhdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/taxpasta:0.7.0--pyhdfd78af_0':
+ 'biocontainers/taxpasta:0.7.0--pyhdfd78af_0' }"
input:
tuple val(meta), path(profiles)
+ val profiler
+ val format
path taxonomy
path samplesheet
@@ -21,24 +23,34 @@ process TAXPASTA_MERGE {
task.ext.when == null || task.ext.when
script:
- // N.B.: Taxpasta requires a --profiler option and will fail without it.
- // This must be specified via a `nextflow.config` or `modules.config`, for
- // example, as "--profiler kraken2". Additionally, it requires a --output
- // option with the output file name. The desired format will be parsed from
- // the name and should correspond to the output pattern specified above,
- // e.g., "--output ${task.ext.prefix}.tsv".
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def taxonomy_option = taxonomy ? "--taxonomy ${taxonomy}" : ''
def samplesheet_input = samplesheet ? "-s ${samplesheet}" : ''
"""
taxpasta merge \\
+ --profiler $profiler \\
+ --output ${prefix}.${format} \\
$args \\
$taxonomy_option \\
$samplesheet_input \\
$profiles
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ taxpasta: \$(taxpasta --version)
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def taxonomy_option = taxonomy ? "--taxonomy ${taxonomy}" : ''
+ def samplesheet_input = samplesheet ? "-s ${samplesheet}" : ''
+ """
+ touch ${prefix}.${format}
+
cat <<-END_VERSIONS > versions.yml
"${task.process}":
taxpasta: \$(taxpasta --version)
diff --git a/modules/nf-core/taxpasta/merge/meta.yml b/modules/nf-core/taxpasta/merge/meta.yml
index ed89e62c..a4cbed94 100644
--- a/modules/nf-core/taxpasta/merge/meta.yml
+++ b/modules/nf-core/taxpasta/merge/meta.yml
@@ -14,9 +14,7 @@ tools:
homepage: "https://taxpasta.readthedocs.io/"
documentation: "https://taxpasta.readthedocs.io/"
tool_dev_url: "https://github.com/taxprofiler/taxpasta"
-
- licence: "['Apache-2.0']"
-
+ licence: ["Apache-2.0"]
input:
- meta:
type: map
@@ -27,17 +25,22 @@ input:
type: file
description: A list of taxonomic profiler output files (typically in text format, mandatory)
pattern: "*.{tsv,csv,arrow,parquet,biom}"
- - samplesheet:
- type: file
- description:
- A samplesheet describing the sample name and a filepath to a taxonomic abundance profile that needs to be relative
- from the work environment. The profiles must be provided even if you give a samplesheet as argument (optional)
- pattern: "*.{tsv,csv,ods,xlsx,arrow,parquet}"
+ - profiler:
+ type: string
+ description: Name of the profiler used to generate the profile (mandatory)
+ pattern: "bracken|centrifuge|diamond|ganon|kaiju|kmcp|kraken2|krakenuniq|megan6|metaphlan|motus"
+ - format:
+ type: string
+ description: Type of output file to be generated
+ pattern: "tsv|csv|ods|xlsx|arrow|parquet|biom"
- taxonomy:
type: directory
description: Directory containing at a minimum nodes.dmp and names.dmp files (optional)
pattern: "*/"
-
+ - samplesheet:
+ type: file
+ description: A samplesheet describing the sample name and a filepath to a taxonomic abundance profile that needs to be relative from the Nextflow work directory of the executed process. The profiles must be provided even if you give a samplesheet as argument (optional)
+ pattern: "*.{tsv,csv,ods,xlsx,arrow,parquet}"
output:
- meta:
type: map
@@ -52,7 +55,9 @@ output:
type: file
description: Output file with standardised multiple profiles in one go and have all profiles combined into a single table.
pattern: "*.{tsv,csv,ods,xlsx,arrow,parquet,biom}"
-
authors:
- "@sofstam"
- "@jfy133"
+maintainers:
+ - "@sofstam"
+ - "@jfy133"
diff --git a/modules/nf-core/taxpasta/merge/tests/main.nf.test b/modules/nf-core/taxpasta/merge/tests/main.nf.test
new file mode 100644
index 00000000..886e93b9
--- /dev/null
+++ b/modules/nf-core/taxpasta/merge/tests/main.nf.test
@@ -0,0 +1,111 @@
+nextflow_process {
+
+ name "Test Process TAXPASTA_MERGE"
+ script "../main.nf"
+ process "TAXPASTA_MERGE"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "taxpasta"
+ tag "taxpasta/merge"
+
+ test("sarscov2 - metagenome - kraken report") {
+
+ when {
+ process {
+ """
+ ch_test1_kraken = Channel.fromPath(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true).collectFile(name: 'test_1.kraken2.report.txt')
+ ch_test2_kraken = Channel.fromPath(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true).collectFile(name: 'test_2.kraken2.report.txt')
+
+ input[0] = ch_test1_kraken.mix ( ch_test2_kraken )
+ .collect()
+ .map { files ->
+ def meta = [:]
+ meta['id'] = 'kraken2'
+ meta['profiler'] = 'kraken2'
+ [meta, files.sort()]
+
+ }
+ input[1] = 'kraken2'
+ input[2] = 'tsv'
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.merged_profiles[0][1]).readLines().any { it.contains('2697049 100 100') },
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - metagenome - kraken report - samplesheet") {
+
+ when {
+ process {
+ """
+ ch_test1_kraken = Channel.fromPath(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true).collectFile(name: 'test_1.kraken2.report.txt')
+ ch_test2_kraken = Channel.fromPath(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true).collectFile(name: 'test_2.kraken2.report.txt')
+
+ input[0] = ch_test1_kraken.mix ( ch_test2_kraken )
+ .collect()
+ .map { files ->
+ def meta = [:]
+ meta['id'] = 'kraken2'
+ meta['profiler'] = 'kraken2'
+ [meta, files.sort()]
+ }
+ input[1] = 'kraken2'
+ input[2] = 'tsv'
+ input[3] = []
+ input[4] = Channel.of(
+ 'sample\tprofile',
+ 'test_1\t"test_1.kraken2.report.txt"',
+ 'test_2\t"test_2.kraken2.report.txt"'
+ )
+ .collectFile(name: 'samplesheet.tsv', newLine: true, sort: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - metagenome - kraken report - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [[id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true)]
+ input[1] = 'kraken2'
+ input[2] = 'tsv'
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/taxpasta/merge/tests/main.nf.test.snap b/modules/nf-core/taxpasta/merge/tests/main.nf.test.snap
new file mode 100644
index 00000000..afb7e491
--- /dev/null
+++ b/modules/nf-core/taxpasta/merge/tests/main.nf.test.snap
@@ -0,0 +1,83 @@
+{
+ "sarscov2 - metagenome - kraken report": {
+ "content": [
+ true,
+ [
+ "versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-11T06:13:35.985987277"
+ },
+ "sarscov2 - metagenome - kraken report - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d"
+ ],
+ "merged_profiles": [
+ [
+ {
+ "id": "test"
+ },
+ "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-10T22:04:59.08186623"
+ },
+ "sarscov2 - metagenome - kraken report - samplesheet": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "kraken2",
+ "profiler": "kraken2"
+ },
+ "kraken2.tsv:md5,3a31a2bbff49f6e03083a2e03f4f6563"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d"
+ ],
+ "merged_profiles": [
+ [
+ {
+ "id": "kraken2",
+ "profiler": "kraken2"
+ },
+ "kraken2.tsv:md5,3a31a2bbff49f6e03083a2e03f4f6563"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-10T22:25:57.576974385"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/taxpasta/merge/tests/tags.yml b/modules/nf-core/taxpasta/merge/tests/tags.yml
new file mode 100644
index 00000000..5d17220c
--- /dev/null
+++ b/modules/nf-core/taxpasta/merge/tests/tags.yml
@@ -0,0 +1,2 @@
+taxpasta/merge:
+ - "modules/nf-core/taxpasta/merge/**"
diff --git a/modules/nf-core/taxpasta/standardise/environment.yml b/modules/nf-core/taxpasta/standardise/environment.yml
new file mode 100644
index 00000000..a48f08e0
--- /dev/null
+++ b/modules/nf-core/taxpasta/standardise/environment.yml
@@ -0,0 +1,7 @@
+name: taxpasta_standardise
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::taxpasta=0.7.0
diff --git a/modules/nf-core/taxpasta/standardise/main.nf b/modules/nf-core/taxpasta/standardise/main.nf
index 7822912a..7b393517 100644
--- a/modules/nf-core/taxpasta/standardise/main.nf
+++ b/modules/nf-core/taxpasta/standardise/main.nf
@@ -2,13 +2,15 @@ process TAXPASTA_STANDARDISE {
tag "$meta.id"
label 'process_single'
- conda "bioconda::taxpasta=0.6.1"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/taxpasta:0.6.1--pyhdfd78af_0':
- 'biocontainers/taxpasta:0.6.1--pyhdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/taxpasta:0.7.0--pyhdfd78af_0':
+ 'biocontainers/taxpasta:0.7.0--pyhdfd78af_0' }"
input:
tuple val(meta), path(profile)
+ val profiler
+ val format
path taxonomy
output:
@@ -19,17 +21,13 @@ process TAXPASTA_STANDARDISE {
task.ext.when == null || task.ext.when
script:
- // N.B.: Taxpasta requires a --profiler option and will fail without it.
- // This must be specified via a `nextflow.config` or `modules.config`, for
- // example, as "--profiler kraken2". Additionally, it requires a --output
- // option with the output file name. The desired format will be parsed from
- // the name and should correspond to the output pattern specified above,
- // e.g., "--output ${task.ext.prefix}.tsv".
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def taxonomy_option = taxonomy ? "--taxonomy ${taxonomy}" : ''
"""
taxpasta standardise \\
+ --profiler $profiler \\
+ --output ${prefix}.${format} \\
$args \\
$taxonomy_option \\
$profile
@@ -39,4 +37,17 @@ process TAXPASTA_STANDARDISE {
taxpasta: \$(taxpasta --version)
END_VERSIONS
"""
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def taxonomy_option = taxonomy ? "--taxonomy ${taxonomy}" : ''
+ """
+ touch ${prefix}.${format}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ taxpasta: \$(taxpasta --version)
+ END_VERSIONS
+ """
}
diff --git a/modules/nf-core/taxpasta/standardise/meta.yml b/modules/nf-core/taxpasta/standardise/meta.yml
index 81df6e2c..b855905a 100644
--- a/modules/nf-core/taxpasta/standardise/meta.yml
+++ b/modules/nf-core/taxpasta/standardise/meta.yml
@@ -14,9 +14,7 @@ tools:
homepage: "https://taxpasta.readthedocs.io/"
documentation: "https://taxpasta.readthedocs.io/"
tool_dev_url: "https://github.com/taxprofiler/taxpasta"
-
- licence: "['Apache-2.0']"
-
+ licence: ["Apache-2.0"]
input:
- meta:
type: map
@@ -27,11 +25,18 @@ input:
type: file
description: profiler output file (mandatory)
pattern: "*"
+ - profiler:
+ type: string
+ description: Name of the profiler used to generate the profile (mandatory)
+ pattern: "bracken|centrifuge|diamond|ganon|kaiju|kmcp|kraken2|krakenuniq|megan6|metaphlan|motus"
+ - format:
+ type: string
+ description: Type of output file to be generated
+ pattern: "tsv|csv|ods|xlsx|arrow|parquet|biom"
- taxonomy:
type: directory
description: Directory containing at a minimum nodes.dmp and names.dmp files (optional)
pattern: "*/"
-
output:
- meta:
type: map
@@ -46,6 +51,7 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
-
authors:
- "@Midnighter"
+maintainers:
+ - "@Midnighter"
diff --git a/modules/nf-core/taxpasta/standardise/tests/main.nf.test b/modules/nf-core/taxpasta/standardise/tests/main.nf.test
new file mode 100644
index 00000000..e06ca7d6
--- /dev/null
+++ b/modules/nf-core/taxpasta/standardise/tests/main.nf.test
@@ -0,0 +1,58 @@
+nextflow_process {
+
+ name "Test Process TAXPASTA_STANDARDISE"
+ script "../main.nf"
+ process "TAXPASTA_STANDARDISE"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "taxpasta"
+ tag "taxpasta/standardise"
+
+ test("sarscov2 - metagenome - kraken report") {
+
+
+ when {
+ process {
+ """
+ input[0] = [[id: 'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true)]
+ input[1] = "kraken2"
+ input[2] = 'tsv'
+ input[3] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - metagenome - kraken report - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [[id: 'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true)]
+ input[1] = "kraken2"
+ input[2] = 'tsv'
+ input[3] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/taxpasta/standardise/tests/main.nf.test.snap b/modules/nf-core/taxpasta/standardise/tests/main.nf.test.snap
new file mode 100644
index 00000000..498711e3
--- /dev/null
+++ b/modules/nf-core/taxpasta/standardise/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+ "sarscov2 - metagenome - kraken report": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.tsv:md5,6b62032ed403f431eeb0e17464ccc69b"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,bf00feb66945aab46a78efafac5a261f"
+ ],
+ "standardised_profile": [
+ [
+ {
+ "id": "test"
+ },
+ "test.tsv:md5,6b62032ed403f431eeb0e17464ccc69b"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,bf00feb66945aab46a78efafac5a261f"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-11T12:35:34.381682299"
+ },
+ "sarscov2 - metagenome - kraken report - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,bf00feb66945aab46a78efafac5a261f"
+ ],
+ "standardised_profile": [
+ [
+ {
+ "id": "test"
+ },
+ "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,bf00feb66945aab46a78efafac5a261f"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-11T12:35:41.579178159"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/taxpasta/standardise/tests/tags.yml b/modules/nf-core/taxpasta/standardise/tests/tags.yml
new file mode 100644
index 00000000..43ec42dd
--- /dev/null
+++ b/modules/nf-core/taxpasta/standardise/tests/tags.yml
@@ -0,0 +1,2 @@
+taxpasta/standardise:
+ - "modules/nf-core/taxpasta/standardise/**"
diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf
index 95cd9d3f..ac204497 100644
--- a/subworkflows/local/standardisation_profiles.nf
+++ b/subworkflows/local/standardisation_profiles.nf
@@ -93,11 +93,23 @@ workflow STANDARDISATION_PROFILES {
standardise: true
}
+ ch_input_for_taxpasta_merge = ch_input_for_taxpasta.merge
+ .multiMap{ meta, profiles ->
+ profiles: [meta, profiles]
+ tool: meta.tool
+ }
- TAXPASTA_MERGE (ch_input_for_taxpasta.merge , ch_taxpasta_tax_dir, [])
+ ch_input_for_taxpasta_standardise = ch_input_for_taxpasta.standardise
+ .multiMap{ meta, profiles ->
+ profiles: [meta, profiles]
+ tool: meta.tool
+ }
+
+
+ TAXPASTA_MERGE ( ch_input_for_taxpasta_merge.profiles , ch_input_for_taxpasta_merge.tool , params.standardisation_taxpasta_format, ch_taxpasta_tax_dir, [] )
+ TAXPASTA_STANDARDISE ( ch_input_for_taxpasta_standardise.profiles, ch_input_for_taxpasta_standardise.tool, params.standardisation_taxpasta_format, ch_taxpasta_tax_dir )
ch_versions = ch_versions.mix( TAXPASTA_MERGE.out.versions.first() )
- TAXPASTA_STANDARDISE (ch_input_for_taxpasta.standardise, ch_taxpasta_tax_dir )
- ch_version = ch_versions.mix( TAXPASTA_STANDARDISE.out.versions.first() )
+ ch_versions = ch_versions.mix( TAXPASTA_STANDARDISE.out.versions.first() )
From 0a3ce1c52072bcac2e8fe32bcf35f643a46f9f82 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 13 Jun 2024 12:41:00 +0200
Subject: [PATCH 32/35] Update CHANGELOG.md
Co-authored-by: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>
---
CHANGELOG.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 75d506df..4b2619cd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133)
- [#491](https://github.com/nf-core/taxprofiler/pull/491) Added flag to publish intermediate bracken files (❤️ to @ewissel for reporting, fixed by @sofstam and @jfy133)
- [#489](https://github.com/nf-core/taxprofiler/pull/489) Fix KrakenUniq classified reads output format mismatch (❤️ to @SannaAb for reporting, fixed by @jfy133)
-- [#493](https://github.com/nf-core/taxprofiler/pull/489) Stop TAXPASTA failures when profiles do not have exact compositionality (fixes by @Midnighter, @jfy133)
+- [#495](https://github.com/nf-core/taxprofiler/pull/495) Stop TAXPASTA failures when profiles do not have exact compositionality (fixes by @Midnighter, @jfy133)
### `Dependencies`
From ed7639c1b70a35e55d7841dc8ecb11efb7e33b4f Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 20 Jun 2024 08:23:47 +0200
Subject: [PATCH 33/35] Bump version for 1.1.8 release
---
CHANGELOG.md | 2 +-
assets/multiqc_config.yml | 4 ++--
nextflow.config | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b2619cd..9f3e88de 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## v1.1.8dev - Augmented Akita Patch []
+## v1.1.8dev - Augmented Akita Patch [2024-06-20]
### `Added`
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 0d892103..e2801c44 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,8 +1,8 @@
report_comment: >
- This report has been generated by the nf-core/taxprofiler
+ This report has been generated by the nf-core/taxprofiler
analysis pipeline. For information about how to interpret these results, please see the
- documentation.
+ documentation.
report_section_order:
"nf-core-taxprofiler-methods-description":
diff --git a/nextflow.config b/nextflow.config
index 83cbf39f..638089c8 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -385,7 +385,7 @@ manifest {
description = """Taxonomic classification and profiling of shotgun short- and long-read metagenomic data"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
- version = '1.1.8dev'
+ version = '1.1.8'
doi = '10.1101/2023.10.20.563221'
}
From e84e508acf0399ca24577ca9813177d9b142708e Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 20 Jun 2024 10:02:01 +0200
Subject: [PATCH 34/35] Update CHANGELOG.md
Co-authored-by: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
---
CHANGELOG.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9f3e88de..088e8898 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## v1.1.8dev - Augmented Akita Patch [2024-06-20]
+## v1.1.8 - Augmented Akita Patch [2024-06-20]
### `Added`
From e21b3ed4fccd7d9ba11cae28b8337039a6e3443c Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Thu, 20 Jun 2024 16:21:20 +0200
Subject: [PATCH 35/35] Update CHANGELOG.md
Co-authored-by: Friederike Hanssen
---
CHANGELOG.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 088e8898..d512ab9f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Added`
-- [#487](https://github.com/nf-core/taxprofiler/pull/487) Updated to nf-core pipeline template v2.14.1 (added by jfy133)
+- [#487](https://github.com/nf-core/taxprofiler/pull/487) Updated to nf-core pipeline template v2.14.1 (added by @jfy133)
### `Fixed`