From 4b918c5ba595027effb666857cef075ec34980c5 Mon Sep 17 00:00:00 2001 From: Lili Andersson-Li <64467552+LilyAnderssonLee@users.noreply.github.com> Date: Mon, 29 Apr 2024 10:23:06 +0200 Subject: [PATCH 01/35] bump version --- CHANGELOG.md | 10 ++++++++++ assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d607106e..b1435a66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.1.8dev - Augmented Akita Patch [] + +### `Added` + +### `Fixed` + +### `Dependencies` + +### `Deprecated` + ## v1.1.7 - Augmented Akita Patch [2024-04-25] ### `Added` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index b9c37c75..e2fffe8a 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/taxprofiler + This report has been generated by the nf-core/taxprofiler analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: "nf-core-taxprofiler-methods-description": diff --git a/nextflow.config b/nextflow.config index 0176e77a..4a125626 100644 --- a/nextflow.config +++ b/nextflow.config @@ -378,7 +378,7 @@ manifest { description = """Taxonomic classification and profiling of shotgun short- and long-read metagenomic data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.1.7' + version = '1.1.8' doi = '10.1101/2023.10.20.563221' } From 24ec75b2944063466c94d9001cce0f717c8cc01a Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 8 May 2024 14:58:14 +0000 Subject: [PATCH 02/35] Template update for nf-core/tools version 2.14.0 --- .editorconfig | 6 +- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/workflows/awsfulltest.yml | 10 +- .github/workflows/awstest.yml | 12 +- .github/workflows/ci.yml | 4 +- .github/workflows/download_pipeline.yml | 22 ++- .github/workflows/fix-linting.yml | 6 +- .github/workflows/linting.yml | 18 +- .github/workflows/linting_comment.yml | 2 +- .github/workflows/release-announcements.yml | 6 +- .nf-core.yml | 1 + .pre-commit-config.yaml | 3 + CHANGELOG.md | 2 +- README.md | 2 +- assets/multiqc_config.yml | 6 +- conf/base.config | 3 - conf/modules.config | 8 - conf/test.config | 2 +- conf/test_full.config | 2 +- docs/usage.md | 2 + modules.json | 4 +- modules/nf-core/fastqc/main.nf | 6 + nextflow.config | 178 +++++++++--------- nextflow_schema.json | 7 + pyproject.toml | 15 -- .../utils_nfcore_taxprofiler_pipeline/main.nf | 16 +- .../nf-core/utils_nfcore_pipeline/main.nf | 8 +- workflows/taxprofiler.nf | 46 +++-- 28 files changed, 226 insertions(+), 173 deletions(-) delete mode 100644 pyproject.toml diff --git a/.editorconfig b/.editorconfig index dd9ffa53..72dda289 100644 --- a/.editorconfig +++ b/.editorconfig @@ -28,10 +28,6 @@ indent_style = unset [/assets/email*] indent_size = unset -# ignore Readme -[README.md] -indent_style = unset - -# ignore python +# ignore python and markdown [*.{py,md}] indent_style = unset 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 888970b6..69a2dd8e 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/taxp - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/taxprofiler/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/taxprofiler _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. 
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 47fda249..da8c95dd 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -8,12 +8,12 @@ on: types: [published] workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS full tests if: github.repository == 'nf-core/taxprofiler' runs-on: ubuntu-latest steps: - - name: Launch workflow via tower + - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 # TODO nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) @@ -33,7 +33,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: Tower debug log file + name: Seqera Platform debug log file path: | - tower_action_*.log - tower_action_*.json + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 7b722e77..08f95b7e 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -5,13 +5,13 @@ name: nf-core AWS test on: workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS tests if: github.repository == 'nf-core/taxprofiler' runs-on: ubuntu-latest steps: - # Launch workflow using Tower CLI tool action - - name: Launch workflow via tower + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} @@ -27,7 +27,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: Tower debug log file + name: Seqera Platform debug log file path: | - tower_action_*.log - tower_action_*.json + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 48699335..72f908a1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,10 +28,10 @@ jobs: 
- "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 08622fd5..2d20d644 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -14,6 +14,8 @@ on: pull_request: types: - opened + - edited + - synchronize branches: - master pull_request_target: @@ -28,11 +30,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 with: @@ -65,8 +70,17 @@ jobs: - name: Inspect download run: tree ./${{ env.REPOTITLE_LOWERCASE }} - - name: Run the downloaded pipeline + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true env: NXF_SINGULARITY_CACHEDIR: ./ NXF_SINGULARITY_HOME_MOUNT: true run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ job.steps.stub_run_pipeline.status == failure() }} + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ 
env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 74c1ce02..7a068de2 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} @@ -32,9 +32,9 @@ jobs: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} # Install and run pre-commit - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 + python-version: "3.12" - name: Install pre-commit run: pip install pre-commit diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 073e1876..a3fb2541 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,12 +14,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - name: Set up Python 3.11 - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 + python-version: "3.12" cache: "pip" - name: Install pre-commit @@ -32,14 +32,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: 
actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" - name: Install dependencies @@ -60,7 +60,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index b706875f..40acc23f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index d468aeaa..03ecfcf7 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -12,7 +12,7 @@ jobs: - name: get topics and convert to hashtags id: get_topics run: | - curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' >> $GITHUB_OUTPUT + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT - uses: rzr/fediverse-action@master with: @@ -25,13 +25,13 @@ jobs: Please see the changelog: ${{ github.event.release.html_url }} - ${{ steps.get_topics.outputs.GITHUB_OUTPUT }} #nfcore #openscience #nextflow #bioinformatics + 
${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics send-tweet: runs-on: ubuntu-latest steps: - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: python-version: "3.10" - name: Install dependencies diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc81..d6daa403 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,2 @@ repository_type: pipeline +nf_core_version: "2.14.0" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index af57081f..4dc0f1dc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,6 +3,9 @@ repos: rev: "v3.1.0" hooks: - id: prettier + additional_dependencies: + - prettier@3.2.5 + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python rev: "2.7.3" hooks: diff --git a/CHANGELOG.md b/CHANGELOG.md index 313368e1..9147a2ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.1.6dev - [date] +## v1.1.8 - [date] Initial release of nf-core/taxprofiler, created with the [nf-core](https://nf-co.re/) template. 
diff --git a/README.md b/README.md index 925afc70..51ee4946 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/taxprofiler) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/taxprofiler) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23taxprofiler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/taxprofiler)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 8e75884e..a94ccdf3 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,9 @@ report_comment: > - This report has been generated by the nf-core/taxprofiler + + This report has been generated by the nf-core/taxprofiler analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
+ report_section_order: "nf-core-taxprofiler-methods-description": order: -1000 diff --git a/conf/base.config b/conf/base.config index 372f0798..05b8bb74 100644 --- a/conf/base.config +++ b/conf/base.config @@ -59,7 +59,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } } diff --git a/conf/modules.config b/conf/modules.config index e3ea8fa6..d203d2b6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -22,14 +22,6 @@ process { ext.args = '--quiet' } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } - withName: 'MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ diff --git a/conf/test.config b/conf/test.config index 42772cfe..c60bebf8 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,7 +22,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' // Genome references genome = 'R64-1-1' diff --git a/conf/test_full.config b/conf/test_full.config index 49a10a0f..8ad76741 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,7 +17,7 @@ params { // Input data for full size test // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' // Genome references genome = 'R64-1-1' diff --git a/docs/usage.md b/docs/usage.md index 286da89c..cfd7f3e2 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -156,6 +156,8 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). - `conda` - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. 
diff --git a/modules.json b/modules.json index be780a86..e0e3d5e9 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "fastqc": { "branch": "master", - "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", + "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", "installed_by": ["modules"] }, "multiqc": { @@ -26,7 +26,7 @@ }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 9e19a74c..d79f1c86 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -25,6 +25,11 @@ process FASTQC { def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) + """ printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name @@ -33,6 +38,7 @@ process FASTQC { fastqc \\ $args \\ --threads $task.cpus \\ + --memory $fastqc_memory \\ $renamed_files cat <<-END_VERSIONS > versions.yml diff --git a/nextflow.config b/nextflow.config index c8212265..5f1880d8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,7 +16,8 @@ params { genome = null igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false - fasta = null// MultiQC options + + // MultiQC options multiqc_config = null multiqc_title = null multiqc_logo = null @@ -24,15 +25,16 @@ params { multiqc_methods_description = null // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' // Config options config_profile_name = null @@ -68,103 +70,109 @@ try { } // Load nf-core/taxprofiler custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! 
-// try { -// includeConfig "${params.custom_config_base}/pipeline/taxprofiler.config" -// } catch (Exception e) { -// System.err.println("WARNING: Could not load nf-core/config/taxprofiler profiles: ${params.custom_config_base}/pipeline/taxprofiler.config") -// } +try { + includeConfig "${params.custom_config_base}/pipeline/taxprofiler.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config/taxprofiler profiles: ${params.custom_config_base}/pipeline/taxprofiler.config") +} profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - channels = ['conda-forge', 'bioconda', 'defaults'] - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + docker.enabled = true + conda.enabled = false + singularity.enabled = 
false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true 
- conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } @@ -231,7 +239,7 @@ manifest { description = """Taxonomic classification and profiling of shotgun short- and long-read metagenomic data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.1.6dev' + version = '1.1.8' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index b7fd15d7..5f7b2f63 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -265,6 +265,13 @@ "description": "Validation of parameters in lenient more.", "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true } } } diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 56110621..00000000 --- a/pyproject.toml +++ /dev/null @@ -1,15 +0,0 @@ -# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff. 
-# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. -[tool.ruff] -line-length = 120 -target-version = "py38" -cache-dir = "~/.cache/ruff" - -[tool.ruff.lint] -select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] - -[tool.ruff.lint.isort] -known-first-party = ["nf_core"] - -[tool.ruff.lint.per-file-ignores] -"__init__.py" = ["E402", "F401"] diff --git a/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf b/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf index 312db322..e4b9018c 100644 --- a/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf @@ -140,6 +140,10 @@ workflow PIPELINE_COMPLETION { imNotification(summary_params, hook_url) } } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } } /* @@ -230,8 +234,16 @@ def methodsDescriptionText(mqc_methods_yaml) { meta["manifest_map"] = workflow.manifest.toMap() // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + String[] manifest_doi = meta.manifest_map.doi.tokenize(",") + for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " // Tool references meta["tool_citations"] = "" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index a8b55d6f..14558c39 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -65,9 +65,15 @@ def checkProfileProvided(nextflow_cli_args) { // Citation string for pipeline // def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + - " ${workflow.manifest.doi}\n\n" + + temp_doi_ref + "\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index e722ebaa..ae4f6268 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -40,22 +40,44 @@ workflow TAXPROFILER { // Collate and save software versions // softwareVersionsToYAML(ch_versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true) - .set { ch_collated_versions } + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_pipeline_software_mqc_versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } // // MODULE: MultiQC // - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + ch_multiqc_config = Channel.fromPath( + "$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? + Channel.fromPath(params.multiqc_config, checkIfExists: true) : + Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? + Channel.fromPath(params.multiqc_logo, checkIfExists: true) : + Channel.empty() + + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
+ file(params.multiqc_methods_description, checkIfExists: true) : + file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = ch_multiqc_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: true + ) + ) MULTIQC ( ch_multiqc_files.collect(), From 4726da5f186c7c577b139c9d9dadf6038d3ea3eb Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 8 May 2024 21:06:16 +0200 Subject: [PATCH 03/35] Make input validation stricter to ensure run_accessions are unique within each sample --- assets/schema_input.json | 1 + 1 file changed, 1 insertion(+) diff --git a/assets/schema_input.json b/assets/schema_input.json index 6acc00f7..cc335436 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -14,6 +14,7 @@ }, "run_accession": { "type": "string", + "unique": ["sample"], "errorMessage": "Run accession must be provided and cannot contain spaces." }, "instrument_platform": { From 5ffcedf6ff36c83b907c43b8b6b6f56c12a0fe52 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 8 May 2024 21:08:47 +0200 Subject: [PATCH 04/35] Update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1435a66..15660910 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133) + ### `Dependencies` ### `Deprecated` From 71328f85b2140d6c2f6a2ff04fee51a127844339 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 8 May 2024 19:09:56 +0000 Subject: [PATCH 05/35] [automated] Fix code linting --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15660910..a2f9d25a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -- [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133) +- [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133) ### `Dependencies` From c19d1c75fe4e6b7be6b5b62ae63bb4816735ce97 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Wed, 8 May 2024 23:07:58 +0200 Subject: [PATCH 06/35] Update_documentation --- docs/usage.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index edeca74d..00cdb564 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -46,11 +46,13 @@ The `sample` identifiers have 
to be the same when you have re-sequenced the same ```csv title="samplesheet.csv" sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta -2612,run1,ILLUMINA,2612_run1_R1.fq.gz,, -2612,run2,ILLUMINA,2612_run2_R1.fq.gz,, -2612,run3,ILLUMINA,2612_run3_R1.fq.gz,2612_run3_R2.fq.gz, +2612,lane1,ILLUMINA,2612_lane1_R1.fq.gz,ILLUMINA,2612_lane1_R2.fq.gz, +2612,lane2,ILLUMINA,2612_lane2_R1.fq.gz,ILLUMINA,2612_lane2_R2.fq.gz, +2612,lane3,ILLUMINA,2612_lane3_R1.fq.gz,, ``` +Please note that the column name `run_accession` is following the ENA terms and is a single or paired-end set of demultiplexed FASTQs. Given that demultiplexing happens per lane, each sequencing pair from each lane is a 'run' and therefore you get multiple 'runs' per sample, which can span across both lanes and sequencing libraries. + :::warning Runs of the same sample sequenced on Illumina platforms with a combination of single and paired-end data will **not** be run-wise concatenated, unless pair-merging is specified. In the example above, `run3` will be profiled independently of `run1` and `run2` if pairs are not merged. 
::: From 8699a98736f65a49b380c4b8320caf34198faebd Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 9 May 2024 11:43:36 +0000 Subject: [PATCH 07/35] Template update for nf-core/tools version 2.14.1 --- .github/workflows/linting.yml | 1 - .nf-core.yml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index a3fb2541..1fcafe88 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -20,7 +20,6 @@ jobs: uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: python-version: "3.12" - cache: "pip" - name: Install pre-commit run: pip install pre-commit diff --git a/.nf-core.yml b/.nf-core.yml index d6daa403..e0b85a77 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,2 +1,2 @@ repository_type: pipeline -nf_core_version: "2.14.0" +nf_core_version: "2.14.1" From 6ded3101f7668fe874dd2fabb56c3cf1f6de2431 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 16 May 2024 08:40:44 +0200 Subject: [PATCH 08/35] Fix linting --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index d8e704e5..1956605e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -788,7 +788,7 @@ process { pattern: '*.{tsv,csv,arrow,parquet,biom}' ] } - + withName: 'MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ From aa3e5528a6c25ba0afd70591e3fd143c4fb5ad0d Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 16 May 2024 08:43:23 +0200 Subject: [PATCH 09/35] Wholesale replacement of hard link with param for test data --- conf/test.config | 2 +- conf/test_adapterremoval.config | 6 +++--- conf/test_bbduk.config | 6 +++--- conf/test_falco.config | 6 +++--- conf/test_fastp.config | 6 +++--- conf/test_krakenuniq.config | 8 ++++---- conf/test_malt.config | 6 +++--- conf/test_motus.config | 4 ++-- conf/test_nopreprocessing.config | 6 +++--- conf/test_noprofiling.config | 6 +++--- conf/test_nothing.config | 6 +++--- conf/test_prinseqplusplus.config | 6 +++--- 12 files changed, 34 insertions(+), 34 deletions(-) diff --git a/conf/test.config b/conf/test.config index 90b4a2bd..e8cd48da 100644 --- a/conf/test.config +++ b/conf/test.config @@ -21,7 +21,7 @@ params { // Input data input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' - databases = params.pipelines_testdata_base_path + '/taxprofiler/database_v1.1.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_mergepairs = true diff --git a/conf/test_adapterremoval.config b/conf/test_adapterremoval.config index c3422d02..9f9d5b43 100644 --- a/conf/test_adapterremoval.config +++ b/conf/test_adapterremoval.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'database_v1.1.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_tool = 'adapterremoval' @@ -29,7 +29,7 @@ params { perform_shortread_hostremoval = true perform_longread_hostremoval = true perform_runmerging = true - hostremoval_reference = 
'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true run_bracken = false diff --git a/conf/test_bbduk.config b/conf/test_bbduk.config index 623fe191..e92fea8c 100644 --- a/conf/test_bbduk.config +++ b/conf/test_bbduk.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = true perform_longread_qc = true perform_shortread_complexityfilter = true @@ -29,7 +29,7 @@ params { perform_shortread_hostremoval = true perform_longread_hostremoval = true perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true run_bracken = false diff --git a/conf/test_falco.config b/conf/test_falco.config index 3fb77c03..03d80593 100644 --- a/conf/test_falco.config +++ b/conf/test_falco.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' preprocessing_qc_tool = 'falco' 
perform_shortread_qc = true perform_longread_qc = true @@ -29,7 +29,7 @@ params { perform_shortread_hostremoval = false perform_longread_hostremoval = false perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false run_bracken = false diff --git a/conf/test_fastp.config b/conf/test_fastp.config index 3feeae7a..52767546 100644 --- a/conf/test_fastp.config +++ b/conf/test_fastp.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_tool = 'fastp' @@ -30,7 +30,7 @@ params { perform_shortread_hostremoval = true perform_longread_hostremoval = true perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true run_bracken = false diff --git a/conf/test_krakenuniq.config b/conf/test_krakenuniq.config index e93de158..f116af38 100644 --- a/conf/test_krakenuniq.config +++ b/conf/test_krakenuniq.config @@ -24,8 +24,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 
'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_krakenuniq.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_krakenuniq.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_mergepairs = true @@ -33,7 +33,7 @@ params { perform_shortread_hostremoval = true perform_longread_hostremoval = true perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false run_bracken = false @@ -47,7 +47,7 @@ params { kmcp_mode = 0 run_ganon = false run_krona = true - krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab' + krona_taxonomy_directory = params.pipelines_testdata_base_path + 'modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab' malt_save_reads = false kraken2_save_reads = false centrifuge_save_reads = false diff --git a/conf/test_malt.config b/conf/test_malt.config index 7e5f2df3..d4a91771 100644 --- a/conf/test_malt.config +++ b/conf/test_malt.config @@ -24,15 +24,15 @@ params { max_time = '6.h' // Input data - input = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/samplesheet_malt.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet_malt.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = false perform_longread_qc = false perform_shortread_complexityfilter = false perform_shortread_hostremoval = false perform_longread_hostremoval = false perform_runmerging = false - 
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false run_bracken = false diff --git a/conf/test_motus.config b/conf/test_motus.config index ef1a2276..44cc36ff 100644 --- a/conf/test_motus.config +++ b/conf/test_motus.config @@ -24,7 +24,7 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' databases = 'database_motus.csv' perform_shortread_qc = false perform_longread_qc = false @@ -32,7 +32,7 @@ params { perform_shortread_hostremoval = false perform_longread_hostremoval = false perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false run_bracken = false diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index 004a49e8..98f34b07 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -20,15 +20,15 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = false perform_longread_qc = false perform_shortread_complexityfilter = false perform_shortread_hostremoval = false 
perform_longread_hostremoval = false perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true run_bracken = true diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config index 7cf2317d..b1a52922 100644 --- a/conf/test_noprofiling.config +++ b/conf/test_noprofiling.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_mergepairs = true @@ -29,7 +29,7 @@ params { perform_shortread_hostremoval = true perform_longread_hostremoval = true perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false run_bracken = false diff --git a/conf/test_nothing.config b/conf/test_nothing.config index ed247ef4..504fe4ed 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -20,15 +20,15 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + 
databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = false perform_longread_qc = false perform_shortread_complexityfilter = false perform_shortread_hostremoval = false perform_longread_hostremoval = false perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false run_bracken = false diff --git a/conf/test_prinseqplusplus.config b/conf/test_prinseqplusplus.config index acc23aa8..19fb2a60 100644 --- a/conf/test_prinseqplusplus.config +++ b/conf/test_prinseqplusplus.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = true perform_longread_qc = true perform_shortread_complexityfilter = true @@ -29,7 +29,7 @@ params { perform_shortread_hostremoval = false perform_longread_hostremoval = false perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true run_bracken = false From 306d96a83e69e0aa04db2ec05becf879c72c5e32 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 16 May 2024 08:47:19 +0200 Subject: [PATCH 10/35] Fix versions --- assets/multiqc_config.yml | 8 ++++---- nextflow.config | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index af0223f3..0d892103 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,9 +1,9 @@ report_comment: > - - This report has been generated by the nf-core/taxprofiler + + This report has been generated by the nf-core/taxprofiler analysis pipeline. For information about how to interpret these results, please see the - documentation. - + documentation. + report_section_order: "nf-core-taxprofiler-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index b442638c..b8a9ba8f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -385,7 +385,7 @@ manifest { description = """Taxonomic classification and profiling of shotgun short- and long-read metagenomic data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.1.8' + version = '1.1.8dev' doi = '10.1101/2023.10.20.563221' } From bd2207f3619fa879900f128f892f1492b4a64510 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 16 May 2024 08:55:24 +0200 Subject: [PATCH 11/35] Fix adapter removal adn test_full paths --- conf/test_adapterremoval.config | 4 ++-- conf/test_full.config | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/test_adapterremoval.config b/conf/test_adapterremoval.config index 9f9d5b43..5fb6d362 100644 --- a/conf/test_adapterremoval.config +++ b/conf/test_adapterremoval.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Input data - input = params.pipelines_testdata_base_path + 'samplesheet.csv' - databases = params.pipelines_testdata_base_path + 'database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_tool = 'adapterremoval' diff --git a/conf/test_full.config b/conf/test_full.config index 99af49e6..067940bb 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -13,8 +13,8 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - input = params.pipelines_testdata_base_path + 'test-datasets/taxprofiler/samplesheet_full.csv' - databases = params.pipelines_testdata_base_path + 'test-datasets/taxprofiler/database_full_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet_full.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_full_v1.1.csv' // Genome references hostremoval_reference = 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/819/615/GCA_000819615.1_ViralProj14015/GCA_000819615.1_ViralProj14015_genomic.fna.gz' From ab74be6cedd93c19fafcc49a9e95d6c8f8f3c1f7 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Thu, 16 May 2024 09:31:37 +0200 Subject: [PATCH 12/35] Update docs/usage.md Co-authored-by: James A. 
Fellows Yates --- docs/usage.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 00cdb564..37c24178 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -51,7 +51,13 @@ sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta 2612,lane3,ILLUMINA,2612_lane3_R1.fq.gz,, ``` -Please note that the column name `run_accession` is following the ENA terms and is a single or paired-end set of demultiplexed FASTQs. Given that demultiplexing happens per lane, each sequencing pair from each lane is a 'run' and therefore you get multiple 'runs' per sample, which can span across both lanes and sequencing libraries. +::: info +Please note that the column name `run_accession` follows the definition of an ENA 'run'. +A 'run' corresponds to a single or paired-end set of demultiplexed FASTQs. +Given that demultiplexing of a given library happens per lane, each sequencing pair from each lane is a 'run'. +Therefore, for each sample, you may get multiple 'runs' consisting of _both_ lanes (of the same library) _and_ sequencing libraries. +Therefore ensure that each `run_accession` ID is unique, even if from the same sample! +::: :::warning Runs of the same sample sequenced on Illumina platforms with a combination of single and paired-end data will **not** be run-wise concatenated, unless pair-merging is specified. In the example above, `run3` will be profiled independently of `run1` and `run2` if pairs are not merged. From c3b624abb039bb5995a308f64f1b4d1342e90e95 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 16 May 2024 12:56:22 +0200 Subject: [PATCH 13/35] Install new module and start extracting seq type --- modules.json | 2 +- .../krakenuniq/preloadedkrakenuniq/main.nf | 59 +-- .../krakenuniq/preloadedkrakenuniq/meta.yml | 30 +- .../preloadedkrakenuniq/tests/main.nf.test | 211 +++++++++- .../tests/main.nf.test.snap | 374 ++++++++++-------- subworkflows/local/profiling.nf | 8 +- 6 files changed, 464 insertions(+), 220 deletions(-) diff --git a/modules.json b/modules.json index f8c101e0..26dc2317 100644 --- a/modules.json +++ b/modules.json @@ -137,7 +137,7 @@ }, "krakenuniq/preloadedkrakenuniq": { "branch": "master", - "git_sha": "8bbaa881ab9e59f3e18680550d65d52339640630", + "git_sha": "9de9365c3ca6071ec01705919f6667c718ef47b4", "installed_by": ["modules"] }, "krona/ktimporttaxonomy": { diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf index 59055bdb..78b2f3ab 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf @@ -8,34 +8,37 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { 'biocontainers/krakenuniq:1.0.4--pl5321h19e8d03_0' }" input: - tuple val(meta), path(fastqs) - path db + tuple val(meta), path(sequences) + val sequence_type + path db val ram_chunk_size val save_output_reads val report_file val save_output output: - tuple val(meta), path('*.classified.fasta.gz') , optional:true, emit: classified_reads_fasta - tuple val(meta), path('*.unclassified.fasta.gz') , optional:true, emit: unclassified_reads_fasta - tuple val(meta), path('*.krakenuniq.classified.txt'), optional:true, emit: classified_assignment - tuple val(meta), path('*.krakenuniq.report.txt') , emit: report - path "versions.yml" , emit: versions + tuple val(meta), path("*.classified.${sequence_type}.gz") , optional:true, emit: classified_reads + tuple val(meta), path("*.unclassified.${sequence_type}.gz"), optional:true, emit: 
unclassified_reads + tuple val(meta), path('*.krakenuniq.classified.txt') , optional:true, emit: classified_assignment + tuple val(meta), path('*.krakenuniq.report.txt') , emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: + assert sequence_type in ['fasta', 'fastq'] + def args = task.ext.args ?: '' def args2 = task.ext.args ?: '' - def classified = meta.single_end ? '"\${PREFIX}.classified.fasta"' : '"\${PREFIX}.merged.classified.fasta"' - def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fasta"' : '"\${PREFIX}.merged.unclassified.fasta"' - def classified_option = save_output_reads ? "--classified-out ${classified}" : '' - def unclassified_option = save_output_reads ? "--unclassified-out ${unclassified}" : '' + classified = meta.single_end ? "\${PREFIX}.classified.${sequence_type}" : "\${PREFIX}.merged.classified.${sequence_type}" + unclassified = meta.single_end ? "\${PREFIX}.unclassified.${sequence_type}" : "\${PREFIX}.merged.unclassified.${sequence_type}" + classified_option = save_output_reads ? "--classified-out \"${classified}\"" : '' + unclassified_option = save_output_reads ? "--unclassified-out \"${unclassified}\"" : '' def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : '' def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : '' - def compress_reads_command = save_output_reads ? 'gzip --no-name *.fasta' : '' + compress_reads_command = save_output_reads ? "find . 
-name '*.${sequence_type}' -print0 | xargs -0 -t -P ${task.cpus} -I % gzip --no-name %" : '' if (meta.single_end) { """ krakenuniq \\ @@ -51,7 +54,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo "\${result%%.*}" } - printf "%s\\n" ${fastqs} | while read FASTQ; do \\ + printf "%s\\n" ${sequences} | while read FASTQ; do \\ PREFIX="\$(strip_suffix "\${FASTQ}")" krakenuniq \\ @@ -89,7 +92,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo "\${result%.}" } - printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\ + printf "%s %s\\n" ${sequences} | while read FASTQ; do \\ read -r -a FASTQ <<< "\${FASTQ}" PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)" @@ -115,16 +118,18 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { } stub: + assert sequence_type in ['fasta', 'fastq'] + def args = task.ext.args ?: '' def args2 = task.ext.args ?: '' - def classified = meta.single_end ? '"\${PREFIX}.classified.fasta"' : '"\${PREFIX}.merged.classified.fasta"' - def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fasta"' : '"\${PREFIX}.merged.unclassified.fasta"' - def classified_option = save_output_reads ? "--classified-out ${classified}" : '' - def unclassified_option = save_output_reads ? "--unclassified-out ${unclassified}" : '' + classified = meta.single_end ? "\${PREFIX}.classified.${sequence_type}" : "\${PREFIX}.merged.classified.${sequence_type}" + unclassified = meta.single_end ? "\${PREFIX}.unclassified.${sequence_type}" : "\${PREFIX}.merged.unclassified.${sequence_type}" + classified_option = save_output_reads ? "--classified-out \"${classified}\"" : '' + unclassified_option = save_output_reads ? "--unclassified-out \"${unclassified}\"" : '' def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : '' def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : '' - def compress_reads_command = save_output_reads ? 
'gzip --no-name *.fasta' : '' + compress_reads_command = save_output_reads ? "find . -name '*.${sequence_type}' -print0 | xargs -0 -t -P ${task.cpus} -I % gzip --no-name %" : '' if (meta.single_end) { """ echo krakenuniq \\ @@ -148,7 +153,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo '<3 nf-core' | gzip -n > "\$1" } - printf "%s\\n" ${fastqs} | while read FASTQ; do \\ + printf "%s\\n" ${sequences} | while read FASTQ; do \\ echo "\${FASTQ}" PREFIX="\$(strip_suffix "\${FASTQ}")" echo "\${PREFIX}" @@ -165,11 +170,11 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { create_file "\${PREFIX}.krakenuniq.classified.txt" create_file "\${PREFIX}.krakenuniq.report.txt" - create_gzip_file "\${PREFIX}.classified.fasta.gz" - create_gzip_file "\${PREFIX}.unclassified.fasta.gz" + create_gzip_file "\${PREFIX}.classified.${sequence_type}.gz" + create_gzip_file "\${PREFIX}.unclassified.${sequence_type}.gz" done - echo $compress_reads_command + echo "$compress_reads_command" cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -201,7 +206,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo '<3 nf-core' | gzip -n > "\$1" } - printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\ + printf "%s %s\\n" ${sequences} | while read FASTQ; do \\ read -r -a FASTQ <<< "\${FASTQ}" echo "\${FASTQ[@]}" PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)" @@ -220,11 +225,11 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { create_file "\${PREFIX}.krakenuniq.classified.txt" create_file "\${PREFIX}.krakenuniq.report.txt" - create_gzip_file "\${PREFIX}.merged.classified.fasta.gz" - create_gzip_file "\${PREFIX}.merged.unclassified.fasta.gz" + create_gzip_file "\${PREFIX}.merged.classified.${sequence_type}.gz" + create_gzip_file "\${PREFIX}.merged.unclassified.${sequence_type}.gz" done - echo $compress_reads_command + echo "$compress_reads_command" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git 
a/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml index 4a6dffee..bb6409a6 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml @@ -19,9 +19,13 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - fastqs: + - sequences: type: file - description: List of input FastQ files + description: List of input files containing sequences. All of them must be either in FASTA or FASTQ format. + - sequence_type: + type: string + description: Format of all given sequencing files as literal string, either 'fasta' or 'fastq'. + pattern: "{fasta,fastq}" - db: type: directory description: KrakenUniq database @@ -32,31 +36,33 @@ input: - save_output_reads: type: boolean description: | - Optionally commands are added to save classified and unclassified reads as FASTA files. - When the input is paired-end, the single output FASTA contains merged reads. - - save_reads_assignment: + Optionally, commands are added to save classified and unclassified reads + as FASTQ or FASTA files depending on the input format. When the input + is paired-end, the single output FASTQ contains merged reads. + - report_file: type: boolean - description: | - If true, an optional command is added to save a file reporting the taxonomic - classification of each input read + description: Whether to generate a report of relative abundances. + - save_output: + type: boolean + description: Whether to save a file reporting the taxonomic classification of each input read. output: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - classified_reads_fasta: + - classified_reads: type: file description: | Reads classified as belonging to any of the taxa in the KrakenUniq reference database. 
- pattern: "*.classified.fasta.gz" - - unclassified_reads_fasta: + pattern: "*.classified.{fastq,fasta}.gz" + - unclassified_reads: type: file description: | Reads not classified to any of the taxa in the KrakenUniq reference database. - pattern: "*.unclassified.fasta.gz" + pattern: "*.unclassified.{fastq,fasta}.gz" - classified_assignment: type: file description: | diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test index a7c44707..9e1d6700 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test @@ -7,10 +7,23 @@ nextflow_process { tag "modules_nfcore" tag "krakenuniq" tag "krakenuniq/preloadedkrakenuniq" + tag "untar" - test("sarscov2 - Illumina FASTQ single - stub-run") { - options "-stub-run" + setup { + run("UNTAR") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [id: 'krakenuniq'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/krakenuniq.tar.gz', checkIfExists: true) + ] + """ + } + } + } + test("sarscov2 - FASTA") { when { params { outdir = "$outputDir" @@ -19,15 +32,170 @@ nextflow_process { """ input[0] = [ [id:'test', single_end:true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[1] = 'fasta' + input[2] = UNTAR.out.untar.map { it[1] } + input[3] = '1GB' + input[4] = true + input[5] = true + input[6] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + // Report contains a timestamp. 
+ { assert file(process.out.report.get(0).get(1)).name == 'genome.krakenuniq.report.txt' }, + { assert file(process.out.unclassified_reads.get(0).get(1)).name == 'genome.unclassified.fasta.gz' }, + { assert snapshot( + process.out.classified_reads, + process.out.classified_assignment, + process.out.versions + ).match('fasta') }, + ) + } + + } + + test("sarscov2 - Illumina FASTQ single") { + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id:'test', single_end:true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq' + input[2] = UNTAR.out.untar.map { it[1] } + input[3] = '1GB' + input[4] = true + input[5] = true + input[6] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + // Report contains a timestamp. + { assert file(process.out.report.get(0).get(1)).name == 'test_interleaved.krakenuniq.report.txt' }, + { assert snapshot( + process.out.classified_reads, + process.out.unclassified_reads, + process.out.classified_assignment, + process.out.versions + ).match('fastq-single') }, + ) + } + + } + + test("sarscov2 - Illumina FASTQ paired-end") { + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id:'test', single_end:false], [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ] - input[1] = [] - input[2] = '8GB' - input[3] = true + input[1] = 'fastq' + input[2] = UNTAR.out.untar.map { it[1] } + input[3] = '1GB' + input[4] = true + input[5] = true + input[6] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + // Report contains a timestamp. 
+ { assert file(process.out.report.get(0).get(1)).name == 'test.krakenuniq.report.txt' }, + { assert file(process.out.unclassified_reads.get(0).get(1)).name == 'test.merged.unclassified.fastq.gz' }, + { assert snapshot( + process.out.classified_reads, + process.out.classified_assignment, + process.out.versions + ).match('fastq-paired') }, + ) + } + + } + + test("sarscov2 - FASTA - stub") { + options "-stub-run" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id:'test', single_end:true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[1] = 'fasta' + input[2] = UNTAR.out.untar.map { it[1] } + input[3] = '1GB' + input[4] = true + input[5] = true + input[6] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + // Report contains a timestamp. + { assert file(process.out.report.get(0).get(1)).name == 'genome.krakenuniq.report.txt' }, + { assert file(process.out.unclassified_reads.get(0).get(1)).name == 'genome.unclassified.fasta.gz' }, + { assert snapshot( + process.out.classified_reads, + process.out.unclassified_reads, + process.out.classified_assignment, + process.out.versions + ).match('fasta-stub') }, + ) + } + + } + + test("sarscov2 - Illumina FASTQ single - stub") { + options "-stub-run" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id:'test', single_end:true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq' + input[2] = UNTAR.out.untar.map { it[1] } + input[3] = '1GB' input[4] = true input[5] = true + input[6] = true """ } } @@ -35,13 +203,20 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, + // Report contains a timestamp. 
+ { assert file(process.out.report.get(0).get(1)).name == 'test_interleaved.krakenuniq.report.txt' }, + { assert snapshot( + process.out.classified_reads, + process.out.unclassified_reads, + process.out.classified_assignment, + process.out.versions + ).match('fastq-single-stub') }, ) } } - test("sarscov2 - Illumina FASTQ paired-end - stub-run") { + test("sarscov2 - Illumina FASTQ paired-end - stub") { options "-stub-run" when { @@ -53,15 +228,16 @@ nextflow_process { input[0] = [ [id:'test', single_end:false], [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ] - input[1] = [] - input[2] = '8GB' - input[3] = true + input[1] = 'fastq' + input[2] = UNTAR.out.untar.map { it[1] } + input[3] = '1GB' input[4] = true input[5] = true + input[6] = true """ } } @@ -69,7 +245,14 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, + // Report contains a timestamp. 
+ { assert file(process.out.report.get(0).get(1)).name == 'test.krakenuniq.report.txt' }, + { assert snapshot( + process.out.classified_reads, + process.out.unclassified_reads, + process.out.classified_assignment, + process.out.versions + ).match('fastq-paired-stub') }, ) } diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap index 970865bd..2a431be8 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap @@ -1,172 +1,218 @@ { - "sarscov2 - Illumina FASTQ paired-end - stub-run": { + "fastq-single-stub": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.merged.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.merged.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "test.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "4": [ - "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" - ], - "classified_assignment": [ - [ - { - "id": "test", - "single_end": false - }, - "test.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "classified_reads_fasta": [ - [ - { - "id": "test", - "single_end": false - }, - "test.merged.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - ], - "report": [ - [ - { - "id": "test", - "single_end": false - }, - "test.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "unclassified_reads_fasta": [ - [ - { - "id": "test", - "single_end": false - }, - "test.merged.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - 
], - "versions": [ - "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" - ] - } + [ + [ + { + "id": "test", + "single_end": true + }, + "test_interleaved.classified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_interleaved.unclassified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_interleaved.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-06T11:21:36.338887437" + }, + "fastq-single": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_interleaved.classified.fastq.gz:md5,3bd95021a8fbced1be8039b990b28176" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_interleaved.unclassified.fastq.gz:md5,143c7eb70ca93cc2d5ea98767c370424" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_interleaved.krakenuniq.classified.txt:md5,88a734a9a9216cb0770a77f36c9f4e78" + ] + ], + [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-06T11:17:43.586414914" + }, + "fastq-paired": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.classified.fastq.gz:md5,dd7651837cce63e6108e28f4f019aedb" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.krakenuniq.classified.txt:md5,ed5e19c7a88312cc04e483ac5f2579cd" + ] + ], + [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ] ], - "timestamp": "2023-11-21T15:38:47.810576872" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-06T11:37:46.718293365" }, - "sarscov2 - Illumina FASTQ single - stub-run": { + "fasta-stub": { "content": [ - { - "0": [ - [ - { - "id": "test", - 
"single_end": true - }, - "test_1.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": true - }, - "test_1.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - ], - "2": [ - [ - { - "id": "test", - "single_end": true - }, - "test_1.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": true - }, - "test_1.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "4": [ - "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" - ], - "classified_assignment": [ - [ - { - "id": "test", - "single_end": true - }, - "test_1.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "classified_reads_fasta": [ - [ - { - "id": "test", - "single_end": true - }, - "test_1.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - ], - "report": [ - [ - { - "id": "test", - "single_end": true - }, - "test_1.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "unclassified_reads_fasta": [ - [ - { - "id": "test", - "single_end": true - }, - "test_1.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - ], - "versions": [ - "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" - ] - } + [ + [ + { + "id": "test", + "single_end": true + }, + "genome.classified.fasta.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "genome.unclassified.fasta.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "genome.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-06T11:28:27.729550991" + }, + "fastq-paired-stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.merged.classified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.unclassified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-05T20:06:20.262529457" + }, + "fasta": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "genome.classified.fasta.gz:md5,e73599798195a519ba2565c3f0275b93" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "genome.krakenuniq.classified.txt:md5,8aafacd89a6aac98aaf512df0a7493d1" + ] + ], + [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ] ], - "timestamp": "2023-11-21T15:38:42.894597091" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-06T11:36:00.24752418" } } \ No newline at end of file diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index e306f1de..233d867f 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -362,20 +362,24 @@ workflow PROFILING { ch_input_for_krakenuniq = ch_input_for_profiling.krakenuniq .map { meta, reads, db_meta, db -> - [[id: db_meta.db_name, single_end: meta.single_end], reads, db_meta, db] + def seqtype = reads[0].matches('*a.gz|*a') ? 
'fasta' : 'fastq' + [[id: db_meta.db_name, single_end: meta.single_end, seqtype: seqtype], reads, db_meta, db] } + .dump(tag: 'ch_input_for_krakenuniq_pregrouptuple') .groupTuple(by: [0,2,3]) .flatMap { single_meta, reads, db_meta, db -> def batches = reads.collate(params.krakenuniq_batch_size) return batches.collect { batch -> [ single_meta + db_meta, batch.flatten(), db ]} } + .dump(tag: 'ch_input_for_krakenuniq_premultimap') .multiMap { meta, reads, db -> reads: [ meta, reads ] db: db + seqtype: meta.seqtype } // Hardcode to _always_ produce the report file (which is our basic output, and goes into) - KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications ) + KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.seqtype, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications ) ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment ) From 035ac0ca06a5dba7f8b5cc7560729293c73f5de8 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 16 May 2024 13:05:55 +0200 Subject: [PATCH 14/35] Missing changelog, put logo back in MQC report --- CHANGELOG.md | 2 ++ workflows/taxprofiler.nf | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1435a66..ae312e0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#487](https://github.com/nf-core/taxprofiler/pull/487) Updated to nf-core pipeline template v2.14.1 (added by jfy133) + ### `Fixed` ### `Dependencies` diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 6c6e8f5c..fdb4266c 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -333,7 +333,7 @@ workflow TAXPROFILER { Channel.empty() ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : - Channel.empty() + Channel.fromPath("${workflow.projectDir}/docs/images/nf-core-taxprofiler_logo_custom_light.png", checkIfExists: true) summary_params = paramsSummaryMap( workflow, parameters_schema: "nextflow_schema.json") From 558fcbb6844e98e1b54f8698601fb4c70899cfdf Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 16 May 2024 11:21:29 +0000 Subject: [PATCH 15/35] Add updated docs --- docs/usage.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 37c24178..6534b820 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -53,9 +53,9 @@ sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta ::: info Please note that the column name `run_accession` follows the definition of an ENA 'run'. -A 'run' corresponds to a single or paired-end set of demultiplexed FASTQs. +A 'run' corresponds to a single or paired-end set of demultiplexed FASTQs. Given that demultiplexing of a given library happens per lane, each sequencing pair from each lane is a 'run'. 
-Therefore, for each sample, you may get multiple 'runs' consisting of _both_ lanes (of the same library) _and_ sequencing libraries. +Therefore, for each sample, you may get multiple 'runs' consisting of _both_ lanes (of the same library) _and_ sequencing libraries. Therefore ensure that each `run_accession` ID is unique, even if from the same sample! ::: @@ -460,6 +460,8 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). - `conda` - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. From faa002dd5c5c3383cb2f9497426e0a9828720286 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 16 May 2024 14:09:59 +0200 Subject: [PATCH 16/35] Fix metadata --- subworkflows/local/longread_preprocessing.nf | 4 ++-- subworkflows/local/profiling.nf | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index 30963ec6..72261013 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -20,7 +20,7 @@ workflow LONGREAD_PREPROCESSING { PORECHOP_PORECHOP ( reads ) ch_processed_reads = PORECHOP_PORECHOP.out.reads - .map { meta, reads -> [ meta + [single_end: 1], reads ] } + .map { meta, reads -> [ meta + [single_end: true], reads ] } ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first()) ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_PORECHOP.out.log ) @@ -34,7 +34,7 @@ workflow LONGREAD_PREPROCESSING { } else { PORECHOP_PORECHOP ( reads ) ch_clipped_reads = PORECHOP_PORECHOP.out.reads - .map { meta, reads -> [ meta + [single_end: 1], reads ] } + .map { meta, reads -> [ meta + [single_end: true], reads ] } ch_processed_reads = FILTLONG ( ch_clipped_reads.map { meta, reads -> [ meta, [], reads ] } ).reads diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 233d867f..e64c3b61 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -60,7 +60,7 @@ workflow PROFILING { COMBINE READS WITH POSSIBLE DATABASES */ - // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] + // e.g. 
output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':true], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads .map { meta, reads -> @@ -362,7 +362,7 @@ workflow PROFILING { ch_input_for_krakenuniq = ch_input_for_profiling.krakenuniq .map { meta, reads, db_meta, db -> - def seqtype = reads[0].matches('*a.gz|*a') ? 'fasta' : 'fastq' + def seqtype = reads[0].name.matches(".*a.gz\$|.*a\$") ? 'fasta' : 'fastq' [[id: db_meta.db_name, single_end: meta.single_end, seqtype: seqtype], reads, db_meta, db] } .dump(tag: 'ch_input_for_krakenuniq_pregrouptuple') @@ -382,8 +382,8 @@ workflow PROFILING { KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.seqtype, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications ) ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() ) - ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment ) - ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) + ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]}.dump(tag: 'post-ku-classifications') ) + ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]}.dump(tag: 'post-ku-reports') ) } From ad008c8a1242a91028a3b29dd90210888cb9adda Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 16 May 2024 14:15:24 +0200 Subject: [PATCH 17/35] Ensure to published FASTQ files too --- conf/modules.config | 2 +- docs/output.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 1956605e..dce43d1c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -517,7 +517,7 @@ process { publishDir = [ path: { "${params.outdir}/krakenuniq/${meta.db_name}/" }, mode: params.publish_dir_mode, - pattern: '*.{txt,fasta.gz}' + pattern: '*.{txt,fastq.gz,fasta.gz}' ] } diff --git a/docs/output.md b/docs/output.md index 2cebd463..8fbb22f9 100644 --- a/docs/output.md +++ b/docs/output.md @@ -398,8 +398,8 @@ You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply - `krakenuniq/` - `/` - - `_[.merged].classified.fasta.gz`: Optional FASTA file containing all reads that had a hit against a reference in the database for a given sample. Paired-end input reads are merged in this output. - - `_[.merged].unclassified.fasta.gz`: Optional FASTA file containing all reads that did not have a hit in the database for a given sample. Paired-end input reads are merged in this output. + - `_[.merged].classified.fast{a,q}.gz`: Optional FASTA file containing all reads that had a hit against a reference in the database for a given sample. Paired-end input reads are merged in this output. + - `_[.merged].unclassified.fast{a,q}.gz`: Optional FASTA file containing all reads that did not have a hit in the database for a given sample. Paired-end input reads are merged in this output. - `_.krakenuniq.report.txt`: A Kraken2-style report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits, with an additional column for k-mer coverage, that allows for more accurate distinguishing between false-positive/true-postitive hits. 
- `_.krakenuniq.classified.txt`: An optional list of read IDs and the hits each read had against each database for a given sample. From d5d82b13e210bbc8ea8bae476c9a2de80d50ac0a Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 16 May 2024 16:06:24 +0200 Subject: [PATCH 18/35] Apply suggestions from code review Co-authored-by: Moritz E. Beber --- subworkflows/local/profiling.nf | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index e64c3b61..2bd04564 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -362,16 +362,14 @@ workflow PROFILING { ch_input_for_krakenuniq = ch_input_for_profiling.krakenuniq .map { meta, reads, db_meta, db -> - def seqtype = reads[0].name.matches(".*a.gz\$|.*a\$") ? 'fasta' : 'fastq' + def seqtype = (reads[0].name ==~ /.+?\.f\w{0,3}a(\.gz)?$/) ? 'fasta' : 'fastq' [[id: db_meta.db_name, single_end: meta.single_end, seqtype: seqtype], reads, db_meta, db] } - .dump(tag: 'ch_input_for_krakenuniq_pregrouptuple') .groupTuple(by: [0,2,3]) .flatMap { single_meta, reads, db_meta, db -> def batches = reads.collate(params.krakenuniq_batch_size) return batches.collect { batch -> [ single_meta + db_meta, batch.flatten(), db ]} } - .dump(tag: 'ch_input_for_krakenuniq_premultimap') .multiMap { meta, reads, db -> reads: [ meta, reads ] @@ -382,8 +380,8 @@ workflow PROFILING { KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.seqtype, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications ) ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() ) - ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment.map{meta, profiles -> [meta - 
meta.subMap('seqtype'), profiles]}.dump(tag: 'post-ku-classifications') ) - ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]}.dump(tag: 'post-ku-reports') ) + ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]} ) + ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]} ) } From 888886632f9db02d51d217f1bfb895ed6c9b504b Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Thu, 23 May 2024 16:06:17 +0200 Subject: [PATCH 19/35] Add flag to publish intermediate bracken files --- conf/modules.config | 3 +- docs/output.md | 2 ++ nextflow.config | 3 +- nextflow_schema.json | 76 ++++++++++++++++++++++++++++++++++++++------ 4 files changed, 72 insertions(+), 12 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index dce43d1c..693f1e2e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -467,7 +467,8 @@ process { publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}/" }, mode: params.publish_dir_mode, - pattern: '*.{txt,fastq.gz}' + pattern: '*.{txt,fastq.gz}', + saveAs: { params.bracken_save_intermediatekraken2 == false && meta.tool == bracken ? false : true } ] } diff --git a/docs/output.md b/docs/output.md index 8fbb22f9..dc5f99d4 100644 --- a/docs/output.md +++ b/docs/output.md @@ -389,6 +389,8 @@ The main taxonomic classification file from Kraken2 is the `_combined_reports.tx You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply `--kraken2_save_reads` and/or `--kraken2_save_readclassifications` parameters to the pipeline. +When running Kraken2 together with bracken, you can use the `--bracken_save_intermediatekraken2` to save the intermediate/upstream files generated during the bracken run. 
+ ### KrakenUniq [KrakenUniq](https://github.com/fbreitwieser/krakenuniq) (formerly KrakenHLL) is an extension to the fast k-mer-based classification performed by [Kraken](https://github.com/DerrickWood/kraken) with an efficient algorithm for additionally assessing the coverage of unique k-mers found in each species in a dataset. diff --git a/nextflow.config b/nextflow.config index b8a9ba8f..1b105692 100644 --- a/nextflow.config +++ b/nextflow.config @@ -135,7 +135,8 @@ params { krakenuniq_batch_size = 20 // Bracken - run_bracken = false + run_bracken = false + bracken_save_intermediatekraken2 = false // centrifuge run_centrifuge = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 3f7d9eec..5d004189 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,11 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "databases", "outdir"], + "required": [ + "input", + "databases", + "outdir" + ], "properties": { "input": { "type": "string", @@ -75,7 +79,10 @@ "preprocessing_qc_tool": { "type": "string", "default": "fastqc", - "enum": ["fastqc", "falco"], + "enum": [ + "fastqc", + "falco" + ], "help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. 
We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.", "description": "Specify the tool used for quality control of raw sequencing reads", "fa_icon": "fas fa-tools" @@ -110,7 +117,10 @@ "shortread_qc_tool": { "type": "string", "default": "fastp", - "enum": ["fastp", "adapterremoval"], + "enum": [ + "fastp", + "adapterremoval" + ], "fa_icon": "fas fa-tools", "description": "Specify which tool to use for short-read QC" }, @@ -172,7 +182,11 @@ "shortread_complexityfilter_tool": { "type": "string", "default": "bbduk", - "enum": ["bbduk", "prinseqplusplus", "fastp"], + "enum": [ + "bbduk", + "prinseqplusplus", + "fastp" + ], "fa_icon": "fas fa-hammer", "description": "Specify which tool to use for complexity filtering" }, @@ -206,7 +220,10 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": ["entropy", "dust"], + "enum": [ + "entropy", + "dust" + ], "fa_icon": "fas fa-check-square", "description": "Specify the complexity filter mode for PRINSEQ++" }, @@ -377,7 +394,15 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], + "enum": [ + "blast", + "xml", + "txt", + "daa", + "sam", + "tsv", + "paf" + ], "fa_icon": "fas fa-file", "description": "Specify output format from DIAMOND profiling.", "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. 
For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" @@ -402,7 +427,14 @@ "kaiju_taxon_rank": { "type": "string", "default": "species", - "enum": ["phylum", "class", "order", "family", "genus", "species"], + "enum": [ + "phylum", + "class", + "order", + "family", + "genus", + "species" + ], "fa_icon": "fas fa-tag", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be only be a single level (e.g. `species`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" @@ -466,6 +498,11 @@ "description": "Turn on Bracken (and the required Kraken2 prerequisite step).", "fa_icon": "fas fa-toggle-on" }, + "bracken_save_intermediatekraken2": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Turn on saving the upstream Bracken files" + }, "run_malt": { "type": "boolean", "fa_icon": "fas fa-toggle-on", @@ -552,7 +589,13 @@ "default": "reads", "description": "Specify the type of ganon report to save.", "help_text": "Specify the type of taxonomic report to produce from ganon report. This mainly refers to which form of 'value' to print: raw read counts, abundance estimates, genome-size normalised etc. 
\n\nSee the [ganon documentation](https://pirovc.github.io/ganon/outputfiles/#ganon-report) for more information of each option.\n\n> Modifies tool parameter(s):\n- ganon report: `--report-type`\n", - "enum": ["abundance", "reads", "matches", "dist", "corr"], + "enum": [ + "abundance", + "reads", + "matches", + "dist", + "corr" + ], "fa_icon": "fas fa-file" }, "ganon_report_rank": { @@ -620,7 +663,13 @@ "default": "tsv", "fa_icon": "fas fa-pastafarianism", "description": "The desired output format.", - "enum": ["tsv", "csv", "arrow", "parquet", "biom"] + "enum": [ + "tsv", + "csv", + "arrow", + "parquet", + "biom" + ] }, "taxpasta_taxonomy_dir": { "type": "string", @@ -775,7 +824,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { From 11ee2747678ae7ed18bdf4867a419549a90f4544 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Thu, 23 May 2024 16:40:00 +0200 Subject: [PATCH 20/35] prettier --- nextflow_schema.json | 71 +++++++------------------------------------- 1 file changed, 10 insertions(+), 61 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 5d004189..999f25d8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,11 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "databases", - "outdir" - ], + "required": ["input", "databases", "outdir"], 
"properties": { "input": { "type": "string", @@ -79,10 +75,7 @@ "preprocessing_qc_tool": { "type": "string", "default": "fastqc", - "enum": [ - "fastqc", - "falco" - ], + "enum": ["fastqc", "falco"], "help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.", "description": "Specify the tool used for quality control of raw sequencing reads", "fa_icon": "fas fa-tools" @@ -117,10 +110,7 @@ "shortread_qc_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ], + "enum": ["fastp", "adapterremoval"], "fa_icon": "fas fa-tools", "description": "Specify which tool to use for short-read QC" }, @@ -182,11 +172,7 @@ "shortread_complexityfilter_tool": { "type": "string", "default": "bbduk", - "enum": [ - "bbduk", - "prinseqplusplus", - "fastp" - ], + "enum": ["bbduk", "prinseqplusplus", "fastp"], "fa_icon": "fas fa-hammer", "description": "Specify which tool to use for complexity filtering" }, @@ -220,10 +206,7 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": [ - "entropy", - "dust" - ], + "enum": ["entropy", "dust"], "fa_icon": "fas fa-check-square", "description": "Specify the complexity filter mode for PRINSEQ++" }, @@ -394,15 +377,7 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": [ - "blast", - "xml", - "txt", - "daa", - "sam", - "tsv", - "paf" - ], + "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], "fa_icon": "fas fa-file", "description": "Specify output format from DIAMOND profiling.", "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. 
For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" @@ -427,14 +402,7 @@ "kaiju_taxon_rank": { "type": "string", "default": "species", - "enum": [ - "phylum", - "class", - "order", - "family", - "genus", - "species" - ], + "enum": ["phylum", "class", "order", "family", "genus", "species"], "fa_icon": "fas fa-tag", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be only be a single level (e.g. `species`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" @@ -589,13 +557,7 @@ "default": "reads", "description": "Specify the type of ganon report to save.", "help_text": "Specify the type of taxonomic report to produce from ganon report. This mainly refers to which form of 'value' to print: raw read counts, abundance estimates, genome-size normalised etc. 
\n\nSee the [ganon documentation](https://pirovc.github.io/ganon/outputfiles/#ganon-report) for more information of each option.\n\n> Modifies tool parameter(s):\n- ganon report: `--report-type`\n", - "enum": [ - "abundance", - "reads", - "matches", - "dist", - "corr" - ], + "enum": ["abundance", "reads", "matches", "dist", "corr"], "fa_icon": "fas fa-file" }, "ganon_report_rank": { @@ -663,13 +625,7 @@ "default": "tsv", "fa_icon": "fas fa-pastafarianism", "description": "The desired output format.", - "enum": [ - "tsv", - "csv", - "arrow", - "parquet", - "biom" - ] + "enum": ["tsv", "csv", "arrow", "parquet", "biom"] }, "taxpasta_taxonomy_dir": { "type": "string", @@ -824,14 +780,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { From 0064d0d02092bd4308aaea671f87bba3293e38ad Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 24 May 2024 05:33:43 +0200 Subject: [PATCH 21/35] Update conf/modules.config --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 693f1e2e..163f0b00 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -468,7 +468,7 @@ process { path: { "${params.outdir}/kraken2/${meta.db_name}/" }, mode: params.publish_dir_mode, pattern: '*.{txt,fastq.gz}', - saveAs: { params.bracken_save_intermediatekraken2 == false && meta.tool == bracken ? 
false : true } + saveAs: { params.bracken_save_intermediatekraken2 == false && meta.tool == bracken ? null : it } ] } From 3c433a6452318e1a6deb6b34080259faecbadf2e Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Thu, 30 May 2024 10:35:19 +0200 Subject: [PATCH 22/35] Correct condition --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 163f0b00..2148b69f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -468,7 +468,7 @@ process { path: { "${params.outdir}/kraken2/${meta.db_name}/" }, mode: params.publish_dir_mode, pattern: '*.{txt,fastq.gz}', - saveAs: { params.bracken_save_intermediatekraken2 == false && meta.tool == bracken ? null : it } + saveAs: { !params.bracken_save_intermediatekraken2 && meta.tool == "bracken" ? null : it } ] } From e14894832d86acb0cb4bb51b010b012fd1b1991f Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Thu, 30 May 2024 10:45:29 +0200 Subject: [PATCH 23/35] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 043a62c7..5134ae3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133) +- [491](https://github.com/nf-core/taxprofiler/pull/491) Added flag to publish intermediate bracken files (❤️ to @ewissel for reporting, fixed by @sofstam and @jfy133) ### `Dependencies` From 892782f851343b6e3df996bbef08c4c84ee171dc Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Thu, 30 May 2024 10:47:39 +0200 Subject: [PATCH 24/35] Update 
nextflow_schema.json Co-authored-by: James A. Fellows Yates --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 999f25d8..0e4185b5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -469,7 +469,7 @@ "bracken_save_intermediatekraken2": { "type": "boolean", "fa_icon": "fas fa-save", - "description": "Turn on saving the upstream Bracken files" + "description": "Turn on the saving of the intermediate Kraken2 files used as input to Bracken itself into Kraken2 results folder" }, "run_malt": { "type": "boolean", From fffe8e180093c9fe13baf22551ce514626ffa157 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Thu, 30 May 2024 10:47:45 +0200 Subject: [PATCH 25/35] Update docs/output.md Co-authored-by: James A. Fellows Yates --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index dc5f99d4..9528514b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -389,7 +389,7 @@ The main taxonomic classification file from Kraken2 is the `_combined_reports.tx You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply `--kraken2_save_reads` and/or `--kraken2_save_readclassifications` parameters to the pipeline. -When running Kraken2 together with bracken, you can use the `--bracken_save_intermediatekraken2` to save the intermediate/upstream files generated during the bracken run. +When running Bracken, you will only get the 'intermediate' Kraken2 report files in this directory if you supply `--bracken_save_intermediatekraken2` to the run. 
### KrakenUniq From ca29906d6ed2ec43980810d81f1d8161429b8a1f Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Thu, 30 May 2024 10:52:40 +0200 Subject: [PATCH 26/35] Update output.md --- docs/output.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index 9528514b..732d5964 100644 --- a/docs/output.md +++ b/docs/output.md @@ -376,11 +376,11 @@ The main taxonomic profiling file from Bracken is the `*.tsv` file. This provide - `kraken2/` - `_combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by `krakentools`) - - If you have also run Bracken, the original Kraken report (i.e., _before_ read re-assignment) will also be included in this directory with `-bracken` suffixed to your Bracken database name. For example: `kraken2--bracken.tsv`. However in most cases you want to use the actual Bracken file (i.e., `bracken_.tsv`). + - If you have also run Bracken, the original Kraken report (i.e., _before_ read re-assignment) will also be included in this directory with `-bracken` suffixed to your Bracken database name if you supply `--bracken_save_intermediatekraken2` to the run. For example: `kraken2--bracken.tsv`. However in most cases you want to use the actual Bracken file (i.e., `bracken_.tsv`). - `/` - `_.classified.fastq.gz`: FASTQ file containing all reads that had a hit against a reference in the database for a given sample - `_.unclassified.fastq.gz`: FASTQ file containing all reads that did not have a hit in the database for a given sample - - `_.report.txt`: A Kraken2 report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits in the Kraken2 run for a given sample. Will be 6 column rather than 8 if `--save_minimizers` specified. 
+ - `_.report.txt`: A Kraken2 report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits in the Kraken2 run for a given sample. Will be 6 column rather than 8 if `--save_minimizers` specified. This report will **only** be included if you supply `--bracken_save_intermediatekraken2` to the run. - `_.classifiedreads.txt`: A list of read IDs and the hits each read had against each database for a given sample From b409e56d0523cabd40afb8c2a3e1fe449ab4bc7e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 30 May 2024 10:56:17 +0200 Subject: [PATCH 27/35] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5134ae3f..51c3c88d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133) -- [491](https://github.com/nf-core/taxprofiler/pull/491) Added flag to publish intermediate bracken files (❤️ to @ewissel for reporting, fixed by @sofstam and @jfy133) +- [#491](https://github.com/nf-core/taxprofiler/pull/491) Added flag to publish intermediate bracken files (❤️ to @ewissel for reporting, fixed by @sofstam and @jfy133) ### `Dependencies` From ee31ff0ad0f38e15b0dd83eda5f71a85a8c7a955 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Thu, 30 May 2024 10:59:22 +0200 Subject: [PATCH 28/35] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51c3c88d..347d0604 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - 
[#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133) - [#491](https://github.com/nf-core/taxprofiler/pull/491) Added flag to publish intermediate bracken files (❤️ to @ewissel for reporting, fixed by @sofstam and @jfy133) +- [489](https://github.com/nf-core/taxprofiler/pull/489) Fix KrakenUniq classified reads output format mismatch (❤️ to @SannaAb for reporting, fixed by @jfy133) ### `Dependencies` From 94812b3378bf03431311ae93ddeaa81f75ee895d Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 10 Jun 2024 14:36:21 +0200 Subject: [PATCH 29/35] Remove --kmcp_mode and bump KMCP version --- CHANGELOG.md | 6 + conf/test.config | 1 - conf/test_adapterremoval.config | 1 - conf/test_bbduk.config | 1 - conf/test_falco.config | 1 - conf/test_fastp.config | 1 - conf/test_krakenuniq.config | 1 - conf/test_malt.config | 1 - conf/test_motus.config | 1 - conf/test_nopreprocessing.config | 1 - conf/test_noprofiling.config | 1 - conf/test_nothing.config | 1 - conf/test_prinseqplusplus.config | 1 - .../nf-core-taxprofiler_logo_generic.svg | 2309 +++++++++++++++++ modules.json | 2 +- modules/nf-core/kmcp/search/environment.yml | 7 + modules/nf-core/kmcp/search/main.nf | 10 +- modules/nf-core/kmcp/search/meta.yml | 15 +- .../nf-core/kmcp/search/tests/main.nf.test | 88 + .../kmcp/search/tests/main.nf.test.snap | 72 + .../nf-core/kmcp/search/tests/nextflow.config | 5 + modules/nf-core/kmcp/search/tests/tags.yml | 2 + nextflow.config | 1 - nextflow_schema.json | 7 - subworkflows/local/profiling.nf | 4 +- 25 files changed, 2504 insertions(+), 36 deletions(-) create mode 100644 docs/images/nf-core-taxprofiler_logo_generic.svg create mode 100644 modules/nf-core/kmcp/search/environment.yml create mode 100644 modules/nf-core/kmcp/search/tests/main.nf.test create mode 100644 
modules/nf-core/kmcp/search/tests/main.nf.test.snap create mode 100644 modules/nf-core/kmcp/search/tests/nextflow.config create mode 100644 modules/nf-core/kmcp/search/tests/tags.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 347d0604..e98482ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,8 +17,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Dependencies` +| Tool | Previous version | New version | +| ---- | ---------------- | ----------- | +| KMCP | 0.9.1 | 0.9.4 | + ### `Deprecated` +- [#492](https://github.com/nf-core/taxprofiler/pull/492) Removed `--kmcp_mode` parameter from KMCP to allow per database specification by setting in db_params in database sheet (fixed by @jfy133) + ## v1.1.7 - Augmented Akita Patch [2024-04-25] ### `Added` diff --git a/conf/test.config b/conf/test.config index e8cd48da..042dc2fa 100644 --- a/conf/test.config +++ b/conf/test.config @@ -42,7 +42,6 @@ params { run_ganon = true run_krona = true run_kmcp = true - kmcp_mode = 0 krona_taxonomy_directory = params.pipelines_testdata_base_path + 'modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab' malt_save_reads = true kraken2_save_reads = true diff --git a/conf/test_adapterremoval.config b/conf/test_adapterremoval.config index 5fb6d362..ee55ba55 100644 --- a/conf/test_adapterremoval.config +++ b/conf/test_adapterremoval.config @@ -41,7 +41,6 @@ params { run_motus = false run_ganon = false run_kmcp = false - kmcp_mode = 0 } process { diff --git a/conf/test_bbduk.config b/conf/test_bbduk.config index e92fea8c..d0ff530a 100644 --- a/conf/test_bbduk.config +++ b/conf/test_bbduk.config @@ -41,7 +41,6 @@ params { run_motus = false run_ganon = false run_kmcp = false - kmcp_mode = 0 } process { diff --git a/conf/test_falco.config b/conf/test_falco.config index 03d80593..8bcd9889 100644 --- a/conf/test_falco.config +++ b/conf/test_falco.config @@ -41,7 +41,6 @@ params { run_motus = false run_ganon = false run_kmcp = false - kmcp_mode = 
0 } process { diff --git a/conf/test_fastp.config b/conf/test_fastp.config index 52767546..81bec14c 100644 --- a/conf/test_fastp.config +++ b/conf/test_fastp.config @@ -42,7 +42,6 @@ params { run_motus = false run_ganon = false run_kmcp = false - kmcp_mode = 0 } process { diff --git a/conf/test_krakenuniq.config b/conf/test_krakenuniq.config index f116af38..fc6305de 100644 --- a/conf/test_krakenuniq.config +++ b/conf/test_krakenuniq.config @@ -44,7 +44,6 @@ params { run_krakenuniq = true run_motus = false run_kmcp = false - kmcp_mode = 0 run_ganon = false run_krona = true krona_taxonomy_directory = params.pipelines_testdata_base_path + 'modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab' diff --git a/conf/test_malt.config b/conf/test_malt.config index d4a91771..7d9bd2b6 100644 --- a/conf/test_malt.config +++ b/conf/test_malt.config @@ -44,7 +44,6 @@ params { run_motus = false run_ganon = false run_kmcp = false - kmcp_mode = 0 } process { diff --git a/conf/test_motus.config b/conf/test_motus.config index 44cc36ff..c2d4ac22 100644 --- a/conf/test_motus.config +++ b/conf/test_motus.config @@ -43,7 +43,6 @@ params { run_krakenuniq = false run_motus = true run_kmcp = false - kmcp_mode = 0 run_ganon = false motus_save_mgc_read_counts = false motus_remove_ncbi_ids = false diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index 98f34b07..42014303 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -39,7 +39,6 @@ params { run_krakenuniq = true run_motus = false run_kmcp = true - kmcp_mode = 0 run_ganon = true run_krona = true } diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config index b1a52922..4e917fb9 100644 --- a/conf/test_noprofiling.config +++ b/conf/test_noprofiling.config @@ -40,7 +40,6 @@ params { run_krakenuniq = false run_motus = false run_kmcp = false - kmcp_mode = 0 run_ganon = false } diff --git a/conf/test_nothing.config b/conf/test_nothing.config index 
504fe4ed..d36c76d4 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -39,7 +39,6 @@ params { run_krakenuniq = false run_motus = false run_kmcp = false - kmcp_mode = 0 run_ganon = false } diff --git a/conf/test_prinseqplusplus.config b/conf/test_prinseqplusplus.config index 19fb2a60..c7ce2259 100644 --- a/conf/test_prinseqplusplus.config +++ b/conf/test_prinseqplusplus.config @@ -41,7 +41,6 @@ params { run_motus = false run_ganon = false run_kmcp = false - kmcp_mode = 0 } process { diff --git a/docs/images/nf-core-taxprofiler_logo_generic.svg b/docs/images/nf-core-taxprofiler_logo_generic.svg new file mode 100644 index 00000000..632997ca --- /dev/null +++ b/docs/images/nf-core-taxprofiler_logo_generic.svg @@ -0,0 +1,2309 @@ + + + +taxfindertaxprofiler/ diff --git a/modules.json b/modules.json index ecb96290..46fd242c 100644 --- a/modules.json +++ b/modules.json @@ -117,7 +117,7 @@ }, "kmcp/search": { "branch": "master", - "git_sha": "e198734cc3be18af5f64f6d7734c7f1a7c3af5a6", + "git_sha": "64cd3f418b191a008b9d362b8ccf0216ae0302d5", "installed_by": ["modules"] }, "kraken2/kraken2": { diff --git a/modules/nf-core/kmcp/search/environment.yml b/modules/nf-core/kmcp/search/environment.yml new file mode 100644 index 00000000..397fcb8a --- /dev/null +++ b/modules/nf-core/kmcp/search/environment.yml @@ -0,0 +1,7 @@ +name: kmcp_search +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::kmcp=0.9.4 diff --git a/modules/nf-core/kmcp/search/main.nf b/modules/nf-core/kmcp/search/main.nf index cb2d6843..62f74aeb 100644 --- a/modules/nf-core/kmcp/search/main.nf +++ b/modules/nf-core/kmcp/search/main.nf @@ -2,14 +2,14 @@ process KMCP_SEARCH { tag "$meta.id" label 'process_medium' - conda "bioconda::kmcp=0.9.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/kmcp:0.9.1--h9ee0642_0': - 'biocontainers/kmcp:0.9.1--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/kmcp:0.9.4--h9ee0642_0': + 'biocontainers/kmcp:0.9.4--h9ee0642_0' }" input: - path(db) tuple val(meta), path(reads) + path(db) output: tuple val(meta), path("*.gz") , emit: result @@ -40,7 +40,7 @@ process KMCP_SEARCH { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.gz + echo "" | gzip > ${prefix}.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/kmcp/search/meta.yml b/modules/nf-core/kmcp/search/meta.yml index 5526a179..6d003b04 100644 --- a/modules/nf-core/kmcp/search/meta.yml +++ b/modules/nf-core/kmcp/search/meta.yml @@ -14,23 +14,21 @@ tools: documentation: "https://github.com/shenwei356/kmcp#documents" tool_dev_url: "https://github.com/shenwei356/kmcp" doi: "10.1093/bioinformatics/btac845" - licence: "['MIT']" - + licence: ["MIT"] input: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - db: - type: directory - description: Database directory created by "kmcp index" - pattern: "*" - reads: type: file description: gzipped fasta or fastq files pattern: "*.{fq.gz,fastq.gz,fa.gz}" - + - db: + type: directory + description: Database directory created by "kmcp index" + pattern: "*" output: - meta: type: map @@ -45,6 +43,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@sofstam" +maintainers: + - "@sofstam" diff --git a/modules/nf-core/kmcp/search/tests/main.nf.test b/modules/nf-core/kmcp/search/tests/main.nf.test new file mode 100644 index 00000000..adc693e6 --- /dev/null +++ b/modules/nf-core/kmcp/search/tests/main.nf.test @@ -0,0 +1,88 @@ +nextflow_process { + + name "Test Process KMCP_SEARCH" + script "../main.nf" + process "KMCP_SEARCH" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "kmcp" + tag "kmcp/search" + tag "kmcp/compute" + tag "kmcp/index" + + setup { + run("KMCP_COMPUTE") { + script "../../../kmcp/compute/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + run("KMCP_INDEX") { + script "../../../kmcp/index/main.nf" + process { + """ + input[0] = KMCP_COMPUTE.out.outdir + """ + } + } + } + + test("sarscov2 - fasta") { + when { + process { + """ + input[0] = [ + [id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = KMCP_INDEX.out.kmcp.map{it[1]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + ).match() + } + ) + } + + } + + test("sarscov2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = KMCP_INDEX.out.kmcp.map{it[1]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/kmcp/search/tests/main.nf.test.snap b/modules/nf-core/kmcp/search/tests/main.nf.test.snap new file mode 100644 index 00000000..e245e2e9 --- /dev/null +++ b/modules/nf-core/kmcp/search/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,69d488bec087e13e13bef0482633b6c3" + ], + "result": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,69d488bec087e13e13bef0482633b6c3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-07T14:41:54.308010562" + }, + "sarscov2 - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gz:md5,84953d3d517f65722d43a2c3fdd04935" + ] + ], + "1": [ + "versions.yml:md5,69d488bec087e13e13bef0482633b6c3" + ], + "result": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gz:md5,84953d3d517f65722d43a2c3fdd04935" + ] + ], + "versions": [ + "versions.yml:md5,69d488bec087e13e13bef0482633b6c3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-06T21:35:20.053225742" + } +} \ No newline at end of file diff --git a/modules/nf-core/kmcp/search/tests/nextflow.config b/modules/nf-core/kmcp/search/tests/nextflow.config new file mode 100644 index 00000000..9366eab4 --- /dev/null +++ b/modules/nf-core/kmcp/search/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: KMCP_INDEX { + ext.prefix = { "${meta.id}_kmcp" } + } +} 
diff --git a/modules/nf-core/kmcp/search/tests/tags.yml b/modules/nf-core/kmcp/search/tests/tags.yml new file mode 100644 index 00000000..e281416c --- /dev/null +++ b/modules/nf-core/kmcp/search/tests/tags.yml @@ -0,0 +1,2 @@ +kmcp/search: + - "modules/nf-core/kmcp/search/**" diff --git a/nextflow.config b/nextflow.config index 1b105692..83cbf39f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -163,7 +163,6 @@ params { // kmcp run_kmcp = false - kmcp_mode = 3 // default kmcp profiling value kmcp_save_search = false // ganon diff --git a/nextflow_schema.json b/nextflow_schema.json index 0e4185b5..deef481a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -528,13 +528,6 @@ "description": "Turn on classification with KMCP.", "fa_icon": "fas fa-toggle-on" }, - "kmcp_mode": { - "type": "integer", - "default": 3, - "description": "Specify which KMCP profiling mode to use.", - "help_text": "Available values: \n0 (for pathogen detection)\n1 (higherrecall)\n2 (high recall)\n3 (default)\n4 (high precision)\n5 (higher precision).\nFor more information about the different profiling modes, please see the [kmcp documentation](https://bioinf.shenwei.me/kmcp/usage/#profile)\n\n> Modifies tool parameter(s):\n- kmcp profile: `--mode`\n\n", - "fa_icon": "fas fa-check-square" - }, "kmcp_save_search": { "type": "boolean", "fa_icon": "fas fa-save", diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 2bd04564..09b8ac19 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -416,7 +416,7 @@ workflow PROFILING { db: it[3] } - KMCP_SEARCH ( ch_input_for_kmcp.db, ch_input_for_kmcp.reads ) + KMCP_SEARCH ( ch_input_for_kmcp.reads, ch_input_for_kmcp.db ) ch_versions = ch_versions.mix( KMCP_SEARCH.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix(KMCP_SEARCH.out.result) @@ -453,7 +453,7 @@ workflow PROFILING { } //Generate kmcp profile - KMCP_PROFILE( ch_input_for_kmcp_profile.report, 
ch_input_for_kmcp.db, params.kmcp_mode ) + KMCP_PROFILE( ch_input_for_kmcp_profile.report, ch_input_for_kmcp.db ) ch_versions = ch_versions.mix( KMCP_PROFILE.out.versions.first() ) ch_raw_profiles = ch_raw_profiles.mix( KMCP_PROFILE.out.profile ) ch_multiqc_files = ch_multiqc_files.mix( KMCP_PROFILE.out.profile ) From 124a9c937d356af0608370254776a30074358fea Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 10 Jun 2024 14:46:14 +0200 Subject: [PATCH 30/35] Actually include KMCP Profile --- modules.json | 2 +- modules/nf-core/kmcp/profile/environment.yml | 7 ++ modules/nf-core/kmcp/profile/main.nf | 8 +- modules/nf-core/kmcp/profile/meta.yml | 22 ++-- .../nf-core/kmcp/profile/tests/main.nf.test | 109 ++++++++++++++++++ .../kmcp/profile/tests/main.nf.test.snap | 72 ++++++++++++ .../kmcp/profile/tests/nextflow.config | 13 +++ modules/nf-core/kmcp/profile/tests/tags.yml | 2 + 8 files changed, 214 insertions(+), 21 deletions(-) create mode 100644 modules/nf-core/kmcp/profile/environment.yml create mode 100644 modules/nf-core/kmcp/profile/tests/main.nf.test create mode 100644 modules/nf-core/kmcp/profile/tests/main.nf.test.snap create mode 100644 modules/nf-core/kmcp/profile/tests/nextflow.config create mode 100644 modules/nf-core/kmcp/profile/tests/tags.yml diff --git a/modules.json b/modules.json index 46fd242c..04001af6 100644 --- a/modules.json +++ b/modules.json @@ -112,7 +112,7 @@ }, "kmcp/profile": { "branch": "master", - "git_sha": "e198734cc3be18af5f64f6d7734c7f1a7c3af5a6", + "git_sha": "6f56948d0674ad5870035e80c7af209a51d8e243", "installed_by": ["modules"] }, "kmcp/search": { diff --git a/modules/nf-core/kmcp/profile/environment.yml b/modules/nf-core/kmcp/profile/environment.yml new file mode 100644 index 00000000..43de2a64 --- /dev/null +++ b/modules/nf-core/kmcp/profile/environment.yml @@ -0,0 +1,7 @@ +name: kmcp_profile +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::kmcp=0.9.4 diff --git 
a/modules/nf-core/kmcp/profile/main.nf b/modules/nf-core/kmcp/profile/main.nf index a4672122..3de9fdb7 100644 --- a/modules/nf-core/kmcp/profile/main.nf +++ b/modules/nf-core/kmcp/profile/main.nf @@ -2,15 +2,14 @@ process KMCP_PROFILE { tag "$meta.id" label 'process_medium' - conda "bioconda::kmcp=0.9.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/kmcp:0.9.1--h9ee0642_0': - 'biocontainers/kmcp:0.9.1--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/kmcp:0.9.4--h9ee0642_0': + 'biocontainers/kmcp:0.9.4--h9ee0642_0' }" input: tuple val(meta), path(search_results) path (db) - val mode output: tuple val(meta), path("*.profile"), emit: profile @@ -30,7 +29,6 @@ process KMCP_PROFILE { $args \\ -X \$taxdump \\ -T \$taxid \\ - -m $mode \\ -j $task.cpus \\ -o ${prefix}.profile \\ $search_results diff --git a/modules/nf-core/kmcp/profile/meta.yml b/modules/nf-core/kmcp/profile/meta.yml index 14f292c7..ba1ca2a2 100644 --- a/modules/nf-core/kmcp/profile/meta.yml +++ b/modules/nf-core/kmcp/profile/meta.yml @@ -15,30 +15,21 @@ tools: documentation: "https://bioinf.shenwei.me/kmcp/usage/#profile" tool_dev_url: "https://github.com/shenwei356/kmcp" doi: "10.1093/bioinformatics/btac845" - licence: "['MIT']" - + licence: ["MIT"] input: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - db: - type: directory - description: Database directory containing taxdump files and taxid file - search_results: type: file description: Gzipped file output from kmcp search module pattern: "*.gz" - - mode: - type: integer - description: Profiling mode. 
- 0-pathogen detection - 1-higher recall - 2-high recall - 3-default - 4-high precision - 5-higher precision + - db: + type: directory + description: Database directory containing taxdump files and taxid file + output: - meta: type: map @@ -53,6 +44,7 @@ output: type: file description: Tab-delimited format file with 17 columns. pattern: "*.profile" - authors: - "@sofstam" +maintainers: + - "@sofstam" diff --git a/modules/nf-core/kmcp/profile/tests/main.nf.test b/modules/nf-core/kmcp/profile/tests/main.nf.test new file mode 100644 index 00000000..20b303ed --- /dev/null +++ b/modules/nf-core/kmcp/profile/tests/main.nf.test @@ -0,0 +1,109 @@ +nextflow_process { + + name "Test Process KMCP_PROFILE" + script "../main.nf" + process "KMCP_PROFILE" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "kmcp" + tag "kmcp/profile" + tag "untar" + tag "kmcp/compute" + tag "kmcp/index" + tag "kmcp/search" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'delete_me/kmcp/kmcp_profile.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("KMCP_COMPUTE") { + script "../../../kmcp/compute/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + run("KMCP_INDEX") { + script "../../../kmcp/index/main.nf" + process { + """ + input[0] = KMCP_COMPUTE.out.outdir + """ + } + } + + run("KMCP_SEARCH") { + script "../../../kmcp/search/main.nf" + process { + """ + input[0] = [ + [id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = KMCP_INDEX.out.kmcp.map{it[1]} + """ + } + } + } + + test("sarscov2 - fasta") { + when { + process { + """ + input[0] = KMCP_SEARCH.out.result + input[1] = UNTAR.out.untar.map{it[1]} + """ + 
} + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + ).match() + } + ) + } + + } + + test("sarscov2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = KMCP_SEARCH.out.result + input[1] = UNTAR.out.untar.map{it[1]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/kmcp/profile/tests/main.nf.test.snap b/modules/nf-core/kmcp/profile/tests/main.nf.test.snap new file mode 100644 index 00000000..72b41ce8 --- /dev/null +++ b/modules/nf-core/kmcp/profile/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_kmcp.profile:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,71f3499117cd6d006be15365b761d38b" + ], + "profile": [ + [ + { + "id": "test", + "single_end": true + }, + "test_kmcp.profile:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,71f3499117cd6d006be15365b761d38b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-07T15:36:20.331533599" + }, + "sarscov2 - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_kmcp.profile:md5,d7318c8f2d578ea2e6355f05789db4f3" + ] + ], + "1": [ + "versions.yml:md5,71f3499117cd6d006be15365b761d38b" + ], + "profile": [ + [ + { + "id": "test", + "single_end": true + }, + "test_kmcp.profile:md5,d7318c8f2d578ea2e6355f05789db4f3" + ] + ], + "versions": [ + "versions.yml:md5,71f3499117cd6d006be15365b761d38b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-07T15:36:13.138318234" + } +} \ No newline at end of file diff --git a/modules/nf-core/kmcp/profile/tests/nextflow.config b/modules/nf-core/kmcp/profile/tests/nextflow.config 
new file mode 100644 index 00000000..bee9ca0c --- /dev/null +++ b/modules/nf-core/kmcp/profile/tests/nextflow.config @@ -0,0 +1,13 @@ +process { + withName: UNTAR { + ext.args2 = {"--no-same-owner"} + } + + withName: KMCP_COMPUTE { + ext.prefix = { "${meta.id}_kmcp" } + } + + withName: KMCP_PROFILE { + ext.prefix = { "${meta.id}_kmcp" } + } +} diff --git a/modules/nf-core/kmcp/profile/tests/tags.yml b/modules/nf-core/kmcp/profile/tests/tags.yml new file mode 100644 index 00000000..7af489b0 --- /dev/null +++ b/modules/nf-core/kmcp/profile/tests/tags.yml @@ -0,0 +1,2 @@ +kmcp/profile: + - "modules/nf-core/kmcp/profile/**" From d7497690ddf42e15b12da46ed1ff94cb8b28600b Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 12 Jun 2024 20:43:54 +0200 Subject: [PATCH 31/35] Update Taxpasta, which stops overly strict failures --- CHANGELOG.md | 10 +- conf/modules.config | 18 +-- modules.json | 4 +- .../nf-core/taxpasta/merge/environment.yml | 7 ++ modules/nf-core/taxpasta/merge/main.nf | 30 +++-- modules/nf-core/taxpasta/merge/meta.yml | 27 +++-- .../nf-core/taxpasta/merge/tests/main.nf.test | 111 ++++++++++++++++++ .../taxpasta/merge/tests/main.nf.test.snap | 83 +++++++++++++ modules/nf-core/taxpasta/merge/tests/tags.yml | 2 + .../taxpasta/standardise/environment.yml | 7 ++ modules/nf-core/taxpasta/standardise/main.nf | 29 +++-- modules/nf-core/taxpasta/standardise/meta.yml | 16 ++- .../taxpasta/standardise/tests/main.nf.test | 58 +++++++++ .../standardise/tests/main.nf.test.snap | 68 +++++++++++ .../taxpasta/standardise/tests/tags.yml | 2 + .../local/standardisation_profiles.nf | 18 ++- 16 files changed, 438 insertions(+), 52 deletions(-) create mode 100644 modules/nf-core/taxpasta/merge/environment.yml create mode 100644 modules/nf-core/taxpasta/merge/tests/main.nf.test create mode 100644 modules/nf-core/taxpasta/merge/tests/main.nf.test.snap create mode 100644 modules/nf-core/taxpasta/merge/tests/tags.yml create mode 100644 
modules/nf-core/taxpasta/standardise/environment.yml create mode 100644 modules/nf-core/taxpasta/standardise/tests/main.nf.test create mode 100644 modules/nf-core/taxpasta/standardise/tests/main.nf.test.snap create mode 100644 modules/nf-core/taxpasta/standardise/tests/tags.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index e98482ef..75d506df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,13 +13,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133) - [#491](https://github.com/nf-core/taxprofiler/pull/491) Added flag to publish intermediate bracken files (❤️ to @ewissel for reporting, fixed by @sofstam and @jfy133) -- [489](https://github.com/nf-core/taxprofiler/pull/489) Fix KrakenUniq classified reads output format mismatch (❤️ to @SannaAb for reporting, fixed by @jfy133) +- [#489](https://github.com/nf-core/taxprofiler/pull/489) Fix KrakenUniq classified reads output format mismatch (❤️ to @SannaAb for reporting, fixed by @jfy133) +- [#493](https://github.com/nf-core/taxprofiler/pull/489) Stop TAXPASTA failures when profiles do not have exact compositionality (fixes by @Midnighter, @jfy133) ### `Dependencies` -| Tool | Previous version | New version | -| ---- | ---------------- | ----------- | -| KMCP | 0.9.1 | 0.9.4 | +| Tool | Previous version | New version | +| -------- | ---------------- | ----------- | +| KMCP | 0.9.1 | 0.9.4 | +| TAXPASTA | 0.6.1 | 0.7.0 | ### `Deprecated` diff --git a/conf/modules.config b/conf/modules.config index 2148b69f..ee0bd5b5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -754,15 +754,14 @@ process { withName: TAXPASTA_MERGE { tag = { "${meta.tool}|${meta.id}" } + ext.prefix = { "${meta.tool}_${meta.id}" } ext.args = { [ - "-p ${meta.tool} -o 
${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}", params.taxpasta_add_name ? "--add-name" : "", params.taxpasta_add_rank ? "--add-rank" : "", params.taxpasta_add_lineage ? "--add-lineage" : "", params.taxpasta_add_idlineage ? "--add-id-lineage" : "", params.taxpasta_add_ranklineage ? "--add-rank-lineage" : "", - params.taxpasta_ignore_errors ? "--ignore-errors" : "" ].join(' ').trim() } publishDir = [ @@ -774,14 +773,15 @@ process { withName: TAXPASTA_STANDARDISE { tag = { "${meta.tool}|${meta.id}" } + ext.prefix = { "${meta.tool}_${meta.id}" } ext.args = { - [ - "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}", - params.taxpasta_add_name ? "--add-name" : "", - params.taxpasta_add_rank ? "--add-rank" : "", - params.taxpasta_add_lineage ? "--add-lineage" : "", - params.taxpasta_add_idlineage ? "--add-id-lineage" : "" - ].join(' ').trim() + [ + params.taxpasta_add_name ? "--add-name" : "", + params.taxpasta_add_rank ? "--add-rank" : "", + params.taxpasta_add_lineage ? "--add-lineage" : "", + params.taxpasta_add_idlineage ? "--add-id-lineage" : "", + params.taxpasta_add_ranklineage ? 
"--add-rank-lineage" : "" + ].join(' ').trim() } publishDir = [ path: { "${params.outdir}/taxpasta/" }, diff --git a/modules.json b/modules.json index 04001af6..b7fc0290 100644 --- a/modules.json +++ b/modules.json @@ -228,12 +228,12 @@ }, "taxpasta/merge": { "branch": "master", - "git_sha": "48019785051ba491e82dce910273c2eca61bd5b7", + "git_sha": "4fd9089d3cf904e0b870d5a6a7ab903ee5e1004d", "installed_by": ["modules"] }, "taxpasta/standardise": { "branch": "master", - "git_sha": "48019785051ba491e82dce910273c2eca61bd5b7", + "git_sha": "4fd9089d3cf904e0b870d5a6a7ab903ee5e1004d", "installed_by": ["modules"] }, "untar": { diff --git a/modules/nf-core/taxpasta/merge/environment.yml b/modules/nf-core/taxpasta/merge/environment.yml new file mode 100644 index 00000000..ca1a10b7 --- /dev/null +++ b/modules/nf-core/taxpasta/merge/environment.yml @@ -0,0 +1,7 @@ +name: taxpasta_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::taxpasta=0.7.0 diff --git a/modules/nf-core/taxpasta/merge/main.nf b/modules/nf-core/taxpasta/merge/main.nf index de135221..662f6e79 100644 --- a/modules/nf-core/taxpasta/merge/main.nf +++ b/modules/nf-core/taxpasta/merge/main.nf @@ -2,14 +2,16 @@ process TAXPASTA_MERGE { tag "$meta.id" label 'process_single' - conda "bioconda::taxpasta=0.6.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/taxpasta:0.6.1--pyhdfd78af_0': - 'biocontainers/taxpasta:0.6.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/taxpasta:0.7.0--pyhdfd78af_0': + 'biocontainers/taxpasta:0.7.0--pyhdfd78af_0' }" input: tuple val(meta), path(profiles) + val profiler + val format path taxonomy path samplesheet @@ -21,24 +23,34 @@ process TAXPASTA_MERGE { task.ext.when == null || task.ext.when script: - // N.B.: Taxpasta requires a --profiler option and will fail without it. 
- // This must be specified via a `nextflow.config` or `modules.config`, for - // example, as "--profiler kraken2". Additionally, it requires a --output - // option with the output file name. The desired format will be parsed from - // the name and should correspond to the output pattern specified above, - // e.g., "--output ${task.ext.prefix}.tsv". def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def taxonomy_option = taxonomy ? "--taxonomy ${taxonomy}" : '' def samplesheet_input = samplesheet ? "-s ${samplesheet}" : '' """ taxpasta merge \\ + --profiler $profiler \\ + --output ${prefix}.${format} \\ $args \\ $taxonomy_option \\ $samplesheet_input \\ $profiles + cat <<-END_VERSIONS > versions.yml + "${task.process}": + taxpasta: \$(taxpasta --version) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def taxonomy_option = taxonomy ? "--taxonomy ${taxonomy}" : '' + def samplesheet_input = samplesheet ? "-s ${samplesheet}" : '' + """ + touch ${prefix}.${format} + cat <<-END_VERSIONS > versions.yml "${task.process}": taxpasta: \$(taxpasta --version) diff --git a/modules/nf-core/taxpasta/merge/meta.yml b/modules/nf-core/taxpasta/merge/meta.yml index ed89e62c..a4cbed94 100644 --- a/modules/nf-core/taxpasta/merge/meta.yml +++ b/modules/nf-core/taxpasta/merge/meta.yml @@ -14,9 +14,7 @@ tools: homepage: "https://taxpasta.readthedocs.io/" documentation: "https://taxpasta.readthedocs.io/" tool_dev_url: "https://github.com/taxprofiler/taxpasta" - - licence: "['Apache-2.0']" - + licence: ["Apache-2.0"] input: - meta: type: map @@ -27,17 +25,22 @@ input: type: file description: A list of taxonomic profiler output files (typically in text format, mandatory) pattern: "*.{tsv,csv,arrow,parquet,biom}" - - samplesheet: - type: file - description: - A samplesheet describing the sample name and a filepath to a taxonomic abundance profile that needs to be relative - from the work environment. 
The profiles must be provided even if you give a samplesheet as argument (optional) - pattern: "*.{tsv,csv,ods,xlsx,arrow,parquet}" + - profiler: + type: string + description: Name of the profiler used to generate the profile (mandatory) + pattern: "bracken|centrifuge|diamond|ganon|kaiju|kmcp|kraken2|krakenuniq|megan6|metaphlan|motus" + - format: + type: string + description: Type of output file to be generated + pattern: "tsv|csv|ods|xlsx|arrow|parquet|biom" - taxonomy: type: directory description: Directory containing at a minimum nodes.dmp and names.dmp files (optional) pattern: "*/" - + - samplesheet: + type: file + description: A samplesheet describing the sample name and a filepath to a taxonomic abundance profile that needs to be relative from the Nextflow work directory of the executed process. The profiles must be provided even if you give a samplesheet as argument (optional) + pattern: "*.{tsv,csv,ods,xlsx,arrow,parquet}" output: - meta: type: map @@ -52,7 +55,9 @@ output: type: file description: Output file with standardised multiple profiles in one go and have all profiles combined into a single table. 
pattern: "*.{tsv,csv,ods,xlsx,arrow,parquet,biom}" - authors: - "@sofstam" - "@jfy133" +maintainers: + - "@sofstam" + - "@jfy133" diff --git a/modules/nf-core/taxpasta/merge/tests/main.nf.test b/modules/nf-core/taxpasta/merge/tests/main.nf.test new file mode 100644 index 00000000..886e93b9 --- /dev/null +++ b/modules/nf-core/taxpasta/merge/tests/main.nf.test @@ -0,0 +1,111 @@ +nextflow_process { + + name "Test Process TAXPASTA_MERGE" + script "../main.nf" + process "TAXPASTA_MERGE" + tag "modules" + tag "modules_nfcore" + tag "taxpasta" + tag "taxpasta/merge" + + test("sarscov2 - metagenome - kraken report") { + + when { + process { + """ + ch_test1_kraken = Channel.fromPath(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true).collectFile(name: 'test_1.kraken2.report.txt') + ch_test2_kraken = Channel.fromPath(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true).collectFile(name: 'test_2.kraken2.report.txt') + + input[0] = ch_test1_kraken.mix ( ch_test2_kraken ) + .collect() + .map { files -> + def meta = [:] + meta['id'] = 'kraken2' + meta['profiler'] = 'kraken2' + [meta, files.sort()] + + } + input[1] = 'kraken2' + input[2] = 'tsv' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.merged_profiles[0][1]).readLines().any { it.contains('2697049 100 100') }, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - metagenome - kraken report - samplesheet") { + + when { + process { + """ + ch_test1_kraken = Channel.fromPath(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true).collectFile(name: 'test_1.kraken2.report.txt') + ch_test2_kraken = Channel.fromPath(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: 
true).collectFile(name: 'test_2.kraken2.report.txt') + + input[0] = ch_test1_kraken.mix ( ch_test2_kraken ) + .collect() + .map { files -> + def meta = [:] + meta['id'] = 'kraken2' + meta['profiler'] = 'kraken2' + [meta, files.sort()] + } + input[1] = 'kraken2' + input[2] = 'tsv' + input[3] = [] + input[4] = Channel.of( + 'sample\tprofile', + 'test_1\t"test_1.kraken2.report.txt"', + 'test_2\t"test_2.kraken2.report.txt"' + ) + .collectFile(name: 'samplesheet.tsv', newLine: true, sort: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - metagenome - kraken report - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [[id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true)] + input[1] = 'kraken2' + input[2] = 'tsv' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/taxpasta/merge/tests/main.nf.test.snap b/modules/nf-core/taxpasta/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..afb7e491 --- /dev/null +++ b/modules/nf-core/taxpasta/merge/tests/main.nf.test.snap @@ -0,0 +1,83 @@ +{ + "sarscov2 - metagenome - kraken report": { + "content": [ + true, + [ + "versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-11T06:13:35.985987277" + }, + "sarscov2 - metagenome - kraken report - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d" + ], + "merged_profiles": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + 
"versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-10T22:04:59.08186623" + }, + "sarscov2 - metagenome - kraken report - samplesheet": { + "content": [ + { + "0": [ + [ + { + "id": "kraken2", + "profiler": "kraken2" + }, + "kraken2.tsv:md5,3a31a2bbff49f6e03083a2e03f4f6563" + ] + ], + "1": [ + "versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d" + ], + "merged_profiles": [ + [ + { + "id": "kraken2", + "profiler": "kraken2" + }, + "kraken2.tsv:md5,3a31a2bbff49f6e03083a2e03f4f6563" + ] + ], + "versions": [ + "versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-10T22:25:57.576974385" + } +} \ No newline at end of file diff --git a/modules/nf-core/taxpasta/merge/tests/tags.yml b/modules/nf-core/taxpasta/merge/tests/tags.yml new file mode 100644 index 00000000..5d17220c --- /dev/null +++ b/modules/nf-core/taxpasta/merge/tests/tags.yml @@ -0,0 +1,2 @@ +taxpasta/merge: + - "modules/nf-core/taxpasta/merge/**" diff --git a/modules/nf-core/taxpasta/standardise/environment.yml b/modules/nf-core/taxpasta/standardise/environment.yml new file mode 100644 index 00000000..a48f08e0 --- /dev/null +++ b/modules/nf-core/taxpasta/standardise/environment.yml @@ -0,0 +1,7 @@ +name: taxpasta_standardise +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::taxpasta=0.7.0 diff --git a/modules/nf-core/taxpasta/standardise/main.nf b/modules/nf-core/taxpasta/standardise/main.nf index 7822912a..7b393517 100644 --- a/modules/nf-core/taxpasta/standardise/main.nf +++ b/modules/nf-core/taxpasta/standardise/main.nf @@ -2,13 +2,15 @@ process TAXPASTA_STANDARDISE { tag "$meta.id" label 'process_single' - conda "bioconda::taxpasta=0.6.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/taxpasta:0.6.1--pyhdfd78af_0': - 'biocontainers/taxpasta:0.6.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/taxpasta:0.7.0--pyhdfd78af_0': + 'biocontainers/taxpasta:0.7.0--pyhdfd78af_0' }" input: tuple val(meta), path(profile) + val profiler + val format path taxonomy output: @@ -19,17 +21,13 @@ process TAXPASTA_STANDARDISE { task.ext.when == null || task.ext.when script: - // N.B.: Taxpasta requires a --profiler option and will fail without it. - // This must be specified via a `nextflow.config` or `modules.config`, for - // example, as "--profiler kraken2". Additionally, it requires a --output - // option with the output file name. The desired format will be parsed from - // the name and should correspond to the output pattern specified above, - // e.g., "--output ${task.ext.prefix}.tsv". def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def taxonomy_option = taxonomy ? "--taxonomy ${taxonomy}" : '' """ taxpasta standardise \\ + --profiler $profiler \\ + --output ${prefix}.${format} \\ $args \\ $taxonomy_option \\ $profile @@ -39,4 +37,17 @@ process TAXPASTA_STANDARDISE { taxpasta: \$(taxpasta --version) END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def taxonomy_option = taxonomy ? 
"--taxonomy ${taxonomy}" : '' + """ + touch ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + taxpasta: \$(taxpasta --version) + END_VERSIONS + """ } diff --git a/modules/nf-core/taxpasta/standardise/meta.yml b/modules/nf-core/taxpasta/standardise/meta.yml index 81df6e2c..b855905a 100644 --- a/modules/nf-core/taxpasta/standardise/meta.yml +++ b/modules/nf-core/taxpasta/standardise/meta.yml @@ -14,9 +14,7 @@ tools: homepage: "https://taxpasta.readthedocs.io/" documentation: "https://taxpasta.readthedocs.io/" tool_dev_url: "https://github.com/taxprofiler/taxpasta" - - licence: "['Apache-2.0']" - + licence: ["Apache-2.0"] input: - meta: type: map @@ -27,11 +25,18 @@ input: type: file description: profiler output file (mandatory) pattern: "*" + - profiler: + type: string + description: Name of the profiler used to generate the profile (mandatory) + pattern: "bracken|centrifuge|diamond|ganon|kaiju|kmcp|kraken2|krakenuniq|megan6|metaphlan|motus" + - format: + type: string + description: Type of output file to be generated + pattern: "tsv|csv|ods|xlsx|arrow|parquet|biom" - taxonomy: type: directory description: Directory containing at a minimum nodes.dmp and names.dmp files (optional) pattern: "*/" - output: - meta: type: map @@ -46,6 +51,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@Midnighter" +maintainers: + - "@Midnighter" diff --git a/modules/nf-core/taxpasta/standardise/tests/main.nf.test b/modules/nf-core/taxpasta/standardise/tests/main.nf.test new file mode 100644 index 00000000..e06ca7d6 --- /dev/null +++ b/modules/nf-core/taxpasta/standardise/tests/main.nf.test @@ -0,0 +1,58 @@ +nextflow_process { + + name "Test Process TAXPASTA_STANDARDISE" + script "../main.nf" + process "TAXPASTA_STANDARDISE" + tag "modules" + tag "modules_nfcore" + tag "taxpasta" + tag "taxpasta/standardise" + + test("sarscov2 - metagenome - kraken report") { + + + when { + process { + """ + 
input[0] = [[id: 'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true)] + input[1] = "kraken2" + input[2] = 'tsv' + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - metagenome - kraken report - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [[id: 'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true)] + input[1] = "kraken2" + input[2] = 'tsv' + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/taxpasta/standardise/tests/main.nf.test.snap b/modules/nf-core/taxpasta/standardise/tests/main.nf.test.snap new file mode 100644 index 00000000..498711e3 --- /dev/null +++ b/modules/nf-core/taxpasta/standardise/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 - metagenome - kraken report": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tsv:md5,6b62032ed403f431eeb0e17464ccc69b" + ] + ], + "1": [ + "versions.yml:md5,bf00feb66945aab46a78efafac5a261f" + ], + "standardised_profile": [ + [ + { + "id": "test" + }, + "test.tsv:md5,6b62032ed403f431eeb0e17464ccc69b" + ] + ], + "versions": [ + "versions.yml:md5,bf00feb66945aab46a78efafac5a261f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-11T12:35:34.381682299" + }, + "sarscov2 - metagenome - kraken report - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,bf00feb66945aab46a78efafac5a261f" + ], + "standardised_profile": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + 
"versions.yml:md5,bf00feb66945aab46a78efafac5a261f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-11T12:35:41.579178159" + } +} \ No newline at end of file diff --git a/modules/nf-core/taxpasta/standardise/tests/tags.yml b/modules/nf-core/taxpasta/standardise/tests/tags.yml new file mode 100644 index 00000000..43ec42dd --- /dev/null +++ b/modules/nf-core/taxpasta/standardise/tests/tags.yml @@ -0,0 +1,2 @@ +taxpasta/standardise: + - "modules/nf-core/taxpasta/standardise/**" diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index 95cd9d3f..ac204497 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -93,11 +93,23 @@ workflow STANDARDISATION_PROFILES { standardise: true } + ch_input_for_taxpasta_merge = ch_input_for_taxpasta.merge + .multiMap{ meta, profiles -> + profiles: [meta, profiles] + tool: meta.tool + } - TAXPASTA_MERGE (ch_input_for_taxpasta.merge , ch_taxpasta_tax_dir, []) + ch_input_for_taxpasta_standardise = ch_input_for_taxpasta.standardise + .multiMap{ meta, profiles -> + profiles: [meta, profiles] + tool: meta.tool + } + + + TAXPASTA_MERGE ( ch_input_for_taxpasta_merge.profiles , ch_input_for_taxpasta_merge.tool , params.standardisation_taxpasta_format, ch_taxpasta_tax_dir, [] ) + TAXPASTA_STANDARDISE ( ch_input_for_taxpasta_standardise.profiles, ch_input_for_taxpasta_standardise.tool, params.standardisation_taxpasta_format, ch_taxpasta_tax_dir ) ch_versions = ch_versions.mix( TAXPASTA_MERGE.out.versions.first() ) - TAXPASTA_STANDARDISE (ch_input_for_taxpasta.standardise, ch_taxpasta_tax_dir ) - ch_version = ch_versions.mix( TAXPASTA_STANDARDISE.out.versions.first() ) + ch_versions = ch_versions.mix( TAXPASTA_STANDARDISE.out.versions.first() ) From 0a3ce1c52072bcac2e8fe32bcf35f643a46f9f82 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 20 Jun 2024 08:23:47 +0200 Subject: [PATCH 33/35] Bump version for 1.1.8 release --- CHANGELOG.md | 2 +- assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b2619cd..9f3e88de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## v1.1.8dev - Augmented Akita Patch [] +## v1.1.8dev - Augmented Akita Patch [2024-06-20] ### `Added` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 0d892103..e2801c44 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,8 +1,8 @@ report_comment: > - This report has been generated by the nf-core/taxprofiler + This report has been generated by the nf-core/taxprofiler analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-taxprofiler-methods-description": diff --git a/nextflow.config b/nextflow.config index 83cbf39f..638089c8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -385,7 +385,7 @@ manifest { description = """Taxonomic classification and profiling of shotgun short- and long-read metagenomic data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.1.8dev' + version = '1.1.8' doi = '10.1101/2023.10.20.563221' } From e84e508acf0399ca24577ca9813177d9b142708e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 20 Jun 2024 10:02:01 +0200 Subject: [PATCH 34/35] Update CHANGELOG.md Co-authored-by: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f3e88de..088e8898 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.1.8dev - Augmented Akita Patch [2024-06-20] +## v1.1.8 - Augmented Akita Patch [2024-06-20] ### `Added` From e21b3ed4fccd7d9ba11cae28b8337039a6e3443c Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 20 Jun 2024 16:21:20 +0200 Subject: [PATCH 35/35] Update CHANGELOG.md Co-authored-by: Friederike Hanssen --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 088e8898..d512ab9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- [#487](https://github.com/nf-core/taxprofiler/pull/487) Updated to nf-core pipeline template v2.14.1 (added by jfy133) +- [#487](https://github.com/nf-core/taxprofiler/pull/487) Updated to nf-core pipeline template v2.14.1 (added by @jfy133) ### `Fixed`