diff --git a/.editorconfig b/.editorconfig index dd9ffa53..72dda289 100644 --- a/.editorconfig +++ b/.editorconfig @@ -28,10 +28,6 @@ indent_style = unset [/assets/email*] indent_size = unset -# ignore Readme -[README.md] -indent_style = unset - -# ignore python +# ignore python and markdown [*.{py,md}] indent_style = unset diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 888970b6..69a2dd8e 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/taxp - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/taxprofiler/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/taxprofiler _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. 
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 26df7808..bc40e7e3 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -8,12 +8,12 @@ on: types: [published] workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS full tests if: github.repository == 'nf-core/taxprofiler' runs-on: ubuntu-latest steps: - - name: Launch workflow via tower + - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} @@ -30,7 +30,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: Tower debug log file + name: Seqera Platform debug log file path: | - tower_action_*.log - tower_action_*.json + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 7b722e77..08f95b7e 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -5,13 +5,13 @@ name: nf-core AWS test on: workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS tests if: github.repository == 'nf-core/taxprofiler' runs-on: ubuntu-latest steps: - # Launch workflow using Tower CLI tool action - - name: Launch workflow via tower + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} @@ -27,7 +27,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: Tower debug log file + name: Seqera Platform debug log file path: | - tower_action_*.log - tower_action_*.json + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e6d5d4df..c46cae01 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,10 +42,10 @@ jobs: steps: - name: Check out pipeline code - uses: 
actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 2bcf451a..2d20d644 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -32,7 +32,10 @@ jobs: - name: Install Nextflow uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: python-version: "3.12" architecture: "x64" @@ -67,8 +70,17 @@ jobs: - name: Inspect download run: tree ./${{ env.REPOTITLE_LOWERCASE }} - - name: Run the downloaded pipeline + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ job.steps.stub_run_pipeline.status == failure() }} env: NXF_SINGULARITY_CACHEDIR: ./ NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test_nothing,singularity --outdir ./results + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 74c1ce02..7a068de2 100644 --- a/.github/workflows/fix-linting.yml +++ 
b/.github/workflows/fix-linting.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} @@ -32,9 +32,9 @@ jobs: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} # Install and run pre-commit - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 + python-version: "3.12" - name: Install pre-commit run: pip install pre-commit diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 073e1876..1fcafe88 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,13 +14,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - name: Set up Python 3.11 - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 - cache: "pip" + python-version: "3.12" - name: Install pre-commit run: pip install pre-commit @@ -32,14 +31,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" - 
name: Install dependencies @@ -60,7 +59,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index b706875f..40acc23f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index d468aeaa..03ecfcf7 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -12,7 +12,7 @@ jobs: - name: get topics and convert to hashtags id: get_topics run: | - curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' >> $GITHUB_OUTPUT + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT - uses: rzr/fediverse-action@master with: @@ -25,13 +25,13 @@ jobs: Please see the changelog: ${{ github.event.release.html_url }} - ${{ steps.get_topics.outputs.GITHUB_OUTPUT }} #nfcore #openscience #nextflow #bioinformatics + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics send-tweet: runs-on: ubuntu-latest steps: - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: 
actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: python-version: "3.10" - name: Install dependencies diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc81..e0b85a77 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,2 @@ repository_type: pipeline +nf_core_version: "2.14.1" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index af57081f..4dc0f1dc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,6 +3,9 @@ repos: rev: "v3.1.0" hooks: - id: prettier + additional_dependencies: + - prettier@3.2.5 + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python rev: "2.7.3" hooks: diff --git a/CHANGELOG.md b/CHANGELOG.md index d607106e..d512ab9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,30 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.1.8 - Augmented Akita Patch [2024-06-20] + +### `Added` + +- [#487](https://github.com/nf-core/taxprofiler/pull/487) Updated to nf-core pipeline template v2.14.1 (added by @jfy133) + +### `Fixed` + +- [#484](https://github.com/nf-core/taxprofiler/pull/484) Improved input validation to immediately fail if run accession IDs within a given sample ID are not unique (❤️ to @sofstam for reporting, fixed by @jfy133) +- [#491](https://github.com/nf-core/taxprofiler/pull/491) Added flag to publish intermediate bracken files (❤️ to @ewissel for reporting, fixed by @sofstam and @jfy133) +- [#489](https://github.com/nf-core/taxprofiler/pull/489) Fix KrakenUniq classified reads output format mismatch (❤️ to @SannaAb for reporting, fixed by @jfy133) +- [#495](https://github.com/nf-core/taxprofiler/pull/495) Stop TAXPASTA failures when profiles do not have exact compositionality (fixes by @Midnighter, @jfy133) + +### `Dependencies` + +| Tool | Previous version | New version | +| -------- | ---------------- | ----------- | +| KMCP | 
0.9.1 | 0.9.4 | +| TAXPASTA | 0.6.1 | 0.7.0 | + +### `Deprecated` + +- [#492](https://github.com/nf-core/taxprofiler/pull/492) Removed `--kmcp_mode` parameter from KMCP to allow per database specification by setting in db_params in database sheet (fixed by @jfy133) + ## v1.1.7 - Augmented Akita Patch [2024-04-25] ### `Added` diff --git a/README.md b/README.md index 1661c024..980a2212 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/taxprofiler) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/taxprofiler) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23taxprofiler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/taxprofiler)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index b9c37c75..e2801c44 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,8 @@ 
report_comment: > - This report has been generated by the nf-core/taxprofiler + + This report has been generated by the nf-core/taxprofiler analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-taxprofiler-methods-description": diff --git a/assets/schema_input.json b/assets/schema_input.json index 6acc00f7..cc335436 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -14,6 +14,7 @@ }, "run_accession": { "type": "string", + "unique": ["sample"], "errorMessage": "Run accession must be provided and cannot contain spaces." }, "instrument_platform": { diff --git a/conf/modules.config b/conf/modules.config index 1956605e..ee0bd5b5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -467,7 +467,8 @@ process { publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}/" }, mode: params.publish_dir_mode, - pattern: '*.{txt,fastq.gz}' + pattern: '*.{txt,fastq.gz}', + saveAs: { !params.bracken_save_intermediatekraken2 && meta.tool == "bracken" ? null : it } ] } @@ -517,7 +518,7 @@ process { publishDir = [ path: { "${params.outdir}/krakenuniq/${meta.db_name}/" }, mode: params.publish_dir_mode, - pattern: '*.{txt,fasta.gz}' + pattern: '*.{txt,fastq.gz,fasta.gz}' ] } @@ -753,15 +754,14 @@ process { withName: TAXPASTA_MERGE { tag = { "${meta.tool}|${meta.id}" } + ext.prefix = { "${meta.tool}_${meta.id}" } ext.args = { [ - "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}", params.taxpasta_add_name ? "--add-name" : "", params.taxpasta_add_rank ? "--add-rank" : "", params.taxpasta_add_lineage ? "--add-lineage" : "", params.taxpasta_add_idlineage ? "--add-id-lineage" : "", params.taxpasta_add_ranklineage ? "--add-rank-lineage" : "", - params.taxpasta_ignore_errors ? 
"--ignore-errors" : "" ].join(' ').trim() } publishDir = [ @@ -773,14 +773,15 @@ process { withName: TAXPASTA_STANDARDISE { tag = { "${meta.tool}|${meta.id}" } + ext.prefix = { "${meta.tool}_${meta.id}" } ext.args = { - [ - "-p ${meta.tool} -o ${meta.tool}_${meta.id}.${params.standardisation_taxpasta_format}", - params.taxpasta_add_name ? "--add-name" : "", - params.taxpasta_add_rank ? "--add-rank" : "", - params.taxpasta_add_lineage ? "--add-lineage" : "", - params.taxpasta_add_idlineage ? "--add-id-lineage" : "" - ].join(' ').trim() + [ + params.taxpasta_add_name ? "--add-name" : "", + params.taxpasta_add_rank ? "--add-rank" : "", + params.taxpasta_add_lineage ? "--add-lineage" : "", + params.taxpasta_add_idlineage ? "--add-id-lineage" : "", + params.taxpasta_add_ranklineage ? "--add-rank-lineage" : "" + ].join(' ').trim() } publishDir = [ path: { "${params.outdir}/taxpasta/" }, diff --git a/conf/test.config b/conf/test.config index c11f27b6..042dc2fa 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_mergepairs = true @@ -29,7 +29,7 @@ params { perform_shortread_hostremoval = true perform_longread_hostremoval = true perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true run_bracken = true @@ -42,8 +42,7 @@ params { 
run_ganon = true run_krona = true run_kmcp = true - kmcp_mode = 0 - krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab' + krona_taxonomy_directory = params.pipelines_testdata_base_path + 'modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab' malt_save_reads = true kraken2_save_reads = true centrifuge_save_reads = true diff --git a/conf/test_adapterremoval.config b/conf/test_adapterremoval.config index c3422d02..ee55ba55 100644 --- a/conf/test_adapterremoval.config +++ b/conf/test_adapterremoval.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_tool = 'adapterremoval' @@ -29,7 +29,7 @@ params { perform_shortread_hostremoval = true perform_longread_hostremoval = true perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true run_bracken = false @@ -41,7 +41,6 @@ params { run_motus = false run_ganon = false run_kmcp = false - kmcp_mode = 0 } process { diff --git a/conf/test_bbduk.config b/conf/test_bbduk.config index 623fe191..d0ff530a 100644 --- a/conf/test_bbduk.config +++ b/conf/test_bbduk.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 
'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = true perform_longread_qc = true perform_shortread_complexityfilter = true @@ -29,7 +29,7 @@ params { perform_shortread_hostremoval = true perform_longread_hostremoval = true perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true run_bracken = false @@ -41,7 +41,6 @@ params { run_motus = false run_ganon = false run_kmcp = false - kmcp_mode = 0 } process { diff --git a/conf/test_falco.config b/conf/test_falco.config index 3fb77c03..8bcd9889 100644 --- a/conf/test_falco.config +++ b/conf/test_falco.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' preprocessing_qc_tool = 'falco' perform_shortread_qc = true perform_longread_qc = true @@ -29,7 +29,7 @@ params { perform_shortread_hostremoval = false perform_longread_hostremoval = false perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false run_bracken = false 
@@ -41,7 +41,6 @@ params { run_motus = false run_ganon = false run_kmcp = false - kmcp_mode = 0 } process { diff --git a/conf/test_fastp.config b/conf/test_fastp.config index 3feeae7a..81bec14c 100644 --- a/conf/test_fastp.config +++ b/conf/test_fastp.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_tool = 'fastp' @@ -30,7 +30,7 @@ params { perform_shortread_hostremoval = true perform_longread_hostremoval = true perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true run_bracken = false @@ -42,7 +42,6 @@ params { run_motus = false run_ganon = false run_kmcp = false - kmcp_mode = 0 } process { diff --git a/conf/test_full.config b/conf/test_full.config index 2a74a80b..067940bb 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -13,8 +13,8 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - input = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/samplesheet_full.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_full_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet_full.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_full_v1.1.csv' // Genome references 
hostremoval_reference = 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/819/615/GCA_000819615.1_ViralProj14015/GCA_000819615.1_ViralProj14015_genomic.fna.gz' diff --git a/conf/test_krakenuniq.config b/conf/test_krakenuniq.config index e93de158..fc6305de 100644 --- a/conf/test_krakenuniq.config +++ b/conf/test_krakenuniq.config @@ -24,8 +24,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_krakenuniq.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_krakenuniq.csv' perform_shortread_qc = true perform_longread_qc = true shortread_qc_mergepairs = true @@ -33,7 +33,7 @@ params { perform_shortread_hostremoval = true perform_longread_hostremoval = true perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false run_bracken = false @@ -44,10 +44,9 @@ params { run_krakenuniq = true run_motus = false run_kmcp = false - kmcp_mode = 0 run_ganon = false run_krona = true - krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab' + krona_taxonomy_directory = params.pipelines_testdata_base_path + 'modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab' malt_save_reads = false kraken2_save_reads = false centrifuge_save_reads = false diff --git a/conf/test_malt.config b/conf/test_malt.config index 7e5f2df3..7d9bd2b6 100644 --- a/conf/test_malt.config +++ b/conf/test_malt.config @@ -24,15 +24,15 @@ params { max_time = '6.h' // Input data - input 
= 'https://github.com/nf-core/test-datasets/raw/taxprofiler/samplesheet_malt.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet_malt.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = false perform_longread_qc = false perform_shortread_complexityfilter = false perform_shortread_hostremoval = false perform_longread_hostremoval = false perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false run_bracken = false @@ -44,7 +44,6 @@ params { run_motus = false run_ganon = false run_kmcp = false - kmcp_mode = 0 } process { diff --git a/conf/test_motus.config b/conf/test_motus.config index ef1a2276..c2d4ac22 100644 --- a/conf/test_motus.config +++ b/conf/test_motus.config @@ -24,7 +24,7 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' databases = 'database_motus.csv' perform_shortread_qc = false perform_longread_qc = false @@ -32,7 +32,7 @@ params { perform_shortread_hostremoval = false perform_longread_hostremoval = false perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false run_bracken = false @@ -43,7 +43,6 @@ params { run_krakenuniq = false run_motus = true run_kmcp = false - kmcp_mode = 0 
run_ganon = false motus_save_mgc_read_counts = false motus_remove_ncbi_ids = false diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index 004a49e8..42014303 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -20,15 +20,15 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = false perform_longread_qc = false perform_shortread_complexityfilter = false perform_shortread_hostremoval = false perform_longread_hostremoval = false perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true run_bracken = true @@ -39,7 +39,6 @@ params { run_krakenuniq = true run_motus = false run_kmcp = true - kmcp_mode = 0 run_ganon = true run_krona = true } diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config index 7cf2317d..4e917fb9 100644 --- a/conf/test_noprofiling.config +++ b/conf/test_noprofiling.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = true perform_longread_qc = true 
shortread_qc_mergepairs = true @@ -29,7 +29,7 @@ params { perform_shortread_hostremoval = true perform_longread_hostremoval = true perform_runmerging = true - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false run_bracken = false @@ -40,7 +40,6 @@ params { run_krakenuniq = false run_motus = false run_kmcp = false - kmcp_mode = 0 run_ganon = false } diff --git a/conf/test_nothing.config b/conf/test_nothing.config index ed247ef4..d36c76d4 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -20,15 +20,15 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = false perform_longread_qc = false perform_shortread_complexityfilter = false perform_shortread_hostremoval = false perform_longread_hostremoval = false perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = false run_kraken2 = false run_bracken = false @@ -39,7 +39,6 @@ params { run_krakenuniq = false run_motus = false run_kmcp = false - kmcp_mode = 0 run_ganon = false } diff --git a/conf/test_prinseqplusplus.config b/conf/test_prinseqplusplus.config index acc23aa8..c7ce2259 100644 --- a/conf/test_prinseqplusplus.config +++ 
b/conf/test_prinseqplusplus.config @@ -20,8 +20,8 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv' + input = params.pipelines_testdata_base_path + 'taxprofiler/samplesheet.csv' + databases = params.pipelines_testdata_base_path + 'taxprofiler/database_v1.1.csv' perform_shortread_qc = true perform_longread_qc = true perform_shortread_complexityfilter = true @@ -29,7 +29,7 @@ params { perform_shortread_hostremoval = false perform_longread_hostremoval = false perform_runmerging = false - hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + hostremoval_reference = params.pipelines_testdata_base_path + 'modules/data/genomics/homo_sapiens/genome/genome.fasta' run_kaiju = true run_kraken2 = true run_bracken = false @@ -41,7 +41,6 @@ params { run_motus = false run_ganon = false run_kmcp = false - kmcp_mode = 0 } process { diff --git a/docs/images/nf-core-taxprofiler_logo_generic.svg b/docs/images/nf-core-taxprofiler_logo_generic.svg new file mode 100644 index 00000000..632997ca --- /dev/null +++ b/docs/images/nf-core-taxprofiler_logo_generic.svg @@ -0,0 +1,2309 @@ + + + +taxfindertaxprofiler/ diff --git a/docs/output.md b/docs/output.md index 2cebd463..732d5964 100644 --- a/docs/output.md +++ b/docs/output.md @@ -376,11 +376,11 @@ The main taxonomic profiling file from Bracken is the `*.tsv` file. This provide - `kraken2/` - `_combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by `krakentools`) - - If you have also run Bracken, the original Kraken report (i.e., _before_ read re-assignment) will also be included in this directory with `-bracken` suffixed to your Bracken database name. For example: `kraken2--bracken.tsv`. 
However in most cases you want to use the actual Bracken file (i.e., `bracken_.tsv`). + - If you have also run Bracken, the original Kraken report (i.e., _before_ read re-assignment) will also be included in this directory with `-bracken` suffixed to your Bracken database name if you supply `--bracken_save_intermediatekraken2` to the run. For example: `kraken2--bracken.tsv`. However in most cases you want to use the actual Bracken file (i.e., `bracken_.tsv`). - `/` - `_.classified.fastq.gz`: FASTQ file containing all reads that had a hit against a reference in the database for a given sample - `_.unclassified.fastq.gz`: FASTQ file containing all reads that did not have a hit in the database for a given sample - - `_.report.txt`: A Kraken2 report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits in the Kraken2 run for a given sample. Will be 6 column rather than 8 if `--save_minimizers` specified. + - `_.report.txt`: A Kraken2 report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits in the Kraken2 run for a given sample. Will be 6 column rather than 8 if `--save_minimizers` specified. This report will **only** be included if you supply `--bracken_save_intermediatekraken2` to the run. - `_.classifiedreads.txt`: A list of read IDs and the hits each read had against each database for a given sample @@ -389,6 +389,8 @@ The main taxonomic classification file from Kraken2 is the `_combined_reports.tx You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply `--kraken2_save_reads` and/or `--kraken2_save_readclassifications` parameters to the pipeline. +When running Bracken, you will only get the 'intermediate' Kraken2 report files in this directory if you supply `--bracken_save_intermediatekraken2` to the run. 
+ ### KrakenUniq [KrakenUniq](https://github.com/fbreitwieser/krakenuniq) (formerly KrakenHLL) is an extension to the fast k-mer-based classification performed by [Kraken](https://github.com/DerrickWood/kraken) with an efficient algorithm for additionally assessing the coverage of unique k-mers found in each species in a dataset. @@ -398,8 +400,8 @@ You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply - `krakenuniq/` - `/` - - `_[.merged].classified.fasta.gz`: Optional FASTA file containing all reads that had a hit against a reference in the database for a given sample. Paired-end input reads are merged in this output. - - `_[.merged].unclassified.fasta.gz`: Optional FASTA file containing all reads that did not have a hit in the database for a given sample. Paired-end input reads are merged in this output. + - `_[.merged].classified.fast{a,q}.gz`: Optional FASTA or FASTQ file (matching the input format) containing all reads that had a hit against a reference in the database for a given sample. Paired-end input reads are merged in this output. + - `_[.merged].unclassified.fast{a,q}.gz`: Optional FASTA or FASTQ file (matching the input format) containing all reads that did not have a hit in the database for a given sample. Paired-end input reads are merged in this output. - `_.krakenuniq.report.txt`: A Kraken2-style report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits, with an additional column for k-mer coverage, that allows for more accurate distinguishing between false-positive/true-postitive hits. - `_.krakenuniq.classified.txt`: An optional list of read IDs and the hits each read had against each database for a given sample. 
diff --git a/docs/usage.md b/docs/usage.md index edeca74d..6534b820 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -46,11 +46,19 @@ The `sample` identifiers have to be the same when you have re-sequenced the same ```csv title="samplesheet.csv" sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta -2612,run1,ILLUMINA,2612_run1_R1.fq.gz,, -2612,run2,ILLUMINA,2612_run2_R1.fq.gz,, -2612,run3,ILLUMINA,2612_run3_R1.fq.gz,2612_run3_R2.fq.gz, +2612,lane1,ILLUMINA,2612_lane1_R1.fq.gz,2612_lane1_R2.fq.gz, +2612,lane2,ILLUMINA,2612_lane2_R1.fq.gz,2612_lane2_R2.fq.gz, +2612,lane3,ILLUMINA,2612_lane3_R1.fq.gz,, ``` +:::info +Please note that the column name `run_accession` follows the definition of an ENA 'run'. +A 'run' corresponds to a single or paired-end set of demultiplexed FASTQs. +Given that demultiplexing of a given library happens per lane, each sequencing pair from each lane is a 'run'. +Therefore, for each sample, you may get multiple 'runs' consisting of _both_ lanes (of the same library) _and_ sequencing libraries. +Therefore ensure that each `run_accession` ID is unique, even if from the same sample! +::: + :::warning Runs of the same sample sequenced on Illumina platforms with a combination of single and paired-end data will **not** be run-wise concatenated, unless pair-merging is specified. In the example above, `run3` will be profiled independently of `run1` and `run2` if pairs are not merged. ::: @@ -452,6 +460,8 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later). 
- `conda` - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. diff --git a/modules.json b/modules.json index f8c101e0..b7fc0290 100644 --- a/modules.json +++ b/modules.json @@ -67,7 +67,7 @@ }, "fastqc": { "branch": "master", - "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", + "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", "installed_by": ["modules"] }, "filtlong": { @@ -112,12 +112,12 @@ }, "kmcp/profile": { "branch": "master", - "git_sha": "e198734cc3be18af5f64f6d7734c7f1a7c3af5a6", + "git_sha": "6f56948d0674ad5870035e80c7af209a51d8e243", "installed_by": ["modules"] }, "kmcp/search": { "branch": "master", - "git_sha": "e198734cc3be18af5f64f6d7734c7f1a7c3af5a6", + "git_sha": "64cd3f418b191a008b9d362b8ccf0216ae0302d5", "installed_by": ["modules"] }, "kraken2/kraken2": { @@ -137,7 +137,7 @@ }, "krakenuniq/preloadedkrakenuniq": { "branch": "master", - "git_sha": "8bbaa881ab9e59f3e18680550d65d52339640630", + "git_sha": "9de9365c3ca6071ec01705919f6667c718ef47b4", "installed_by": ["modules"] }, "krona/ktimporttaxonomy": { @@ -228,12 +228,12 @@ }, "taxpasta/merge": { "branch": "master", - "git_sha": "48019785051ba491e82dce910273c2eca61bd5b7", + "git_sha": "4fd9089d3cf904e0b870d5a6a7ab903ee5e1004d", "installed_by": ["modules"] }, "taxpasta/standardise": { "branch": "master", - "git_sha": "48019785051ba491e82dce910273c2eca61bd5b7", + "git_sha": "4fd9089d3cf904e0b870d5a6a7ab903ee5e1004d", "installed_by": ["modules"] }, "untar": { @@ -252,7 +252,7 @@ }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 
9e19a74c..d79f1c86 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -25,6 +25,11 @@ process FASTQC { def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) + """ printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name @@ -33,6 +38,7 @@ process FASTQC { fastqc \\ $args \\ --threads $task.cpus \\ + --memory $fastqc_memory \\ $renamed_files cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/kmcp/profile/environment.yml b/modules/nf-core/kmcp/profile/environment.yml new file mode 100644 index 00000000..43de2a64 --- /dev/null +++ b/modules/nf-core/kmcp/profile/environment.yml @@ -0,0 +1,7 @@ +name: kmcp_profile +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::kmcp=0.9.4 diff --git a/modules/nf-core/kmcp/profile/main.nf b/modules/nf-core/kmcp/profile/main.nf index a4672122..3de9fdb7 100644 --- a/modules/nf-core/kmcp/profile/main.nf +++ b/modules/nf-core/kmcp/profile/main.nf @@ -2,15 +2,14 @@ process KMCP_PROFILE { tag "$meta.id" label 'process_medium' - conda "bioconda::kmcp=0.9.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/kmcp:0.9.1--h9ee0642_0': - 'biocontainers/kmcp:0.9.1--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/kmcp:0.9.4--h9ee0642_0': + 'biocontainers/kmcp:0.9.4--h9ee0642_0' }" input: tuple val(meta), path(search_results) path (db) - val mode output: tuple val(meta), path("*.profile"), emit: profile @@ -30,7 +29,6 @@ process KMCP_PROFILE { $args \\ -X \$taxdump \\ -T \$taxid \\ - -m $mode \\ -j $task.cpus \\ -o ${prefix}.profile \\ $search_results diff --git a/modules/nf-core/kmcp/profile/meta.yml b/modules/nf-core/kmcp/profile/meta.yml index 14f292c7..ba1ca2a2 100644 --- a/modules/nf-core/kmcp/profile/meta.yml +++ b/modules/nf-core/kmcp/profile/meta.yml @@ -15,30 +15,21 @@ tools: documentation: "https://bioinf.shenwei.me/kmcp/usage/#profile" tool_dev_url: "https://github.com/shenwei356/kmcp" doi: "10.1093/bioinformatics/btac845" - licence: "['MIT']" - + licence: ["MIT"] input: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - db: - type: directory - description: Database directory containing taxdump files and taxid file - search_results: type: file description: Gzipped file output from kmcp search module pattern: "*.gz" - - mode: - type: integer - description: Profiling mode. - 0-pathogen detection - 1-higher recall - 2-high recall - 3-default - 4-high precision - 5-higher precision + - db: + type: directory + description: Database directory containing taxdump files and taxid file + output: - meta: type: map @@ -53,6 +44,7 @@ output: type: file description: Tab-delimited format file with 17 columns. 
pattern: "*.profile" - authors: - "@sofstam" +maintainers: + - "@sofstam" diff --git a/modules/nf-core/kmcp/profile/tests/main.nf.test b/modules/nf-core/kmcp/profile/tests/main.nf.test new file mode 100644 index 00000000..20b303ed --- /dev/null +++ b/modules/nf-core/kmcp/profile/tests/main.nf.test @@ -0,0 +1,109 @@ +nextflow_process { + + name "Test Process KMCP_PROFILE" + script "../main.nf" + process "KMCP_PROFILE" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "kmcp" + tag "kmcp/profile" + tag "untar" + tag "kmcp/compute" + tag "kmcp/index" + tag "kmcp/search" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'delete_me/kmcp/kmcp_profile.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("KMCP_COMPUTE") { + script "../../../kmcp/compute/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + run("KMCP_INDEX") { + script "../../../kmcp/index/main.nf" + process { + """ + input[0] = KMCP_COMPUTE.out.outdir + """ + } + } + + run("KMCP_SEARCH") { + script "../../../kmcp/search/main.nf" + process { + """ + input[0] = [ + [id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = KMCP_INDEX.out.kmcp.map{it[1]} + """ + } + } + } + + test("sarscov2 - fasta") { + when { + process { + """ + input[0] = KMCP_SEARCH.out.result + input[1] = UNTAR.out.untar.map{it[1]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + ).match() + } + ) + } + + } + + test("sarscov2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = KMCP_SEARCH.out.result + input[1] = UNTAR.out.untar.map{it[1]} + """ + } + } + + then { + assertAll( + { 
assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/kmcp/profile/tests/main.nf.test.snap b/modules/nf-core/kmcp/profile/tests/main.nf.test.snap new file mode 100644 index 00000000..72b41ce8 --- /dev/null +++ b/modules/nf-core/kmcp/profile/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_kmcp.profile:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,71f3499117cd6d006be15365b761d38b" + ], + "profile": [ + [ + { + "id": "test", + "single_end": true + }, + "test_kmcp.profile:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,71f3499117cd6d006be15365b761d38b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-07T15:36:20.331533599" + }, + "sarscov2 - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_kmcp.profile:md5,d7318c8f2d578ea2e6355f05789db4f3" + ] + ], + "1": [ + "versions.yml:md5,71f3499117cd6d006be15365b761d38b" + ], + "profile": [ + [ + { + "id": "test", + "single_end": true + }, + "test_kmcp.profile:md5,d7318c8f2d578ea2e6355f05789db4f3" + ] + ], + "versions": [ + "versions.yml:md5,71f3499117cd6d006be15365b761d38b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-07T15:36:13.138318234" + } +} \ No newline at end of file diff --git a/modules/nf-core/kmcp/profile/tests/nextflow.config b/modules/nf-core/kmcp/profile/tests/nextflow.config new file mode 100644 index 00000000..bee9ca0c --- /dev/null +++ b/modules/nf-core/kmcp/profile/tests/nextflow.config @@ -0,0 +1,13 @@ +process { + withName: UNTAR { + ext.args2 = {"--no-same-owner"} + } + + withName: KMCP_COMPUTE { + ext.prefix = { "${meta.id}_kmcp" } + } + + withName: KMCP_PROFILE { + ext.prefix = { 
"${meta.id}_kmcp" } + } +} diff --git a/modules/nf-core/kmcp/profile/tests/tags.yml b/modules/nf-core/kmcp/profile/tests/tags.yml new file mode 100644 index 00000000..7af489b0 --- /dev/null +++ b/modules/nf-core/kmcp/profile/tests/tags.yml @@ -0,0 +1,2 @@ +kmcp/profile: + - "modules/nf-core/kmcp/profile/**" diff --git a/modules/nf-core/kmcp/search/environment.yml b/modules/nf-core/kmcp/search/environment.yml new file mode 100644 index 00000000..397fcb8a --- /dev/null +++ b/modules/nf-core/kmcp/search/environment.yml @@ -0,0 +1,7 @@ +name: kmcp_search +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::kmcp=0.9.4 diff --git a/modules/nf-core/kmcp/search/main.nf b/modules/nf-core/kmcp/search/main.nf index cb2d6843..62f74aeb 100644 --- a/modules/nf-core/kmcp/search/main.nf +++ b/modules/nf-core/kmcp/search/main.nf @@ -2,14 +2,14 @@ process KMCP_SEARCH { tag "$meta.id" label 'process_medium' - conda "bioconda::kmcp=0.9.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/kmcp:0.9.1--h9ee0642_0': - 'biocontainers/kmcp:0.9.1--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/kmcp:0.9.4--h9ee0642_0': + 'biocontainers/kmcp:0.9.4--h9ee0642_0' }" input: - path(db) tuple val(meta), path(reads) + path(db) output: tuple val(meta), path("*.gz") , emit: result @@ -40,7 +40,7 @@ process KMCP_SEARCH { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.gz + echo "" | gzip > ${prefix}.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/kmcp/search/meta.yml b/modules/nf-core/kmcp/search/meta.yml index 5526a179..6d003b04 100644 --- a/modules/nf-core/kmcp/search/meta.yml +++ b/modules/nf-core/kmcp/search/meta.yml @@ -14,23 +14,21 @@ tools: documentation: "https://github.com/shenwei356/kmcp#documents" tool_dev_url: "https://github.com/shenwei356/kmcp" doi: "10.1093/bioinformatics/btac845" - licence: "['MIT']" - + licence: ["MIT"] input: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - db: - type: directory - description: Database directory created by "kmcp index" - pattern: "*" - reads: type: file description: gzipped fasta or fastq files pattern: "*.{fq.gz,fastq.gz,fa.gz}" - + - db: + type: directory + description: Database directory created by "kmcp index" + pattern: "*" output: - meta: type: map @@ -45,6 +43,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@sofstam" +maintainers: + - "@sofstam" diff --git a/modules/nf-core/kmcp/search/tests/main.nf.test b/modules/nf-core/kmcp/search/tests/main.nf.test new file mode 100644 index 00000000..adc693e6 --- /dev/null +++ b/modules/nf-core/kmcp/search/tests/main.nf.test @@ -0,0 +1,88 @@ +nextflow_process { + + name "Test Process KMCP_SEARCH" + script "../main.nf" + process "KMCP_SEARCH" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "kmcp" + tag "kmcp/search" + tag "kmcp/compute" + tag "kmcp/index" + + setup { + run("KMCP_COMPUTE") { + script "../../../kmcp/compute/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + run("KMCP_INDEX") { + script "../../../kmcp/index/main.nf" + process { + """ + input[0] = KMCP_COMPUTE.out.outdir + """ + } + } + } + + test("sarscov2 - fasta") { + when { + process { + """ + input[0] = [ + [id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = KMCP_INDEX.out.kmcp.map{it[1]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + ).match() + } + ) + } + + } + + test("sarscov2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = KMCP_INDEX.out.kmcp.map{it[1]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/kmcp/search/tests/main.nf.test.snap b/modules/nf-core/kmcp/search/tests/main.nf.test.snap new file mode 100644 index 00000000..e245e2e9 --- /dev/null +++ b/modules/nf-core/kmcp/search/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,69d488bec087e13e13bef0482633b6c3" + ], + "result": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,69d488bec087e13e13bef0482633b6c3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-07T14:41:54.308010562" + }, + "sarscov2 - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gz:md5,84953d3d517f65722d43a2c3fdd04935" + ] + ], + "1": [ + "versions.yml:md5,69d488bec087e13e13bef0482633b6c3" + ], + "result": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gz:md5,84953d3d517f65722d43a2c3fdd04935" + ] + ], + "versions": [ + "versions.yml:md5,69d488bec087e13e13bef0482633b6c3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-06T21:35:20.053225742" + } +} \ No newline at end of file diff --git a/modules/nf-core/kmcp/search/tests/nextflow.config b/modules/nf-core/kmcp/search/tests/nextflow.config new file mode 100644 index 00000000..9366eab4 --- /dev/null +++ b/modules/nf-core/kmcp/search/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: KMCP_INDEX { + ext.prefix = { "${meta.id}_kmcp" } + } +} 
diff --git a/modules/nf-core/kmcp/search/tests/tags.yml b/modules/nf-core/kmcp/search/tests/tags.yml new file mode 100644 index 00000000..e281416c --- /dev/null +++ b/modules/nf-core/kmcp/search/tests/tags.yml @@ -0,0 +1,2 @@ +kmcp/search: + - "modules/nf-core/kmcp/search/**" diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf index 59055bdb..78b2f3ab 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf @@ -8,34 +8,37 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { 'biocontainers/krakenuniq:1.0.4--pl5321h19e8d03_0' }" input: - tuple val(meta), path(fastqs) - path db + tuple val(meta), path(sequences) + val sequence_type + path db val ram_chunk_size val save_output_reads val report_file val save_output output: - tuple val(meta), path('*.classified.fasta.gz') , optional:true, emit: classified_reads_fasta - tuple val(meta), path('*.unclassified.fasta.gz') , optional:true, emit: unclassified_reads_fasta - tuple val(meta), path('*.krakenuniq.classified.txt'), optional:true, emit: classified_assignment - tuple val(meta), path('*.krakenuniq.report.txt') , emit: report - path "versions.yml" , emit: versions + tuple val(meta), path("*.classified.${sequence_type}.gz") , optional:true, emit: classified_reads + tuple val(meta), path("*.unclassified.${sequence_type}.gz"), optional:true, emit: unclassified_reads + tuple val(meta), path('*.krakenuniq.classified.txt') , optional:true, emit: classified_assignment + tuple val(meta), path('*.krakenuniq.report.txt') , emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: + assert sequence_type in ['fasta', 'fastq'] + def args = task.ext.args ?: '' def args2 = task.ext.args ?: '' - def classified = meta.single_end ? '"\${PREFIX}.classified.fasta"' : '"\${PREFIX}.merged.classified.fasta"' - def unclassified = meta.single_end ? 
'"\${PREFIX}.unclassified.fasta"' : '"\${PREFIX}.merged.unclassified.fasta"' - def classified_option = save_output_reads ? "--classified-out ${classified}" : '' - def unclassified_option = save_output_reads ? "--unclassified-out ${unclassified}" : '' + classified = meta.single_end ? "\${PREFIX}.classified.${sequence_type}" : "\${PREFIX}.merged.classified.${sequence_type}" + unclassified = meta.single_end ? "\${PREFIX}.unclassified.${sequence_type}" : "\${PREFIX}.merged.unclassified.${sequence_type}" + classified_option = save_output_reads ? "--classified-out \"${classified}\"" : '' + unclassified_option = save_output_reads ? "--unclassified-out \"${unclassified}\"" : '' def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : '' def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : '' - def compress_reads_command = save_output_reads ? 'gzip --no-name *.fasta' : '' + compress_reads_command = save_output_reads ? "find . -name '*.${sequence_type}' -print0 | xargs -0 -t -P ${task.cpus} -I % gzip --no-name %" : '' if (meta.single_end) { """ krakenuniq \\ @@ -51,7 +54,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo "\${result%%.*}" } - printf "%s\\n" ${fastqs} | while read FASTQ; do \\ + printf "%s\\n" ${sequences} | while read FASTQ; do \\ PREFIX="\$(strip_suffix "\${FASTQ}")" krakenuniq \\ @@ -89,7 +92,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo "\${result%.}" } - printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\ + printf "%s %s\\n" ${sequences} | while read FASTQ; do \\ read -r -a FASTQ <<< "\${FASTQ}" PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)" @@ -115,16 +118,18 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { } stub: + assert sequence_type in ['fasta', 'fastq'] + def args = task.ext.args ?: '' def args2 = task.ext.args ?: '' - def classified = meta.single_end ? 
'"\${PREFIX}.classified.fasta"' : '"\${PREFIX}.merged.classified.fasta"' - def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fasta"' : '"\${PREFIX}.merged.unclassified.fasta"' - def classified_option = save_output_reads ? "--classified-out ${classified}" : '' - def unclassified_option = save_output_reads ? "--unclassified-out ${unclassified}" : '' + classified = meta.single_end ? "\${PREFIX}.classified.${sequence_type}" : "\${PREFIX}.merged.classified.${sequence_type}" + unclassified = meta.single_end ? "\${PREFIX}.unclassified.${sequence_type}" : "\${PREFIX}.merged.unclassified.${sequence_type}" + classified_option = save_output_reads ? "--classified-out \"${classified}\"" : '' + unclassified_option = save_output_reads ? "--unclassified-out \"${unclassified}\"" : '' def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : '' def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : '' - def compress_reads_command = save_output_reads ? 'gzip --no-name *.fasta' : '' + compress_reads_command = save_output_reads ? "find . 
-name '*.${sequence_type}' -print0 | xargs -0 -t -P ${task.cpus} -I % gzip --no-name %" : '' if (meta.single_end) { """ echo krakenuniq \\ @@ -148,7 +153,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo '<3 nf-core' | gzip -n > "\$1" } - printf "%s\\n" ${fastqs} | while read FASTQ; do \\ + printf "%s\\n" ${sequences} | while read FASTQ; do \\ echo "\${FASTQ}" PREFIX="\$(strip_suffix "\${FASTQ}")" echo "\${PREFIX}" @@ -165,11 +170,11 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { create_file "\${PREFIX}.krakenuniq.classified.txt" create_file "\${PREFIX}.krakenuniq.report.txt" - create_gzip_file "\${PREFIX}.classified.fasta.gz" - create_gzip_file "\${PREFIX}.unclassified.fasta.gz" + create_gzip_file "\${PREFIX}.classified.${sequence_type}.gz" + create_gzip_file "\${PREFIX}.unclassified.${sequence_type}.gz" done - echo $compress_reads_command + echo "$compress_reads_command" cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -201,7 +206,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { echo '<3 nf-core' | gzip -n > "\$1" } - printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\ + printf "%s %s\\n" ${sequences} | while read FASTQ; do \\ read -r -a FASTQ <<< "\${FASTQ}" echo "\${FASTQ[@]}" PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)" @@ -220,11 +225,11 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { create_file "\${PREFIX}.krakenuniq.classified.txt" create_file "\${PREFIX}.krakenuniq.report.txt" - create_gzip_file "\${PREFIX}.merged.classified.fasta.gz" - create_gzip_file "\${PREFIX}.merged.unclassified.fasta.gz" + create_gzip_file "\${PREFIX}.merged.classified.${sequence_type}.gz" + create_gzip_file "\${PREFIX}.merged.unclassified.${sequence_type}.gz" done - echo $compress_reads_command + echo "$compress_reads_command" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml index 
4a6dffee..bb6409a6 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml @@ -19,9 +19,13 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - fastqs: + - sequences: type: file - description: List of input FastQ files + description: List of input files containing sequences. All of them must be either in FASTA or FASTQ format. + - sequence_type: + type: string + description: Format of all given sequencing files as literal string, either 'fasta' or 'fastq'. + pattern: "{fasta,fastq}" - db: type: directory description: KrakenUniq database @@ -32,31 +36,33 @@ input: - save_output_reads: type: boolean description: | - Optionally commands are added to save classified and unclassified reads as FASTA files. - When the input is paired-end, the single output FASTA contains merged reads. - - save_reads_assignment: + Optionally, commands are added to save classified and unclassified reads + as FASTQ or FASTA files depending on the input format. When the input + is paired-end, the single output FASTQ contains merged reads. + - report_file: type: boolean - description: | - If true, an optional command is added to save a file reporting the taxonomic - classification of each input read + description: Whether to generate a report of relative abundances. + - save_output: + type: boolean + description: Whether to save a file reporting the taxonomic classification of each input read. output: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - classified_reads_fasta: + - classified_reads: type: file description: | Reads classified as belonging to any of the taxa in the KrakenUniq reference database. 
- pattern: "*.classified.fasta.gz" - - unclassified_reads_fasta: + pattern: "*.classified.{fastq,fasta}.gz" + - unclassified_reads: type: file description: | Reads not classified to any of the taxa in the KrakenUniq reference database. - pattern: "*.unclassified.fasta.gz" + pattern: "*.unclassified.{fastq,fasta}.gz" - classified_assignment: type: file description: | diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test index a7c44707..9e1d6700 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test @@ -7,10 +7,23 @@ nextflow_process { tag "modules_nfcore" tag "krakenuniq" tag "krakenuniq/preloadedkrakenuniq" + tag "untar" - test("sarscov2 - Illumina FASTQ single - stub-run") { - options "-stub-run" + setup { + run("UNTAR") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [id: 'krakenuniq'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/krakenuniq.tar.gz', checkIfExists: true) + ] + """ + } + } + } + test("sarscov2 - FASTA") { when { params { outdir = "$outputDir" @@ -19,15 +32,170 @@ nextflow_process { """ input[0] = [ [id:'test', single_end:true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[1] = 'fasta' + input[2] = UNTAR.out.untar.map { it[1] } + input[3] = '1GB' + input[4] = true + input[5] = true + input[6] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + // Report contains a timestamp. 
+ { assert file(process.out.report.get(0).get(1)).name == 'genome.krakenuniq.report.txt' }, + { assert file(process.out.unclassified_reads.get(0).get(1)).name == 'genome.unclassified.fasta.gz' }, + { assert snapshot( + process.out.classified_reads, + process.out.classified_assignment, + process.out.versions + ).match('fasta') }, + ) + } + + } + + test("sarscov2 - Illumina FASTQ single") { + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id:'test', single_end:true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq' + input[2] = UNTAR.out.untar.map { it[1] } + input[3] = '1GB' + input[4] = true + input[5] = true + input[6] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + // Report contains a timestamp. + { assert file(process.out.report.get(0).get(1)).name == 'test_interleaved.krakenuniq.report.txt' }, + { assert snapshot( + process.out.classified_reads, + process.out.unclassified_reads, + process.out.classified_assignment, + process.out.versions + ).match('fastq-single') }, + ) + } + + } + + test("sarscov2 - Illumina FASTQ paired-end") { + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id:'test', single_end:false], [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ] - input[1] = [] - input[2] = '8GB' - input[3] = true + input[1] = 'fastq' + input[2] = UNTAR.out.untar.map { it[1] } + input[3] = '1GB' + input[4] = true + input[5] = true + input[6] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + // Report contains a timestamp. 
+ { assert file(process.out.report.get(0).get(1)).name == 'test.krakenuniq.report.txt' }, + { assert file(process.out.unclassified_reads.get(0).get(1)).name == 'test.merged.unclassified.fastq.gz' }, + { assert snapshot( + process.out.classified_reads, + process.out.classified_assignment, + process.out.versions + ).match('fastq-paired') }, + ) + } + + } + + test("sarscov2 - FASTA - stub") { + options "-stub-run" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id:'test', single_end:true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + input[1] = 'fasta' + input[2] = UNTAR.out.untar.map { it[1] } + input[3] = '1GB' + input[4] = true + input[5] = true + input[6] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + // Report contains a timestamp. + { assert file(process.out.report.get(0).get(1)).name == 'genome.krakenuniq.report.txt' }, + { assert file(process.out.unclassified_reads.get(0).get(1)).name == 'genome.unclassified.fasta.gz' }, + { assert snapshot( + process.out.classified_reads, + process.out.unclassified_reads, + process.out.classified_assignment, + process.out.versions + ).match('fasta-stub') }, + ) + } + + } + + test("sarscov2 - Illumina FASTQ single - stub") { + options "-stub-run" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id:'test', single_end:true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq' + input[2] = UNTAR.out.untar.map { it[1] } + input[3] = '1GB' input[4] = true input[5] = true + input[6] = true """ } } @@ -35,13 +203,20 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, + // Report contains a timestamp. 
+ { assert file(process.out.report.get(0).get(1)).name == 'test_interleaved.krakenuniq.report.txt' }, + { assert snapshot( + process.out.classified_reads, + process.out.unclassified_reads, + process.out.classified_assignment, + process.out.versions + ).match('fastq-single-stub') }, ) } } - test("sarscov2 - Illumina FASTQ paired-end - stub-run") { + test("sarscov2 - Illumina FASTQ paired-end - stub") { options "-stub-run" when { @@ -53,15 +228,16 @@ nextflow_process { input[0] = [ [id:'test', single_end:false], [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ] - input[1] = [] - input[2] = '8GB' - input[3] = true + input[1] = 'fastq' + input[2] = UNTAR.out.untar.map { it[1] } + input[3] = '1GB' input[4] = true input[5] = true + input[6] = true """ } } @@ -69,7 +245,14 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() }, + // Report contains a timestamp. 
+ { assert file(process.out.report.get(0).get(1)).name == 'test.krakenuniq.report.txt' }, + { assert snapshot( + process.out.classified_reads, + process.out.unclassified_reads, + process.out.classified_assignment, + process.out.versions + ).match('fastq-paired-stub') }, ) } diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap index 970865bd..2a431be8 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/tests/main.nf.test.snap @@ -1,172 +1,218 @@ { - "sarscov2 - Illumina FASTQ paired-end - stub-run": { + "fastq-single-stub": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.merged.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.merged.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "test.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "4": [ - "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" - ], - "classified_assignment": [ - [ - { - "id": "test", - "single_end": false - }, - "test.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "classified_reads_fasta": [ - [ - { - "id": "test", - "single_end": false - }, - "test.merged.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - ], - "report": [ - [ - { - "id": "test", - "single_end": false - }, - "test.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "unclassified_reads_fasta": [ - [ - { - "id": "test", - "single_end": false - }, - "test.merged.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - 
], - "versions": [ - "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" - ] - } + [ + [ + { + "id": "test", + "single_end": true + }, + "test_interleaved.classified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_interleaved.unclassified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_interleaved.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-06T11:21:36.338887437" + }, + "fastq-single": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_interleaved.classified.fastq.gz:md5,3bd95021a8fbced1be8039b990b28176" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_interleaved.unclassified.fastq.gz:md5,143c7eb70ca93cc2d5ea98767c370424" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_interleaved.krakenuniq.classified.txt:md5,88a734a9a9216cb0770a77f36c9f4e78" + ] + ], + [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-06T11:17:43.586414914" + }, + "fastq-paired": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.classified.fastq.gz:md5,dd7651837cce63e6108e28f4f019aedb" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.krakenuniq.classified.txt:md5,ed5e19c7a88312cc04e483ac5f2579cd" + ] + ], + [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ] ], - "timestamp": "2023-11-21T15:38:47.810576872" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-06T11:37:46.718293365" }, - "sarscov2 - Illumina FASTQ single - stub-run": { + "fasta-stub": { "content": [ - { - "0": [ - [ - { - "id": "test", - 
"single_end": true - }, - "test_1.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": true - }, - "test_1.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - ], - "2": [ - [ - { - "id": "test", - "single_end": true - }, - "test_1.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": true - }, - "test_1.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "4": [ - "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" - ], - "classified_assignment": [ - [ - { - "id": "test", - "single_end": true - }, - "test_1.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "classified_reads_fasta": [ - [ - { - "id": "test", - "single_end": true - }, - "test_1.classified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - ], - "report": [ - [ - { - "id": "test", - "single_end": true - }, - "test_1.krakenuniq.report.txt:md5,a5704c35e6b573a45e3a344768fe6975" - ] - ], - "unclassified_reads_fasta": [ - [ - { - "id": "test", - "single_end": true - }, - "test_1.unclassified.fasta.gz:md5,34ed306e94fa7eed00b1adccd2e0de20" - ] - ], - "versions": [ - "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" - ] - } + [ + [ + { + "id": "test", + "single_end": true + }, + "genome.classified.fasta.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "genome.unclassified.fasta.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "genome.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-06T11:28:27.729550991" + }, + "fastq-paired-stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.merged.classified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.unclassified.fastq.gz:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.krakenuniq.classified.txt:md5,a5704c35e6b573a45e3a344768fe6975" + ] + ], + [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-05T20:06:20.262529457" + }, + "fasta": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "genome.classified.fasta.gz:md5,e73599798195a519ba2565c3f0275b93" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "genome.krakenuniq.classified.txt:md5,8aafacd89a6aac98aaf512df0a7493d1" + ] + ], + [ + "versions.yml:md5,6abf6c733f53fa3b6aaaa6f06864ef0c" + ] ], - "timestamp": "2023-11-21T15:38:42.894597091" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-06T11:36:00.24752418" } } \ No newline at end of file diff --git a/modules/nf-core/taxpasta/merge/environment.yml b/modules/nf-core/taxpasta/merge/environment.yml new file mode 100644 index 00000000..ca1a10b7 --- /dev/null +++ b/modules/nf-core/taxpasta/merge/environment.yml @@ -0,0 +1,7 @@ +name: taxpasta_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::taxpasta=0.7.0 diff --git a/modules/nf-core/taxpasta/merge/main.nf b/modules/nf-core/taxpasta/merge/main.nf index de135221..662f6e79 100644 --- a/modules/nf-core/taxpasta/merge/main.nf +++ b/modules/nf-core/taxpasta/merge/main.nf @@ -2,14 +2,16 @@ process TAXPASTA_MERGE { tag "$meta.id" label 'process_single' - conda "bioconda::taxpasta=0.6.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/taxpasta:0.6.1--pyhdfd78af_0': - 'biocontainers/taxpasta:0.6.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/taxpasta:0.7.0--pyhdfd78af_0': + 'biocontainers/taxpasta:0.7.0--pyhdfd78af_0' }" input: tuple val(meta), path(profiles) + val profiler + val format path taxonomy path samplesheet @@ -21,24 +23,34 @@ process TAXPASTA_MERGE { task.ext.when == null || task.ext.when script: - // N.B.: Taxpasta requires a --profiler option and will fail without it. - // This must be specified via a `nextflow.config` or `modules.config`, for - // example, as "--profiler kraken2". Additionally, it requires a --output - // option with the output file name. The desired format will be parsed from - // the name and should correspond to the output pattern specified above, - // e.g., "--output ${task.ext.prefix}.tsv". def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def taxonomy_option = taxonomy ? "--taxonomy ${taxonomy}" : '' def samplesheet_input = samplesheet ? "-s ${samplesheet}" : '' """ taxpasta merge \\ + --profiler $profiler \\ + --output ${prefix}.${format} \\ $args \\ $taxonomy_option \\ $samplesheet_input \\ $profiles + cat <<-END_VERSIONS > versions.yml + "${task.process}": + taxpasta: \$(taxpasta --version) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def taxonomy_option = taxonomy ? "--taxonomy ${taxonomy}" : '' + def samplesheet_input = samplesheet ? 
"-s ${samplesheet}" : '' + """ + touch ${prefix}.${format} + cat <<-END_VERSIONS > versions.yml "${task.process}": taxpasta: \$(taxpasta --version) diff --git a/modules/nf-core/taxpasta/merge/meta.yml b/modules/nf-core/taxpasta/merge/meta.yml index ed89e62c..a4cbed94 100644 --- a/modules/nf-core/taxpasta/merge/meta.yml +++ b/modules/nf-core/taxpasta/merge/meta.yml @@ -14,9 +14,7 @@ tools: homepage: "https://taxpasta.readthedocs.io/" documentation: "https://taxpasta.readthedocs.io/" tool_dev_url: "https://github.com/taxprofiler/taxpasta" - - licence: "['Apache-2.0']" - + licence: ["Apache-2.0"] input: - meta: type: map @@ -27,17 +25,22 @@ input: type: file description: A list of taxonomic profiler output files (typically in text format, mandatory) pattern: "*.{tsv,csv,arrow,parquet,biom}" - - samplesheet: - type: file - description: - A samplesheet describing the sample name and a filepath to a taxonomic abundance profile that needs to be relative - from the work environment. The profiles must be provided even if you give a samplesheet as argument (optional) - pattern: "*.{tsv,csv,ods,xlsx,arrow,parquet}" + - profiler: + type: string + description: Name of the profiler used to generate the profile (mandatory) + pattern: "bracken|centrifuge|diamond|ganon|kaiju|kmcp|kraken2|krakenuniq|megan6|metaphlan|motus" + - format: + type: string + description: Type of output file to be generated + pattern: "tsv|csv|ods|xlsx|arrow|parquet|biom" - taxonomy: type: directory description: Directory containing at a minimum nodes.dmp and names.dmp files (optional) pattern: "*/" - + - samplesheet: + type: file + description: A samplesheet describing the sample name and a filepath to a taxonomic abundance profile that needs to be relative from the Nextflow work directory of the executed process. 
The profiles must be provided even if you give a samplesheet as argument (optional) + pattern: "*.{tsv,csv,ods,xlsx,arrow,parquet}" output: - meta: type: map @@ -52,7 +55,9 @@ output: type: file description: Output file with standardised multiple profiles in one go and have all profiles combined into a single table. pattern: "*.{tsv,csv,ods,xlsx,arrow,parquet,biom}" - authors: - "@sofstam" - "@jfy133" +maintainers: + - "@sofstam" + - "@jfy133" diff --git a/modules/nf-core/taxpasta/merge/tests/main.nf.test b/modules/nf-core/taxpasta/merge/tests/main.nf.test new file mode 100644 index 00000000..886e93b9 --- /dev/null +++ b/modules/nf-core/taxpasta/merge/tests/main.nf.test @@ -0,0 +1,111 @@ +nextflow_process { + + name "Test Process TAXPASTA_MERGE" + script "../main.nf" + process "TAXPASTA_MERGE" + tag "modules" + tag "modules_nfcore" + tag "taxpasta" + tag "taxpasta/merge" + + test("sarscov2 - metagenome - kraken report") { + + when { + process { + """ + ch_test1_kraken = Channel.fromPath(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true).collectFile(name: 'test_1.kraken2.report.txt') + ch_test2_kraken = Channel.fromPath(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true).collectFile(name: 'test_2.kraken2.report.txt') + + input[0] = ch_test1_kraken.mix ( ch_test2_kraken ) + .collect() + .map { files -> + def meta = [:] + meta['id'] = 'kraken2' + meta['profiler'] = 'kraken2' + [meta, files.sort()] + + } + input[1] = 'kraken2' + input[2] = 'tsv' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.merged_profiles[0][1]).readLines().any { it.contains('2697049 100 100') }, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - metagenome - kraken report - samplesheet") { + + when { + process { + """ + ch_test1_kraken = 
Channel.fromPath(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true).collectFile(name: 'test_1.kraken2.report.txt') + ch_test2_kraken = Channel.fromPath(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true).collectFile(name: 'test_2.kraken2.report.txt') + + input[0] = ch_test1_kraken.mix ( ch_test2_kraken ) + .collect() + .map { files -> + def meta = [:] + meta['id'] = 'kraken2' + meta['profiler'] = 'kraken2' + [meta, files.sort()] + } + input[1] = 'kraken2' + input[2] = 'tsv' + input[3] = [] + input[4] = Channel.of( + 'sample\tprofile', + 'test_1\t"test_1.kraken2.report.txt"', + 'test_2\t"test_2.kraken2.report.txt"' + ) + .collectFile(name: 'samplesheet.tsv', newLine: true, sort: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - metagenome - kraken report - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [[id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true)] + input[1] = 'kraken2' + input[2] = 'tsv' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/taxpasta/merge/tests/main.nf.test.snap b/modules/nf-core/taxpasta/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..afb7e491 --- /dev/null +++ b/modules/nf-core/taxpasta/merge/tests/main.nf.test.snap @@ -0,0 +1,83 @@ +{ + "sarscov2 - metagenome - kraken report": { + "content": [ + true, + [ + "versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-11T06:13:35.985987277" + }, + "sarscov2 - metagenome - kraken report - stub": { + "content": [ + { + "0": 
[ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d" + ], + "merged_profiles": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-10T22:04:59.08186623" + }, + "sarscov2 - metagenome - kraken report - samplesheet": { + "content": [ + { + "0": [ + [ + { + "id": "kraken2", + "profiler": "kraken2" + }, + "kraken2.tsv:md5,3a31a2bbff49f6e03083a2e03f4f6563" + ] + ], + "1": [ + "versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d" + ], + "merged_profiles": [ + [ + { + "id": "kraken2", + "profiler": "kraken2" + }, + "kraken2.tsv:md5,3a31a2bbff49f6e03083a2e03f4f6563" + ] + ], + "versions": [ + "versions.yml:md5,35ebf4cc0297b6601cef13c6a3ab157d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-10T22:25:57.576974385" + } +} \ No newline at end of file diff --git a/modules/nf-core/taxpasta/merge/tests/tags.yml b/modules/nf-core/taxpasta/merge/tests/tags.yml new file mode 100644 index 00000000..5d17220c --- /dev/null +++ b/modules/nf-core/taxpasta/merge/tests/tags.yml @@ -0,0 +1,2 @@ +taxpasta/merge: + - "modules/nf-core/taxpasta/merge/**" diff --git a/modules/nf-core/taxpasta/standardise/environment.yml b/modules/nf-core/taxpasta/standardise/environment.yml new file mode 100644 index 00000000..a48f08e0 --- /dev/null +++ b/modules/nf-core/taxpasta/standardise/environment.yml @@ -0,0 +1,7 @@ +name: taxpasta_standardise +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::taxpasta=0.7.0 diff --git a/modules/nf-core/taxpasta/standardise/main.nf b/modules/nf-core/taxpasta/standardise/main.nf index 7822912a..7b393517 100644 --- a/modules/nf-core/taxpasta/standardise/main.nf +++ 
b/modules/nf-core/taxpasta/standardise/main.nf @@ -2,13 +2,15 @@ process TAXPASTA_STANDARDISE { tag "$meta.id" label 'process_single' - conda "bioconda::taxpasta=0.6.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/taxpasta:0.6.1--pyhdfd78af_0': - 'biocontainers/taxpasta:0.6.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/taxpasta:0.7.0--pyhdfd78af_0': + 'biocontainers/taxpasta:0.7.0--pyhdfd78af_0' }" input: tuple val(meta), path(profile) + val profiler + val format path taxonomy output: @@ -19,17 +21,13 @@ process TAXPASTA_STANDARDISE { task.ext.when == null || task.ext.when script: - // N.B.: Taxpasta requires a --profiler option and will fail without it. - // This must be specified via a `nextflow.config` or `modules.config`, for - // example, as "--profiler kraken2". Additionally, it requires a --output - // option with the output file name. The desired format will be parsed from - // the name and should correspond to the output pattern specified above, - // e.g., "--output ${task.ext.prefix}.tsv". def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def taxonomy_option = taxonomy ? "--taxonomy ${taxonomy}" : '' """ taxpasta standardise \\ + --profiler $profiler \\ + --output ${prefix}.${format} \\ $args \\ $taxonomy_option \\ $profile @@ -39,4 +37,17 @@ process TAXPASTA_STANDARDISE { taxpasta: \$(taxpasta --version) END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def taxonomy_option = taxonomy ? 
"--taxonomy ${taxonomy}" : '' + """ + touch ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + taxpasta: \$(taxpasta --version) + END_VERSIONS + """ } diff --git a/modules/nf-core/taxpasta/standardise/meta.yml b/modules/nf-core/taxpasta/standardise/meta.yml index 81df6e2c..b855905a 100644 --- a/modules/nf-core/taxpasta/standardise/meta.yml +++ b/modules/nf-core/taxpasta/standardise/meta.yml @@ -14,9 +14,7 @@ tools: homepage: "https://taxpasta.readthedocs.io/" documentation: "https://taxpasta.readthedocs.io/" tool_dev_url: "https://github.com/taxprofiler/taxpasta" - - licence: "['Apache-2.0']" - + licence: ["Apache-2.0"] input: - meta: type: map @@ -27,11 +25,18 @@ input: type: file description: profiler output file (mandatory) pattern: "*" + - profiler: + type: string + description: Name of the profiler used to generate the profile (mandatory) + pattern: "bracken|centrifuge|diamond|ganon|kaiju|kmcp|kraken2|krakenuniq|megan6|metaphlan|motus" + - format: + type: string + description: Type of output file to be generated + pattern: "tsv|csv|ods|xlsx|arrow|parquet|biom" - taxonomy: type: directory description: Directory containing at a minimum nodes.dmp and names.dmp files (optional) pattern: "*/" - output: - meta: type: map @@ -46,6 +51,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@Midnighter" +maintainers: + - "@Midnighter" diff --git a/modules/nf-core/taxpasta/standardise/tests/main.nf.test b/modules/nf-core/taxpasta/standardise/tests/main.nf.test new file mode 100644 index 00000000..e06ca7d6 --- /dev/null +++ b/modules/nf-core/taxpasta/standardise/tests/main.nf.test @@ -0,0 +1,58 @@ +nextflow_process { + + name "Test Process TAXPASTA_STANDARDISE" + script "../main.nf" + process "TAXPASTA_STANDARDISE" + tag "modules" + tag "modules_nfcore" + tag "taxpasta" + tag "taxpasta/standardise" + + test("sarscov2 - metagenome - kraken report") { + + + when { + process { + """ + 
input[0] = [[id: 'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true)] + input[1] = "kraken2" + input[2] = 'tsv' + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - metagenome - kraken report - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [[id: 'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true)] + input[1] = "kraken2" + input[2] = 'tsv' + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/taxpasta/standardise/tests/main.nf.test.snap b/modules/nf-core/taxpasta/standardise/tests/main.nf.test.snap new file mode 100644 index 00000000..498711e3 --- /dev/null +++ b/modules/nf-core/taxpasta/standardise/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 - metagenome - kraken report": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tsv:md5,6b62032ed403f431eeb0e17464ccc69b" + ] + ], + "1": [ + "versions.yml:md5,bf00feb66945aab46a78efafac5a261f" + ], + "standardised_profile": [ + [ + { + "id": "test" + }, + "test.tsv:md5,6b62032ed403f431eeb0e17464ccc69b" + ] + ], + "versions": [ + "versions.yml:md5,bf00feb66945aab46a78efafac5a261f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-11T12:35:34.381682299" + }, + "sarscov2 - metagenome - kraken report - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,bf00feb66945aab46a78efafac5a261f" + ], + "standardised_profile": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + 
"versions.yml:md5,bf00feb66945aab46a78efafac5a261f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-11T12:35:41.579178159" + } +} \ No newline at end of file diff --git a/modules/nf-core/taxpasta/standardise/tests/tags.yml b/modules/nf-core/taxpasta/standardise/tests/tags.yml new file mode 100644 index 00000000..43ec42dd --- /dev/null +++ b/modules/nf-core/taxpasta/standardise/tests/tags.yml @@ -0,0 +1,2 @@ +taxpasta/standardise: + - "modules/nf-core/taxpasta/standardise/**" diff --git a/nextflow.config b/nextflow.config index 0176e77a..638089c8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -26,16 +26,17 @@ params { multiqc_methods_description = null // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - monochromeLogs = false // required so nf-validation nextflow.enabled.strict works nicely together - hook_url = null - help = false - version = false + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + monochromeLogs = false // required so nf-validation nextflow.enabled.strict works nicely together // Config options config_profile_name = null @@ -134,7 +135,8 @@ params { krakenuniq_batch_size = 20 // Bracken - run_bracken = false + run_bracken = false + bracken_save_intermediatekraken2 = false // centrifuge run_centrifuge = false @@ -161,7 +163,6 @@ params { // kmcp run_kmcp = false - kmcp_mode = 3 // default kmcp profiling value kmcp_save_search = false // ganon @@ -201,104 +202,110 @@ try { } // Load nf-core/taxprofiler custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! 
try { includeConfig "${params.custom_config_base}/pipeline/taxprofiler.config" } catch (Exception e) { System.err.println("WARNING: Could not load nf-core/config/taxprofiler profiles: ${params.custom_config_base}/pipeline/taxprofiler.config") - } +} profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - channels = ['conda-forge', 'bioconda', 'defaults'] - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id 
-u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false 
+ singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } @@ -378,9 +385,8 @@ manifest { description = """Taxonomic classification and profiling of shotgun short- and long-read metagenomic data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.1.7' + version = '1.1.8' doi = '10.1101/2023.10.20.563221' - } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index 1ce1ee54..deef481a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -466,6 +466,11 @@ "description": "Turn on Bracken (and the required Kraken2 prerequisite step).", "fa_icon": "fas fa-toggle-on" }, + "bracken_save_intermediatekraken2": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Turn on the saving of the intermediate Kraken2 files used as input to Bracken itself into Kraken2 results folder" + }, "run_malt": { "type": "boolean", "fa_icon": "fas fa-toggle-on", @@ -523,13 +528,6 @@ "description": "Turn on classification with KMCP.", "fa_icon": "fas fa-toggle-on" }, - "kmcp_mode": { - "type": "integer", - "default": 3, - "description": "Specify which KMCP profiling mode to use.", - "help_text": "Available values: \n0 (for pathogen detection)\n1 (higherrecall)\n2 (high recall)\n3 (default)\n4 (high precision)\n5 (higher precision).\nFor more information about the different profiling modes, please see the [kmcp documentation](https://bioinf.shenwei.me/kmcp/usage/#profile)\n\n> Modifies tool parameter(s):\n- kmcp profile: `--mode`\n\n", - 
"fa_icon": "fas fa-check-square" - }, "kmcp_save_search": { "type": "boolean", "fa_icon": "fas fa-save", @@ -858,6 +856,13 @@ "description": "Validation of parameters in lenient more.", "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true } } }, diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 56110621..00000000 --- a/pyproject.toml +++ /dev/null @@ -1,15 +0,0 @@ -# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff. -# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. -[tool.ruff] -line-length = 120 -target-version = "py38" -cache-dir = "~/.cache/ruff" - -[tool.ruff.lint] -select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] - -[tool.ruff.lint.isort] -known-first-party = ["nf_core"] - -[tool.ruff.lint.per-file-ignores] -"__init__.py" = ["E402", "F401"] diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index 30963ec6..72261013 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -20,7 +20,7 @@ workflow LONGREAD_PREPROCESSING { PORECHOP_PORECHOP ( reads ) ch_processed_reads = PORECHOP_PORECHOP.out.reads - .map { meta, reads -> [ meta + [single_end: 1], reads ] } + .map { meta, reads -> [ meta + [single_end: true], reads ] } ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first()) ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_PORECHOP.out.log ) @@ -34,7 +34,7 @@ workflow LONGREAD_PREPROCESSING { } else { PORECHOP_PORECHOP ( reads ) 
ch_clipped_reads = PORECHOP_PORECHOP.out.reads - .map { meta, reads -> [ meta + [single_end: 1], reads ] } + .map { meta, reads -> [ meta + [single_end: true], reads ] } ch_processed_reads = FILTLONG ( ch_clipped_reads.map { meta, reads -> [ meta, [], reads ] } ).reads diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index e306f1de..09b8ac19 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -60,7 +60,7 @@ workflow PROFILING { COMBINE READS WITH POSSIBLE DATABASES */ - // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] + // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':true], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads .map { meta, reads -> @@ -362,7 +362,8 @@ workflow PROFILING { ch_input_for_krakenuniq = ch_input_for_profiling.krakenuniq .map { meta, reads, db_meta, db -> - [[id: db_meta.db_name, single_end: meta.single_end], reads, db_meta, db] + def seqtype = (reads[0].name ==~ /.+?\.f\w{0,3}a(\.gz)?$/) ? 
'fasta' : 'fastq' + [[id: db_meta.db_name, single_end: meta.single_end, seqtype: seqtype], reads, db_meta, db] } .groupTuple(by: [0,2,3]) .flatMap { single_meta, reads, db_meta, db -> @@ -373,13 +374,14 @@ workflow PROFILING { meta, reads, db -> reads: [ meta, reads ] db: db + seqtype: meta.seqtype } // Hardcode to _always_ produce the report file (which is our basic output, and goes into) - KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications ) + KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.seqtype, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications ) ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() ) - ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment ) - ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) + ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]} ) + ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report.map{meta, profiles -> [meta - meta.subMap('seqtype'), profiles]} ) } @@ -414,7 +416,7 @@ workflow PROFILING { db: it[3] } - KMCP_SEARCH ( ch_input_for_kmcp.db, ch_input_for_kmcp.reads ) + KMCP_SEARCH ( ch_input_for_kmcp.reads, ch_input_for_kmcp.db ) ch_versions = ch_versions.mix( KMCP_SEARCH.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix(KMCP_SEARCH.out.result) @@ -451,7 +453,7 @@ workflow PROFILING { } //Generate kmcp profile - KMCP_PROFILE( ch_input_for_kmcp_profile.report, 
ch_input_for_kmcp.db, params.kmcp_mode ) + KMCP_PROFILE( ch_input_for_kmcp_profile.report, ch_input_for_kmcp.db ) ch_versions = ch_versions.mix( KMCP_PROFILE.out.versions.first() ) ch_raw_profiles = ch_raw_profiles.mix( KMCP_PROFILE.out.profile ) ch_multiqc_files = ch_multiqc_files.mix( KMCP_PROFILE.out.profile ) diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index 95cd9d3f..ac204497 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -93,11 +93,23 @@ workflow STANDARDISATION_PROFILES { standardise: true } + ch_input_for_taxpasta_merge = ch_input_for_taxpasta.merge + .multiMap{ meta, profiles -> + profiles: [meta, profiles] + tool: meta.tool + } - TAXPASTA_MERGE (ch_input_for_taxpasta.merge , ch_taxpasta_tax_dir, []) + ch_input_for_taxpasta_standardise = ch_input_for_taxpasta.standardise + .multiMap{ meta, profiles -> + profiles: [meta, profiles] + tool: meta.tool + } + + + TAXPASTA_MERGE ( ch_input_for_taxpasta_merge.profiles , ch_input_for_taxpasta_merge.tool , params.standardisation_taxpasta_format, ch_taxpasta_tax_dir, [] ) + TAXPASTA_STANDARDISE ( ch_input_for_taxpasta_standardise.profiles, ch_input_for_taxpasta_standardise.tool, params.standardisation_taxpasta_format, ch_taxpasta_tax_dir ) ch_versions = ch_versions.mix( TAXPASTA_MERGE.out.versions.first() ) - TAXPASTA_STANDARDISE (ch_input_for_taxpasta.standardise, ch_taxpasta_tax_dir ) - ch_version = ch_versions.mix( TAXPASTA_STANDARDISE.out.versions.first() ) + ch_versions = ch_versions.mix( TAXPASTA_STANDARDISE.out.versions.first() ) diff --git a/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf b/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf index 822672e2..96ef64f2 100644 --- a/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf @@ -134,6 +134,10 @@ workflow PIPELINE_COMPLETION { 
imNotification(summary_params, hook_url) } } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } } /* @@ -348,8 +352,16 @@ def methodsDescriptionText( mqc_methods_yaml ) { meta["manifest_map"] = workflow.manifest.toMap() // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: Stamouli et al. 2023)" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + String[] manifest_doi = meta.manifest_map.doi.tokenize(",") + for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " meta["tool_citations"] = "" meta["tool_bibliography"] = "" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index a8b55d6f..14558c39 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -65,9 +65,15 @@ def checkProfileProvided(nextflow_cli_args) { // Citation string for pipeline // def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + - " ${workflow.manifest.doi}\n\n" + + temp_doi_ref + "\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 6e079164..fdb4266c 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -316,22 +316,44 @@ workflow TAXPROFILER { // Collate and save software versions // softwareVersionsToYAML(ch_versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true) - .set { ch_collated_versions } + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_pipeline_software_mqc_versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } // // MODULE: MultiQC // - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + ch_multiqc_config = Channel.fromPath( + "$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? + Channel.fromPath(params.multiqc_config, checkIfExists: true) : + Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? + Channel.fromPath(params.multiqc_logo, checkIfExists: true) : + Channel.fromPath("${workflow.projectDir}/docs/images/nf-core-taxprofiler_logo_custom_light.png", checkIfExists: true) + + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
+ file(params.multiqc_methods_description, checkIfExists: true) : + file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = ch_multiqc_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: true + ) + ) if ( !params.skip_preprocessing_qc ) { if ( params.preprocessing_qc_tool == 'falco' ) {