diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..ea27a584 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,27 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", + "python.formatting.yapfPath": "/opt/conda/bin/yapf", + "python.linting.flake8Path": "/opt/conda/bin/flake8", + "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", + "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", + "python.linting.pylintPath": "/opt/conda/bin/pylint" + }, + + // Add the IDs of extensions you want installed when the container is created. 
+ "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..b6b31907 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,24 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_size = 4 +indent_style = space + +[*.{md,yml,yaml,html,css,scss,js}] +indent_size = 2 + +# These files are edited and tested upstream in nf-core/modules +[/modules/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset +indent_size = unset + +[/assets/email*] +indent_size = unset diff --git a/.gitattributes b/.gitattributes index 7fe55006..7a2dabc2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow +modules/nf-core/** linguist-generated +subworkflows/nf-core/** linguist-generated diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml new file mode 100644 index 00000000..191fabd2 --- /dev/null +++ b/.github/.dockstore.yml @@ -0,0 +1,6 @@ +# Dockstore config version, not pipeline version +version: 1.2 +workflows: + - subclass: nfl + primaryDescriptorPath: /nextflow.config + publish: True diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index bd99230c..aaf5c358 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,49 +1,118 @@ -# qbic-pipelines/bamtofastq: Contributing Guidelines +# nf-core/bamtofastq: Contributing Guidelines -Hi there! Many thanks for taking an interest in improving qbic-pipelines/bamtofastq. +Hi there! +Many thanks for taking an interest in improving nf-core/bamtofastq. -We try to manage the required tasks for qbic-pipelines/bamtofastq using GitHub issues, you probably came to this page when creating one. Please use the pre-filled template to save time. 
- -However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) - -> If you need help using or modifying qbic-pipelines/bamtofastq then the best place to ask is here. +We try to manage the required tasks for nf-core/bamtofastq using GitHub issues, you probably came to this page when creating one. +Please use the pre-filled template to save time. +However, don't be put off by this template - other more general issues and suggestions are welcome! +Contributions to the code are even more welcome ;) +> If you need help using or modifying nf-core/bamtofastq then the best place to ask is on the nf-core Slack [#bamtofastq](https://nfcore.slack.com/channels/bamtofastq) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow -If you'd like to write some code for qbic-pipelines/bamtofastq, the standard workflow -is as follows: -1. Check that there isn't already an issue about your idea in the - [qbic-pipelines/bamtofastq issues](https://github.com/qbic-pipelines/bamtofastq/issues) to avoid - duplicating work. - * If there isn't one already, please create one so that others know you're working on this -2. Fork the [qbic-pipelinesbamtofastq repository](https://github.com/qbic-pipelines/bamtofastq) to your GitHub account -3. Make the necessary changes / additions within your forked repository -4. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged. +If you'd like to write some code for nf-core/bamtofastq, the standard workflow is as follows: -If you're not used to this workflow with git, you can start with some [basic docs from GitHub](https://help.github.com/articles/fork-a-repo/) or even their [excellent interactive tutorial](https://try.github.io/). +1. Check that there isn't already an issue about your idea in the [nf-core/bamtofastq issues](https://github.com/nf-core/bamtofastq/issues) to avoid duplicating work. 
If there isn't one already, please create one so that others know you're working on this +2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/bamtofastq repository](https://github.com/nf-core/bamtofastq) to your GitHub account +3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) +4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged +If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). ## Tests -When you create a pull request with changes, [Travis CI](https://travis-ci.org/) will run automatic tests. + +When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. There are typically two types of tests that run: -### Lint Tests -The nf-core has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. -To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command. +### Lint tests -*This pipeline* was created using [nf-core/tools](https://github.com/nf-core/tools) and in the future will keep using it to continuously adhere to their best practices. 
However, you may encounter more warnings and failures, as the linting requires the `nf-core/` tag, whereas here we generally need to use `qbic-pipelines/`. +`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. +To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command. If any failures or warnings are encountered, please follow the listed URL for more documentation. -### Pipeline Tests -Each nf-core pipeline should be set up with a minimal set of test-data. -Travis CI then runs the pipeline on this data to ensure that it exists successfully. +### Pipeline tests + +Each `nf-core` pipeline should be set up with a minimal set of test-data. +`GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully. If there are any failures then the automated tests fail. -These tests are run both with the latest available version of Nextflow and also the minimum required version that is stated in the pipeline code. +These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code. + +## Patch + +:warning: Only in the unlikely and regrettable event of a release happening with a bug. + +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from patch to directly address this particular bug. ## Getting help -For further information/help, please consult the [qbic-pipelines/bamtofastq documentation](https://github.com/qbic-pipelines/bamtofastq#documentation) and don't hesitate to get in touch. 
+ +For further information/help, please consult the [nf-core/bamtofastq documentation](https://nf-co.re/bamtofastq/usage) and don't hesitate to get in touch on the nf-core Slack [#bamtofastq](https://nfcore.slack.com/channels/bamtofastq) channel ([join our Slack here](https://nf-co.re/join/slack)). + +## Pipeline contribution conventions + +To make the nf-core/bamtofastq code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. + +### Adding a new step + +If you wish to contribute a new step, please use the following coding standards: + +1. Define the corresponding input channel into your new process from the expected previous process channel +2. Write the process block (see below). +3. Define the output channel if needed (see below). +4. Add any new parameters to `nextflow.config` with a default (see below). +5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool). +6. Add sanity checks and validation for all relevant parameters. +7. Perform local tests to validate that the new code works as expected. +8. If applicable, add a new test command in `.github/workflows/ci.yml`. +9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module. +10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. + +### Default values + +Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. + +Once there, use `nf-core schema build` to add to `nextflow_schema.json`. + +### Default processes resource requirements + +Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. 
These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. + +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. + +### Naming schemes + +Please use the following naming schemes, to make it easy to understand what is going where. + +- initial process channel: `ch_output_from_` +- intermediate and terminal channels: `ch__for_` + +### Nextflow version bumping + +If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]` + +### Images and figures + +For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. 
+ +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/bamtofastq/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 6cab6224..00000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,31 +0,0 @@ -Hi there! - -Thanks for telling us about a problem with the pipeline. Please delete this text and anything that's not relevant from the template below: - -#### Describe the bug -A clear and concise description of what the bug is. - -#### Steps to reproduce -Steps to reproduce the behaviour: -1. Command line: `nextflow run ...` -2. See error: _Please provide your error message_ - -#### Expected behaviour -A clear and concise description of what you expected to happen. - -#### System: - - Hardware: [e.g. HPC, Desktop, Cloud...] - - Executor: [e.g. slurm, local, awsbatch...] - - OS: [e.g. CentOS Linux, macOS, Linux Mint...] - - Version [e.g. 7, 10.13.6, 18.3...] - -#### Nextflow Installation: - - Version: [e.g. 0.31.0] - -#### Container engine: - - Engine: [e.g. Conda, Docker or Singularity] - - version: [e.g. 1.0.0] - - Image tag: [e.g. nfcore/bamtofastq:1.0.0] - -#### Additional context -Add any other context about the problem here. 
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 00000000..f914c8f1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,50 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: markdown + attributes: + value: | + Before you post this issue, please check the documentation: + + - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) + - [nf-core/bamtofastq pipeline documentation](https://nf-co.re/bamtofastq/usage) + + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. + render: console + placeholder: | + $ nextflow run ... + + Some output where something broke + + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. + + - type: textarea + id: system + attributes: + label: System information + description: | + * Nextflow version _(eg. 22.10.1)_ + * Hardware _(eg. HPC, Desktop, Cloud)_ + * Executor _(eg. slurm, local, awsbatch)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + * Version of nf-core/bamtofastq _(eg. 
1.1, 1.5, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..75abf1ec --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,7 @@ +contact_links: + - name: Join nf-core + url: https://nf-co.re/join + about: Please join the nf-core community here + - name: "Slack #bamtofastq channel" + url: https://nfcore.slack.com/channels/bamtofastq + about: Discussion about the nf-core/bamtofastq pipeline diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 1f025b77..00000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,16 +0,0 @@ -Hi there! - -Thanks for suggesting a new feature for the pipeline! Please delete this text and anything that's not relevant from the template below: - -#### Is your feature request related to a problem? Please describe. -A clear and concise description of what the problem is. -Ex. I'm always frustrated when [...] - -#### Describe the solution you'd like -A clear and concise description of what you want to happen. - -#### Describe alternatives you've considered -A clear and concise description of any alternative solutions or features you've considered. - -#### Additional context -Add any other context about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 00000000..3358b967 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Suggest an idea for the nf-core/bamtofastq pipeline +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. 
+ validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index a6f34695..d68432cd 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,15 +1,25 @@ -Many thanks to contributing to qbic-pipelines/bamtofastq! + ## PR checklist - - [ ] This comment contains a description of changes (with reason) - - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If necessary, add test data to `testdata/` - - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`). - - [ ] Make sure your code lints (`nf-core lint .`). - - [ ] Documentation in `docs` is updated - - [ ] `CHANGELOG.md` is updated - - [ ] `README.md` is updated - -**Learn more about contributing:** https://github.com/qbic-pipelines/bamtofastq/tree/master/.github/CONTRIBUTING.md + +- [ ] This comment contains a description of changes (with reason). +- [ ] If you've fixed a bug or added code that should be tested, add tests! +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/bamtofastq/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/bamtofastq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] Make sure your code lints (`nf-core lint`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Usage Documentation in `docs/usage.md` is updated. +- [ ] Output Documentation in `docs/output.md` is updated. +- [ ] `CHANGELOG.md` is updated. +- [ ] `README.md` is updated (including new tool citations and authors/contributors). 
diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml deleted file mode 100644 index e052a635..00000000 --- a/.github/markdownlint.yml +++ /dev/null @@ -1,9 +0,0 @@ -# Markdownlint configuration file -default: true, -line-length: false -no-multiple-blanks: 0 -blanks-around-headers: false -blanks-around-lists: false -header-increment: false -no-duplicate-header: - siblings_only: true diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml new file mode 100644 index 00000000..daed6d58 --- /dev/null +++ b/.github/workflows/awsfulltest.yml @@ -0,0 +1,31 @@ +name: nf-core AWS full size tests +# This workflow is triggered on published releases. +# It can be additionally triggered manually with GitHub actions workflow dispatch button. +# It runs the -profile 'test_full' on AWS batch + +on: + release: + types: [published] + workflow_dispatch: +jobs: + run-tower: + name: Run AWS full tests + if: github.repository == 'nf-core/bamtofastq' + runs-on: ubuntu-latest + steps: + - name: Launch workflow via tower + uses: seqeralabs/action-tower-launch@v1 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/bamtofastq/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/bamtofastq/results-${{ github.sha }}" + } + profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml new file mode 100644 index 00000000..dcb66ede --- /dev/null +++ b/.github/workflows/awstest.yml @@ -0,0 +1,29 @@ +name: nf-core AWS test +# This workflow can be triggered manually with the GitHub actions workflow dispatch button. 
+# It runs the -profile 'test' on AWS batch + +on: + workflow_dispatch: +jobs: + run-tower: + name: Run AWS tests + if: github.repository == 'nf-core/bamtofastq' + runs-on: ubuntu-latest + steps: + # Launch workflow using Tower CLI tool action + - name: Launch workflow via tower + uses: seqeralabs/action-tower-launch@v1 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/bamtofastq/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/bamtofastq/results-test-${{ github.sha }}" + } + profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index c66e4f2c..0304cb9f 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -1,8 +1,8 @@ -name: qbic-pipelines branch protection +name: nf-core branch protection # This workflow is triggered on PRs to master branch on the repository # It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` on: - pull_request: + pull_request_target: branches: [master] jobs: @@ -11,9 +11,9 @@ jobs: steps: # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches - name: Check PRs - if: github.repository == 'qbic-pipelines/bamtofastq' + if: github.repository == 'nf-core/bamtofastq' run: | - { [[ ${{github.event.pull_request.head.repo.full_name}} == qbic-pipelines/bamtofastq ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/bamtofastq ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't 
currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets @@ -22,11 +22,23 @@ jobs: uses: mshick/add-pr-comment@v1 with: message: | + ## This PR is against the `master` branch :x: + + * Do not close this PR + * Click _Edit_ and change the `base` to `dev` + * This CI test will remain failed until you push a new commit + + --- + Hi @${{ github.event.pull_request.user.login }}, - It looks like this pull-request is has been made against the ${{github.event.pull_request.head.repo.full_name}} `master` branch. - The `master` branch on qbic-pipelines repositories should always contain code from the latest release. - Because of this, PRs to `master` are only allowed if they come from the ${{github.event.pull_request.head.repo.full_name}} `dev` branch. + + It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. + The `master` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. + You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. + Note that even after this, the test will continue to show as failing until you push a new commit. + Thanks again for your contribution! 
repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false \ No newline at end of file + allow-repeats: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 286efb54..e45d9cab 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -name: qbic-pipelines CI +name: nf-core CI # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors on: push: @@ -8,48 +8,42 @@ on: release: types: [published] +env: + NXF_ANSI_LOG: false + +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + jobs: test: - name: Run workflow tests - # Only run on push if this is the qbic-pipelines dev branch (merged PRs) + name: Run pipeline with test data + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/bamtofastq') }}" runs-on: ubuntu-latest - env: - NXF_VER: ${{ matrix.nxf_ver }} - NXF_ANSI_LOG: false strategy: matrix: - # Nextflow versions: check pipeline minimum and current latest - nxf_ver: ['20.04.1', ''] - config: ['test_chr','test_bai','test_cram'] + NXF_VER: + - "22.10.1" + - "latest-everything" + profile: + - "test" + - "test_cram" + - "test_chr" + - "test_no_bai" + - "test_no_crai" + - "test_no_stats" + - "test_no_qc" + - "test_collate_fast" steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - - name: Check if Dockerfile or Conda environment changed - uses: technote-space/get-diff-action@v4 + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 with: - FILES: | - Dockerfile - environment.yml - - name: Build new docker image - if: env.MATCHED_FILES - run: docker build --no-cache . 
-t qbicpipelines/bamtofastq:1.2.0 + version: "${{ matrix.NXF_VER }}" - - name: Pull docker image - if: ${{ !env.MATCHED_FILES }} - run: | - docker pull qbicpipelines/bamtofastq:1.2.0 - docker tag qbicpipelines/bamtofastq:1.2.0 qbicpipelines/bamtofastq:1.2.0 - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker - - name: Run pipeline with test data, only obtain reads mapping to chrX and chrY run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.config }},docker + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }},docker --outdir ./results diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 00000000..694e90ec --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v7 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." 
+ days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml new file mode 100644 index 00000000..6c3d7d27 --- /dev/null +++ b/.github/workflows/fix-linting.yml @@ -0,0 +1,55 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + deploy: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'nf-core/bamtofastq' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@v3 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + - uses: actions/setup-node@v3 + + - name: Install Prettier + run: npm install -g prettier @prettier/plugin-php + + # Check that we actually need to fix something + - name: Run 'prettier --check' + id: prettier_status + run: | + if prettier --check ${GITHUB_WORKSPACE}; then + echo "result=pass" >> $GITHUB_OUTPUT + else + echo "result=fail" >> $GITHUB_OUTPUT + fi + + - name: Run 'prettier --write' + if: steps.prettier_status.outputs.result == 'fail' + run: prettier --write ${GITHUB_WORKSPACE} + + - name: Commit & push changes + if: steps.prettier_status.outputs.result == 'fail' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . 
+ git status + git commit -m "[automated] Fix linting with Prettier" + git push diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 2f5bce3e..888cb4bc 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,51 +1,108 @@ -name: qbic-pipelines linting +name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure that the code meets the nf-core guidelines +# It runs the `nf-core lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. on: push: + branches: + - dev pull_request: release: types: [published] jobs: - Markdown: + EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-node@v1 + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + + - name: Install editorconfig-checker + run: npm install -g editorconfig-checker + + - name: Run ECLint check + run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') + + Prettier: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + + - name: Install Prettier + run: npm install -g prettier + + - name: Run Prettier --check + run: prettier --check ${GITHUB_WORKSPACE} + + PythonBlack: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Check code lints with Black + uses: psf/black@stable + + # If the above check failed, post a comment on the PR explaining the failure + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 with: - node-version: '10' - - name: Install markdownlint - run: npm install -g markdownlint-cli - - name: Run Markdownlint - run: markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.github/markdownlint.yml - YAML: + message: | + ## Python linting (`black`) is failing + 
+ To keep the code consistent with lots of contributors, we run automated code consistency checks. + To fix this CI test, please run: + + * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` + * Fix formatting errors in your pipeline: `black .` + + Once you push these changes the test should pass, and you can hide this comment :+1: + + We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false + + nf-core: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 - - uses: actions/setup-node@v1 + - name: Check out pipeline code + uses: actions/checkout@v3 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + + - uses: actions/setup-python@v4 + with: + python-version: "3.8" + architecture: "x64" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install nf-core + + - name: Run nf-core lint + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Save PR number + if: ${{ always() }} + run: echo ${{ github.event.pull_request.number }} > PR_number.txt + + - name: Upload linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@v3 with: - node-version: '10' - - name: Install yaml-lint - run: npm install -g yaml-lint - - name: Run yaml-lint - run: yamllint $(find ${GITHUB_WORKSPACE} -type f -name "*.yml") -#Comment this for now, as it won't pass until the template update is done, which will come in the next release -# nf-core: -# runs-on: ubuntu-latest -# steps: -# - uses: actions/checkout@v2 -# - name: Install Nextflow -# run: | -# wget -qO- get.nextflow.io | bash 
-# sudo mv nextflow /usr/local/bin/ -# - uses: actions/setup-python@v1 -# with: -# python-version: '3.6' -# architecture: 'x64' -# - name: Install dependencies -# run: | -# python -m pip install --upgrade pip -# pip install nf-core -# - name: Run nf-core lint -# run: nf-core lint ${GITHUB_WORKSPACE} \ No newline at end of file + name: linting-logs + path: | + lint_log.txt + lint_results.md + PR_number.txt diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml new file mode 100644 index 00000000..0bbcd30f --- /dev/null +++ b/.github/workflows/linting_comment.yml @@ -0,0 +1,28 @@ +name: nf-core linting comment +# This workflow is triggered after the linting action is complete +# It posts an automated comment to the PR, even if the PR is coming from a fork + +on: + workflow_run: + workflows: ["nf-core linting"] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Download lint results + uses: dawidd6/action-download-artifact@v2 + with: + workflow: linting.yml + workflow_conclusion: completed + + - name: Get PR number + id: pr_number + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT + + - name: Post PR comment + uses: marocchino/sticky-pull-request-comment@v2 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.pr_number.outputs.pr_number }} + path: linting-logs/lint_results.md diff --git a/.github/workflows/push_dockerhub.yml b/.github/workflows/push_dockerhub.yml deleted file mode 100644 index 7164781d..00000000 --- a/.github/workflows/push_dockerhub.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: qbic-pipelines Docker push -# This builds the docker image and pushes it to DockerHub -# Runs on qbic-pipelines repo releases and push event to 'dev' branch (PR merges) -on: - push: - branches: - - dev - release: - types: [published] - -jobs: - push_dockerhub: - name: Push new Docker image to Docker Hub - runs-on: ubuntu-latest - # Only run for the qbic-pipelines repo, for releases and merged 
PRs - if: ${{ github.repository == 'qbic-pipelines/bamtofastq' }} - env: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Build new docker image - run: docker build --no-cache . -t qbicpipelines/bamtofastq:latest - - - name: Push Docker image to DockerHub (dev) - if: ${{ github.event_name == 'push' }} - run: | - echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin - docker tag qbicpipelines/bamtofastq:latest qbicpipelines/bamtofastq:dev - docker push qbicpipelines/bamtofastq:dev - - - name: Push Docker image to DockerHub (release) - if: ${{ github.event_name == 'release' }} - run: | - echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin - docker push qbicpipelines/bamtofastq:latest - docker tag qbicpipelines/bamtofastq:latest qbicpipelines/bamtofastq:${{ github.event.release.tag_name }} - docker push qbicpipelines/bamtofastq:${{ github.event.release.tag_name }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 5b54e3e6..8dd14f90 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,8 @@ work/ data/ results/ .DS_Store -tests/test_data +testing/ +testing* *.pyc +*.fasta +*.fai diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 00000000..85d95ecc --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,14 @@ +image: nfcore/gitpod:latest + +vscode: + extensions: # based on nf-core.nf-core-extensionpack + - codezombiech.gitignore # Language support for .gitignore files + # - cssho.vscode-svgviewer # SVG viewer + - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code + - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a 
tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + # - nextflow.nextflow # Nextflow syntax highlighting + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code diff --git a/.nf-core.yml b/.nf-core.yml new file mode 100644 index 00000000..8cd6ed5a --- /dev/null +++ b/.nf-core.yml @@ -0,0 +1,4 @@ +repository_type: pipeline +lint: + files_unchanged: + - .github/CONTRIBUTING.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..0c31cdb9 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v2.7.1" + hooks: + - id: prettier diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..437d763d --- /dev/null +++ b/.prettierignore @@ -0,0 +1,12 @@ +email_template.html +adaptivecard.json +slackreport.json +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc +bin/ diff --git a/.prettierrc.yml b/.prettierrc.yml new file mode 100644 index 00000000..c81f9a76 --- /dev/null +++ b/.prettierrc.yml @@ -0,0 +1 @@ +printWidth: 120 diff --git a/CHANGELOG.md b/CHANGELOG.md index 56e40e9f..01d1dd3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,43 @@ # nf-core/bamtofastq: Changelog +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## v2.0.0 - Annie Easley + +Initial release of nf-core/bamtofastq, created with the [nf-core](https://nf-co.re/) template. 
+ +### `Added` + +- [#49](https://github.com/nf-core/bamtofastq/pull/49) Add descriptions to main options +- [#48](https://github.com/nf-core/bamtofastq/pull/48) Add igenomes +- [#45](https://github.com/nf-core/bamtofastq/pull/45) Add `test.yml` files with md5sums +- [#44](https://github.com/nf-core/bamtofastq/pull/44) DSL2 conversion + +### `Changed` + +- [#55](https://github.com/nf-core/bamtofastq/pull/55) Code review suggestions & formatting +- [#54](https://github.com/nf-core/bamtofastq/pull/54) Code review changes +- [#53](https://github.com/nf-core/bamtofastq/pull/53) Code review & updated modules +- [#52](https://github.com/nf-core/bamtofastq/pull/52) Code review changed resources in configs +- [#47](https://github.com/nf-core/bamtofastq/pull/47) Sync TEMPLATE with tools 2.8 + +### `Fixed` + +- [#49](https://github.com/nf-core/bamtofastq/pull/49) Fixed release version +- [#45](https://github.com/nf-core/bamtofastq/pull/45) Minor bugfix with chromosome extraction + +### `Dependencies` + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `multiqc` | 1.9 | 1.14 | +| `samtools` | 1.10 | 1.17 | + +### `Deprecated` + +- Option `--cram_files` not needed anymore due to automatic format detection. + ## v1.2.0 - Anna Winlock - [#36](https://github.com/qbic-pipelines/bamtofastq/pull/36) Add options `--cram_files` and `--reference_fasta` to add support for CRAM files. @@ -7,7 +45,7 @@ - [#32](https://github.com/qbic-pipelines/bamtofastq/pull/32) Added `--samtools_collate_fast` to sortExtractMapped and changed cat command to append. - [#33](https://github.com/qbic-pipelines/bamtofastq/pull/33) Added flag `--reads_in_memory` to specify how many reads shall be stored in memory. 
-## v1.1.0 - Katherine Johnson +## v1.1.0 - Katherine Johnson - [#21](https://github.com/qbic-pipelines/bamtofastq/21) Allows bam indices as additional input files - [#23](https://github.com/qbic-pipelines/bamtofastq/23) Fix documentation: `--bam` is `--input` now diff --git a/CITATIONS.md b/CITATIONS.md new file mode 100644 index 00000000..27be65c2 --- /dev/null +++ b/CITATIONS.md @@ -0,0 +1,47 @@ +# nf-core/bamtofastq: Citations + +## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) + +> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. + +## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) + +> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. + +## Pipeline tools + +- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + +- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + +- [SAMtools](https://doi.org/10.1093/gigascience/giab008) + + > Danecek, P., Bonfield, J. K., Liddle, J., Marshall, J., Ohan, V., Pollard, M. O., Whitwham, A., Keane, T., McCarthy, S. A., Davies, R. D., Li, H., (2021) Twelve years of SAMtools and BCFtools, GigaScience, Volume 10, Issue 2, giab008. 
doi: 10.1093/gigascience/giab008 + + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +## Software packaging/containerisation tools + +- [Anaconda](https://anaconda.com) + + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 90ddb228..f4fd052f 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,46 +1,111 @@ -# Contributor Covenant Code of Conduct +# Code of Conduct at nf-core (v1.0) ## Our Pledge -In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: -## Our Standards +- Age +- Body size +- Familial status +- Gender identity and expression +- Geographical location +- Level of experience +- Nationality and national origins +- Native language +- Physical and neurological ability +- Race or ethnicity +- Religion +- Sexual identity and orientation +- Socioeconomic status -Examples of behavior that contributes to creating a positive environment include: +Please note that the list above is alphabetised and is therefore not ranked in any order of preference or importance. -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members +## Preamble -Examples of unacceptable behavior by participants include: +> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others.
"We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. -* The use of sexualized language or imagery and unwelcome sexual attention or advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a professional setting +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. + +nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. + +We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. + +Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. + +We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. + +Questions, concerns or ideas on what we can include?
Contact safety [at] nf-co [dot] re ## Our Responsibilities -Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. +The safety officer is responsible for clarifying the standards of acceptable behavior and is expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. + +The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. + +## When and where does this Code of Conduct apply? + +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: + +- Communicating with an official project email address. +- Communicating with community members within the nf-core Slack channel. +- Participating in hackathons organised by nf-core (both online and in-person events). +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Representing nf-core on social media.
This includes both official and personal accounts. + +## nf-core cares 😊 + +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): + +- Ask for consent before sharing another community member’s personal information (including photographs) on social media. +- Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. +- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) +- Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) +- Focus on what is best for the team and the community. (When in doubt, ask) +- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Take breaks when you feel like you need them. +- Using welcoming and inclusive language. 
(Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) + +## nf-core frowns on 😕 + +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. + +- Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. +- “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. +- Spamming or trolling of individuals on social media. +- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. + +### Online Trolling + +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. + +All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. 
+ +## Procedures for Reporting CoC violations -Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -## Scope +You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). -This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. +Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. -## Enforcement +All reports will be handled with utmost discretion and confidentiality. -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team via email. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 
+## Attribution and Acknowledgements -Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. +- The [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4) +- The [OpenCon 2017 Code of Conduct](http://www.opencon2017.org/code_of_conduct) (CC BY 4.0 OpenCon organisers, SPARC and Right to Research Coalition) +- The [eLife innovation sprint 2020 Code of Conduct](https://sprint.elifesciences.org/code-of-conduct/) +- The [Mozilla Community Participation Guidelines v3.1](https://www.mozilla.org/en-US/about/governance/policies/participation/) (version 3.1, CC BY-SA 3.0 Mozilla) -## Attribution +## Changelog -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] +### v1.0 - March 12th, 2021 -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ +- Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. 
diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index f23f7e70..00000000 --- a/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM nfcore/base:1.7 -LABEL authors="Friederike Hanssen" \ - description="Docker image containing all requirements for qbic-pipelines/bamtofastq pipeline" - -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/qbic-pipelines-bamtofastq-1.2.0/bin:$PATH diff --git a/LICENSE b/LICENSE index 34fccfc9..de7630f9 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) Friederike Hanssen +Copyright (c) Friederike Hanssen, Susanne Jodoin Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 1335f5df..fc0f12c3 100644 --- a/README.md +++ b/README.md @@ -1,83 +1,114 @@ -# ![qbic-pipelines/bamtofastq](docs/images/qbic-pipelines-bamtofastq_logo.png) +# ![nf-core/bamtofastq](docs/images/nf-core-bamtofastq_logo_light.png#gh-light-mode-only) ![nf-core/bamtofastq](docs/images/nf-core-bamtofastq_logo_dark.png#gh-dark-mode-only) -> **An open-source pipeline converting (un)mapped single-end or paired-end bam/cram files to fastq.gz**. 
+[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/bamtofastq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.4022138-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.4022138) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.04.1-brightgreen.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) +[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/bamtofastq) -[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) -[![Docker](https://img.shields.io/docker/automated/qbicpipelines/bamtofastq.svg)](https://hub.docker.com/r/qbicpipelines/bamtofastq) -[![Install with Singularity](https://img.shields.io/badge/use%20with-singularity-purple.svg)](https://www.sylabs.io/docs/) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23bamtofastq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/bamtofastq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) -[![GitHub Actions CI status](https://github.com/qbic-pipelines/bamtofastq/workflows/qbic-pipelines%20CI/badge.svg)](https://github.com/qbic-pipelines/bamtofastq/actions?query=workflow%3A%22qbic-pipelines+CI%22) -[![GitHub Actions Linting status](https://github.com/qbic-pipelines/bamtofastq/workflows/qbic-pipelines%20linting/badge.svg)](https://github.com/qbic-pipelines/bamtofastq/actions?query=workflow%3A%22qbic-pipelines+linting%22) - -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4022137.svg)](https://doi.org/10.5281/zenodo.4022137) ## Introduction -This pipeline converts (un)mapped `.bam` files (or `.cram` files with the `--cram_files` option) into `fq.gz` files. -Initially, it auto-detects, whether the input file contains single-end or paired-end reads. Following this step, the reads are sorted using `samtools collate` and extracted with `samtools fastq`. Furthermore, for mapped bam/cram files it is possible to only convert reads mapping to a specific region or chromosome. The obtained FastQ files can then be used to further process with other pipelines. +**nf-core/bamtofastq** is a bioinformatics best-practice analysis pipeline that converts (un)mapped `.bam` or `.cram` files into `fq.gz` files. Initially, it auto-detects, whether the input file contains single-end or paired-end reads. Following this step, the reads are sorted using `samtools collate` and extracted with `samtools fastq`. Furthermore, for mapped bam/cram files it is possible to only convert reads mapping to a specific region or chromosome. The obtained FastQ files can then be used to further process with other pipelines. + +The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. 
It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! + +On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/bamtofastq/results). + +## Pipeline summary -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. +By default, the pipeline currently performs the following steps: -## Quick Start +1. Quality control (QC) of input (bam/cram) files ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)). +2. Check if input files are single- or paired-end ([`Samtools`](https://www.htslib.org/)). +3. Compute statistics on input files ([`Samtools`](https://www.htslib.org/)). +4. Convert to fastq reads ([`Samtools`](https://www.htslib.org/)). +5. QC of converted fastq reads ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)). +6. Summarize QC and statistics before and after format conversion ([`MultiQC`](http://multiqc.info/)). -i.
Install [`nextflow`](https://nf-co.re/usage/installation) +

+ +

-ii. Install one of [`docker`](https://docs.docker.com/engine/installation/), [`singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`conda`](https://conda.io/miniconda.html) +## Usage -iii. Download the pipeline and test it on a minimal dataset with a single command +> **Note** +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how +> to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) +> with `-profile test` before running the workflow on actual data. + +Download the pipeline and test it on a minimal dataset with a single command: ```bash -nextflow run qbic-pipelines/bamtofastq -profile test, +nextflow run nf-core/bamtofastq -profile test, --outdir './results' ``` -iv. Start running your own analysis! +To run your own analysis, start by preparing a samplesheet with your input data that looks as follows: -```bash -nextflow run qbic-pipelines/bamtofastq -profile --input '*.bam' +`samplesheet.csv`: + +```csv +sample_id,mapped,index,file_type +test,test1.bam,test1.bam.bai,bam +test2,test2.bam,test2.bam.bai,bam ``` -See [usage docs](docs/usage.md) for all of the available options when running the pipeline. +Each row represents a bam/cram file with or without indices. -## Documentation +Now, you can run the pipeline using: -The qbic-pipelines/bamtofastq pipeline comes with documentation about the pipeline, found in the `docs/` directory: +```bash +nextflow run nf-core/bamtofastq \ + -profile \ + --input samplesheet.csv \ + --outdir +``` -1. [Installation](https://nf-co.re/usage/installation) -2. Pipeline configuration - * [Local installation](https://nf-co.re/usage/local_installation) - * [Adding your own system config](https://nf-co.re/usage/adding_own_config) -3. [Running the pipeline](docs/usage.md) -4. [Output and how to interpret the results](docs/output.md) -5. 
[Troubleshooting](https://nf-co.re/usage/troubleshooting) +> **Warning:** +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those +> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -## Credits +For more details, please refer to the [usage documentation](https://nf-co.re/bamtofastq/usage) and the [parameter documentation](https://nf-co.re/bamtofastq/parameters). -qbic-pipelines/bamtofastq was originally written by [Friederike Hanssen](https://github.com/FriederikeHanssen). +The nf-core/bamtofastq pipeline comes with documentation about the pipeline [usage](https://nf-co.re/bamtofastq/usage), [parameters](https://nf-co.re/bamtofastq/parameters) and [output](https://nf-co.re/bamtofastq/output). + +## Credits -This pipeline was created using the [nf-core](https://github.com/nf-core) framework and still uses some of its underlying infrastructure. For more information see [nf-co.re](nf-co.re). +nf-core/bamtofastq was originally written by Friederike Hanssen. It was ported to DSL2 by Susanne Jodoin. -Helpful contributors: +We thank the following people for their extensive assistance in the development of this pipeline: -* [Gisela Gabernet](https://github.com/ggabernet) -* [Matilda Åslin](https://github.com/matrulda) -* [Susanne Jodoin](https://github.com/SusiJo) -* [Bruno Grande](https://github.com/BrunoGrandePhd) +- [Gisela Gabernet](https://github.com/ggabernet) +- [Matilda Åslin](https://github.com/matrulda) +- [Bruno Grande](https://github.com/BrunoGrandePhd) ### Resources The individual steps of this pipeline are based on the following tutorials and resources: - 1. [Extracting paired FASTQ read data from a BAM mapping file](http://darencard.net/blog/2017-09-07-extract-fastq-bam/) - 2. 
[Check if BAM is derived from pair-end or single-end reads](https://www.biostars.org/p/178730/) +1. [Extracting paired FASTQ read data from a BAM mapping file](http://darencard.net/blog/2017-09-07-extract-fastq-bam/) +2. [Check if BAM is derived from pair-end or single-end reads](https://www.biostars.org/p/178730/) ## Contributions and Support If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). -For further information or help, don't hesitate to get in touch by opening an issue. +For further information or help, don't hesitate to get in touch on the [Slack `#bamtofastq` channel](https://nfcore.slack.com/channels/bamtofastq) (you can join with [this invite](https://nf-co.re/join/slack)). + +## Citations + +If you use nf-core/bamtofastq for your analysis, please cite it using the following doi: [10.5281/zenodo.284730479](https://doi.org/10.5281/zenodo.284730479) + +An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. -## Citation +You can cite the `nf-core` publication as follows: - -If you use qbic-pipelines/bamtofastq for your analysis, please cite it using the following doi: [10.5281/zenodo.4022137](https://doi.org/10.5281/zenodo.4022137) +> **The nf-core framework for community-curated bioinformatics pipelines.** +> +> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. +> +> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). 
diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 00000000..49ffa4ab --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/bamtofastq v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/email_template.html b/assets/email_template.html index 8658ee33..1d2432c5 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -1,25 +1,24 @@ - - - qbic-pipelines/bamtofastq Pipeline Report + + nf-core/bamtofastq Pipeline Report
-

qbic-pipelines/bamtofastq v${version}

+

nf-core/bamtofastq v${version}

Run Name: $runName

<% if (!success){ out << """
-

qbic-pipelines/bamtofastq execution completed unsuccessfully!

+

nf-core/bamtofastq execution completed unsuccessfully!

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

The full error message was:

${errorReport}
@@ -28,7 +27,7 @@

qbic-pipelines/bamtofastq execution co } else { out << """
- qbic-pipelines/bamtofastq execution completed successfully! + nf-core/bamtofastq execution completed successfully!
""" } @@ -45,8 +44,8 @@

Pipeline Configuration:

-

qbic-pipelines/bamtofastq

-

https://github.com/qbic-pipelines/bamtofastq

+

nf-core/bamtofastq

+

https://github.com/nf-core/bamtofastq

diff --git a/assets/email_template.txt b/assets/email_template.txt index 2c86803c..6f7aa8a8 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,16 +4,15 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - qbic-pipelines/bamtofastq v${version} + nf-core/bamtofastq v${version} ---------------------------------------------------- - Run Name: $runName <% if (success){ - out << "## qbic-pipelines/bamtofastq execution completed successfully! ##" + out << "## nf-core/bamtofastq execution completed successfully! ##" } else { out << """#################################################### -## qbic-pipelines/bamtofastq execution completed unsuccessfully! ## +## nf-core/bamtofastq execution completed unsuccessfully! ## #################################################### The exit status of the task that caused the workflow execution to fail was: $exitStatus. The full error message was: @@ -36,5 +35,5 @@ Pipeline Configuration: <% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %> -- -qbic-pipelines/bamtofastq -https://github.com/qbic-pipelines/bamtofastq +nf-core/bamtofastq +https://github.com/nf-core/bamtofastq diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 00000000..491cee44 --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,23 @@ +id: "nf-core-bamtofastq-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/bamtofastq Methods Description" +section_href: "https://github.com/nf-core/bamtofastq" +plot_type: "html" +data: | +

Methods

+

Data was processed using nf-core/bamtofastq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

References

+
    +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x
  • +
+
+
Notes:
+
    + ${nodoi_text} +
  • The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
  • +
  • You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
  • +
+
diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml deleted file mode 100644 index d6dc452d..00000000 --- a/assets/multiqc_config.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# custom_logo: ../../../docs/images/nf-core_sarek_logo.png -custom_logo_url: https://github.com/qbic-pipelines/bamtofastq/ -custom_logo_title: 'qbic-pipelines/bamtofastq' - -report_comment: > - This report has been generated by the qbic-pipelines/bamtofastq - analysis pipeline. For information about how to interpret these results, please see the - documentation. -report_section_order: - qbic-pipelines/bamtofastq-software-versions: - order: -1000 - qbic-pipelines-bamtofastq-summary: - order: -1100 - -top_modules: - - 'fastqc': - name: 'FastQC (Input Bam)' - path_filters_exclude: - - '*singleton_fastqc*' - - '*.1_fastqc*' - - '*.2_fastqc*' - - 'samtools': - name: 'Samtools (Input Bam)' - - 'fastqc': - name: 'FastQC (Output Reads)' - path_filters: - - '*singleton_fastqc*' - - '*.1_fastqc*' - - '*.2_fastqc*' - -export_plots: true diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml new file mode 100644 index 00000000..40a3ce4f --- /dev/null +++ b/assets/multiqc_config.yml @@ -0,0 +1,15 @@ +report_comment: > + This report has been generated by the nf-core/bamtofastq + analysis pipeline. For information about how to interpret these results, please see the + documentation. 
+report_section_order: + "nf-core-bamtofastq-methods-description": + order: -1000 + software_versions: + order: -1001 + "nf-core-bamtofastq-summary": + order: -1002 + +export_plots: true + +fn_clean_sample_names: false diff --git a/assets/nf-core-bamtofastq_logo.png b/assets/nf-core-bamtofastq_logo.png deleted file mode 100644 index cb6b39f8..00000000 Binary files a/assets/nf-core-bamtofastq_logo.png and /dev/null differ diff --git a/assets/nf-core-bamtofastq_logo_light.png b/assets/nf-core-bamtofastq_logo_light.png new file mode 100644 index 00000000..d3aec53e Binary files /dev/null and b/assets/nf-core-bamtofastq_logo_light.png differ diff --git a/assets/qbic-pipelines-bamtofastq_logo.png b/assets/qbic-pipelines-bamtofastq_logo.png deleted file mode 100644 index 15d18be4..00000000 Binary files a/assets/qbic-pipelines-bamtofastq_logo.png and /dev/null differ diff --git a/assets/qbic-pipelines-bamtofastq_logo.svg b/assets/qbic-pipelines-bamtofastq_logo.svg deleted file mode 100644 index 64a459e9..00000000 --- a/assets/qbic-pipelines-bamtofastq_logo.svg +++ /dev/null @@ -1,391 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - bamtofastq - - - qbic-pipelines/ - - - An open-source pipeline converting (un)mapped single-end or paired-end bam files to fastq.gz - - diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv new file mode 100644 index 00000000..0e3a0add --- /dev/null +++ b/assets/samplesheet.csv @@ -0,0 +1,3 @@ +sample_id,mapped,index,file_type +test,https://github.com/nf-core/test-datasets/raw/bamtofastq/test-datasets/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://github.com/nf-core/test-datasets/raw/bamtofastq/test-datasets/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai,bam 
+test2,https://github.com/nf-core/test-datasets/raw/bamtofastq/test-datasets/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam,https://github.com/nf-core/test-datasets/raw/bamtofastq/test-datasets/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai,bam diff --git a/assets/schema_input.json b/assets/schema_input.json new file mode 100644 index 00000000..c22a9b4b --- /dev/null +++ b/assets/schema_input.json @@ -0,0 +1,36 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/bamtofastq/master/assets/schema_input.json", + "title": "nf-core/bamtofastq pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces" + }, + "fastq_1": { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "fastq_2": { + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" + }, + { + "type": "string", + "maxLength": 0 + } + ] + } + }, + "required": ["sample", "fastq_1"] + } +} diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index 6d95e170..76269b9c 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -12,18 +12,18 @@ $email_html Content-Type: image/png;name="nf-core-bamtofastq_logo.png" Content-Transfer-Encoding: base64 Content-ID: -Content-Disposition: inline; filename="qbic-pipelines-bamtofastq_logo.png" +Content-Disposition: inline; filename="nf-core-bamtofastq_logo_light.png" -<% out << new 
File("$baseDir/assets/qbic-pipelines-bamtofastq_logo.png"). - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' ) %> +<% out << new File("$projectDir/assets/nf-core-bamtofastq_logo_light.png"). + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> <% if (mqcFile){ @@ -37,15 +37,15 @@ Content-ID: Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" ${mqcFileObj. - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' )} + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' )} """ }} %> diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 00000000..043d02f2 --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/bamtofastq v${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? 
("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/markdown_to_html.r b/bin/markdown_to_html.r deleted file mode 100755 index abe13350..00000000 --- a/bin/markdown_to_html.r +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env Rscript - -# Command line argument processing -args = commandArgs(trailingOnly=TRUE) -if (length(args) < 2) { - stop("Usage: markdown_to_html.r ", call.=FALSE) -} -markdown_fn <- args[1] -output_fn <- args[2] - -# Load / install packages -if (!require("markdown")) { - install.packages("markdown", dependencies=TRUE, repos='http://cloud.r-project.org/') - library("markdown") -} - -base_css_fn <- getOption("markdown.HTML.stylesheet") -base_css <- readChar(base_css_fn, file.info(base_css_fn)$size) -custom_css <- paste(base_css, " -body { - padding: 3em; - margin-right: 350px; - max-width: 100%; -} -#toc { - position: fixed; - right: 20px; - width: 300px; - padding-top: 20px; - overflow: scroll; - height: calc(100% - 3em - 20px); -} -#toc_header { - font-size: 1.8em; - font-weight: bold; -} -#toc > ul { - padding-left: 0; - list-style-type: none; -} -#toc > ul ul { padding-left: 20px; } -#toc > ul > li > a { display: none; } -img { max-width: 800px; } -") - -markdownToHTML( - file = markdown_fn, - output = output_fn, - stylesheet = custom_css, - options = c('toc', 'base64_images', 'highlight_code') -) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py deleted file mode 100755 index 76472f3c..00000000 --- a/bin/scrape_software_versions.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -from collections import OrderedDict -import re - 
-regexes = { - 'qbic-pipelines/bamtofastq': ['v_pipeline.txt', r"(\S+)"], - 'Nextflow': ['v_nextflow.txt', r"(\S+)"], - 'Samtools': ['v_samtools.txt', r"samtools (\S+)"], - 'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"], - 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], -} -results = OrderedDict() -results['qbic-pipelines/bamtofastq'] = 'N/A' -results['Nextflow'] = 'N/A' -results['Samtools'] = 'N/A' -results['FastQC'] = 'N/A' -results['MultiQC'] = 'N/A' - -# Search each file using its regex -for k, v in regexes.items(): - try: - with open(v[0]) as x: - versions = x.read() - match = re.search(v[1], versions) - if match: - results[k] = "v{}".format(match.group(1)) - except IOError: - results[k] = False - -# Remove software set to false in results -for k in results: - if not results[k]: - del(results[k]) - -# Dump to YAML -print (''' -id: 'software_versions' -section_name: 'qbic-pipelines/bamtofastq Software Versions' -section_href: 'https://github.com/qbic-pipelines/bamtofastq' -plot_type: 'html' -description: 'are collected at run time from the software output.' -data: | -
-''') -for k,v in results.items(): - print("
{}
{}
".format(k,v)) -print ("
") - -# Write out regexes as csv file: -with open('software_versions.csv', 'w') as f: - for k,v in results.items(): - f.write("{}\t{}\n".format(k,v)) diff --git a/conf/awsbatch.config b/conf/awsbatch.config deleted file mode 100644 index 14af5866..00000000 --- a/conf/awsbatch.config +++ /dev/null @@ -1,18 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running on AWS batch - * ------------------------------------------------- - * Base config needed for running with -profile awsbatch - */ -params { - config_profile_name = 'AWSBATCH' - config_profile_description = 'AWSBATCH Cloud Profile' - config_profile_contact = 'Alexander Peltzer (@apeltzer)' - config_profile_url = 'https://aws.amazon.com/de/batch/' -} - -aws.region = params.awsregion -process.executor = 'awsbatch' -process.queue = params.awsqueue -executor.awscli = '/home/ec2-user/miniconda/bin/aws' -params.tracedir = './' diff --git a/conf/base.config b/conf/base.config index eb70fb46..1900455e 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,55 +1,63 @@ /* - * ------------------------------------------------- - * nf-core/bamtofastq Nextflow base config file - * ------------------------------------------------- - * A 'blank slate' config file, appropriate for general - * use on most high performace compute environments. - * Assumes that all software is installed and available - * on the PATH. Runs in `local` mode - all jobs will be - * run on the logged in environment. - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/bamtofastq Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. 
+---------------------------------------------------------------------------------------- +*/ process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 7.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' - // Process-specific resource requirements - // NOTE - Only one of the labels below are used in the fastqc process in the main script. - // If possible, it would be nice to keep the same label naming convention when - // adding in your processes. - // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors - withLabel:process_low { - cpus = { check_max( 7 * task.attempt, 'cpus' ) } - memory = { check_max( 15.GB * task.attempt, 'memory' ) } - time = { check_max( 6.h * task.attempt, 'time' ) } - } - withLabel:process_medium { - cpus = { check_max( 15 * task.attempt, 'cpus' ) } - memory = { check_max( 31.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } - } - withLabel:process_high { - cpus = { check_max( 15 * task.attempt, 'cpus' ) } - memory = { check_max( 200.GB * task.attempt, 'memory' ) } - time = { check_max( 10.h * task.attempt, 'time' ) } - } - withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } - } - withName:get_software_versions { - cache = false - } -} - -params { - // Defaults only, expecting to be overwritten - max_memory = 128.GB - max_cpus = 16 - max_time = 240.h + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. 
+ // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. + // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 20.h * task.attempt, 'time' ) } + } + withLabel:process_high_memory { + memory = { check_max( 200.GB * task.attempt, 'memory' ) } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } + withName:CUSTOM_DUMPSOFTWAREVERSIONS { + cache = false + } } diff --git a/conf/igenomes.config b/conf/igenomes.config new file mode 100644 index 00000000..0929028e --- /dev/null +++ b/conf/igenomes.config @@ -0,0 +1,132 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for iGenomes paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines reference genomes using iGenome paths. 
+ Can be used by any config that customises the base path using: + $params.igenomes_base / --igenomes_base +---------------------------------------------------------------------------------------- +*/ + +params { + // illumina iGenomes reference file paths + genomes { + 'GRCh37' { + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + } + 'GRCh38' { + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + } + 'GRCm38' { + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + } + 'TAIR10' { + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + } + 'UMD3.1' { + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + } + 'Galgal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + } + 'Gm01' { + fasta = 
"${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + } + 'IRGSP-1.0' { + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + } + 'CHIMP2.1.4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + } + 'Rnor_5.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" + } + 'Rnor_6.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + } + 'bosTau8' { + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + } + 'ce10' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + } + 'canFam3' { + fasta = 
"${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + } + 'danRer10' { + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + } + 'panTro4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + } + 'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + } + } +} diff --git a/conf/modules.config b/conf/modules.config new file mode 100644 index 00000000..8f547ecf --- /dev/null +++ b/conf/modules.config @@ -0,0 +1,180 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +process { + + publishDir = [ + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + withName: 'FASTQC_PRE_CONVERSION' { + ext.args = '--quiet' + ext.prefix = { "${meta.id}.pre_conversion" } + ext.when = { !params.no_read_QC } + } + + withName: 'FASTQC_POST_CONVERSION' { + ext.args = '--quiet' + ext.prefix = { "${meta.id}.post_conversion" } + ext.when = { !params.no_read_QC } + } + + withName: CUSTOM_DUMPSOFTWAREVERSIONS { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] + } + + withName: 'SAMTOOLS_COLLATEFASTQ_SINGLE_END' { + ext.args = { params.samtools_collate_fast ? "-f -r " + params.reads_in_memory : "" } + ext.args2 = '-N' + publishDir = [ + path: { "${params.outdir}/reads" }, + mode: params.publish_dir_mode, + pattern: '*{other.fq.gz}' + ] + } + + withName: 'COLLATE_FASTQ_MAP' { + ext.args = { params.samtools_collate_fast ? "-f -r " + params.reads_in_memory : "" } + ext.args2 = '-N' + ext.prefix = {"${meta.id}.mapped"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'COLLATE_FASTQ_UNMAP' { + ext.args = { params.samtools_collate_fast ? 
"-f -r " + params.reads_in_memory : "" } + ext.args2 = '-N' + ext.prefix = { "${meta.id}.unmapped" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_MAP_MAP' { + ext.args = '-b -f1 -F12' + ext.prefix = { "${meta.id}.map_map" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_FLAGSTAT' { + ext.when = { !params.no_stats } + publishDir = [ + path: { "${params.outdir}/samtools" }, + mode: params.publish_dir_mode, + pattern: '*.flagstat' + ] + } + + withName: 'SAMTOOLS_IDXSTATS' { + ext.when = { !params.no_stats } + publishDir = [ + path: { "${params.outdir}/samtools" }, + mode: params.publish_dir_mode, + pattern: '*.idxstats' + ] + } + + withName: 'SAMTOOLS_STATS' { + ext.when = { !params.no_stats } + publishDir = [ + path: { "${params.outdir}/samtools" }, + mode: params.publish_dir_mode, + pattern: '*.stats' + ] + } + + withName: 'SAMTOOLS_INDEX' { + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_FAIDX' { + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + + withName: 'SAMTOOLS_VIEW_MAP_UNMAP' { + ext.args = '-b -f8 -F260' + ext.prefix = { "${meta.id}.map_unmap" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_UNMAP_MAP' { + ext.args = '-b -f4 -F264' + ext.prefix = { "${meta.id}.unmap_map" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_UNMAP_UNMAP' { + ext.args = '-b -f12 -F256' + ext.prefix = { "${meta.id}.unmap_unmap" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_MERGE_UNMAP' { + ext.prefix = { "${meta.id}.merged_unmap" } + publishDir = [ + //specify to avoid 
publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'CAT_FASTQ' { + publishDir = [ + path: { "${params.outdir}/reads" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + saveAs: { filename -> filename.indexOf(".fastq.gz") > 0 ? filename : null }, + enabled: true + ] + } + + withName: 'CHECK_IF_PAIRED_END'{ + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_CHR'{ + ext.args = "-hb" + ext.args2 = "${params.chr}" + ext.prefix = { params.chr.split(' |-|:').size() > 1 ? "${meta.id}."+(params.chr.split(' |-|:').join('_')) : "${meta.id}."+(params.chr) } + } + +} diff --git a/conf/test.config b/conf/test.config index f47a11a9..39b1fba6 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,24 +1,28 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run qbic-pipelines/bamtofastq -profile test - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/bamtofastq -profile test,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on Travis - max_cpus = 2 - max_memory = 6.GB - max_time = 48.h + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = "https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_bam_samplesheet.csv" - // Input data - input = [ - 'https://raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/First_SmallTest_Paired.bam', - 'https://raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/Second_SmallTest_Paired.bam', - 'https://raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam' - ] + // Genome references + genome = null + igenomes_ignore = true } diff --git a/conf/test_bai.config b/conf/test_bai.config deleted file mode 100644 index 04093259..00000000 --- a/conf/test_bai.config +++ /dev/null @@ -1,30 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test.
Use as follows: - * nextflow run qbic-pipelines/bamtofastq -profile test_bai - */ - - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on Travis - max_cpus = 2 - max_memory = 6.GB - max_time = 48.h - samtools_collate_fast = true - reads_in_memory = '10000' - no_stats = true - no_read_QC = true - - - index_files = true - input_paths = [ - ['First_SmallTest_Paired', ['https://raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/First_SmallTest_Paired.bam','https://raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/First_SmallTest_Paired.bai']], - ['Second_SmallTest_Paired', ['https://raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/Second_SmallTest_Paired.bam','https://raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/Second_SmallTest_Paired.bam.bai']], - ['wgEncodeUwRepliSeqK562G1AlnRep1', ['https://raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam','https://raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam.bai']] - ] -} \ No newline at end of file diff --git a/conf/test_chr.config b/conf/test_chr.config index 2601c4a1..233826c4 100644 --- a/conf/test_chr.config +++ b/conf/test_chr.config @@ -1,14 +1,29 @@ /* * ------------------------------------------------- - * Nextflow config file for running tests + * Nextflow config file for running tests * ------------------------------------------------- * Defines bundled input files and everything required * to run a fast and simple test. 
Use as follows: - * nextflow run qbic-pipelines/bamtofastq -profile test + * nextflow run nf-core/bamtofastq -profile test_chr,<docker/singularity> --outdir <OUTDIR> */ -includeConfig 'test.config' params { + config_profile_name = 'Test chromosome profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = "https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_chr_samplesheet.csv" + + // Genome references + genome = null + igenomes_ignore = true + + // Other parameters chr = 'chrX chrY X Y' -} \ No newline at end of file +} diff --git a/conf/test_collate_fast.config b/conf/test_collate_fast.config new file mode 100644 index 00000000..f4e6ee4c --- /dev/null +++ b/conf/test_collate_fast.config @@ -0,0 +1,31 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test.
+ + Use as follows: + nextflow run nf-core/bamtofastq -profile test_collate_fast,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test collate fast profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = "https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_bam_samplesheet.csv" + + // Genome references + genome = null + igenomes_ignore = true + + // Other parameters + samtools_collate_fast = true +} diff --git a/conf/test_cram.config b/conf/test_cram.config index d720cf36..8283d501 100644 --- a/conf/test_cram.config +++ b/conf/test_cram.config @@ -1,25 +1,27 @@ /* * ------------------------------------------------- - * Nextflow config file for running tests + * Nextflow config file for running tests * ------------------------------------------------- * Defines bundled input files and everything required * to run a fast and simple test.
Use as follows: - * nextflow run qbic-pipelines/bamtofastq -profile test_cram + * nextflow run nf-core/bamtofastq -profile test_cram,<docker/singularity> --outdir <OUTDIR> */ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on Travis - max_cpus = 2 - max_memory = 6.GB - max_time = 48.h + config_profile_name = 'Test cram profile' + config_profile_description = 'Minimal test dataset to check pipeline function' - cram_files = true - input = [ - 'https://raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/First_SmallTest_Paired.cram', - 'https://raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/Second_SmallTest_Paired.cram' - ] - reference_fasta = 'ftp://ftp.broadinstitute.org/pub/seq/references/Homo_sapiens_assembly19.fasta' + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_cram_samplesheet.csv' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + + // Genome references + genome = null + igenomes_ignore = true } diff --git a/conf/test_full.config b/conf/test_full.config new file mode 100644 index 00000000..a7175af2 --- /dev/null +++ b/conf/test_full.config @@ -0,0 +1,26 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test.
+ + Use as follows: + nextflow run nf-core/bamtofastq -profile test_full,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +cleanup = true + +params { + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + + // Input data for full size test + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_full_samplesheet.csv' + fasta = 's3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta' + + // Genome references + genome = null + igenomes_ignore = true +} diff --git a/conf/test_no_bai.config b/conf/test_no_bai.config new file mode 100644 index 00000000..e54f321d --- /dev/null +++ b/conf/test_no_bai.config @@ -0,0 +1,25 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test.
+ + Use as follows: + nextflow run nf-core/bamtofastq -profile test_no_bai,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + + +params { + config_profile_name = 'Test no bai profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_bam_samplesheet_no_bai.csv' +} diff --git a/conf/test_no_crai.config b/conf/test_no_crai.config new file mode 100644 index 00000000..1c88695f --- /dev/null +++ b/conf/test_no_crai.config @@ -0,0 +1,32 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test.
+ + Use as follows: + nextflow run nf-core/bamtofastq -profile test_no_crai,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + + + +params { + config_profile_name = 'Test no crai profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_cram_samplesheet_no_crai.csv' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + + // Genome references + genome = null + igenomes_ignore = true + +} diff --git a/conf/test_no_qc.config b/conf/test_no_qc.config new file mode 100644 index 00000000..0538cfac --- /dev/null +++ b/conf/test_no_qc.config @@ -0,0 +1,31 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test.
+ + Use as follows: + nextflow run nf-core/bamtofastq -profile test_no_qc,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test no QC profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = "https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_bam_samplesheet.csv" + + // Genome references + genome = null + igenomes_ignore = true + + // Other parameters (name is case-sensitive: conf/modules.config reads params.no_read_QC) + no_read_QC = true +} diff --git a/conf/test_no_stats.config b/conf/test_no_stats.config new file mode 100644 index 00000000..88c89145 --- /dev/null +++ b/conf/test_no_stats.config @@ -0,0 +1,31 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test.
+ + Use as follows: + nextflow run nf-core/bamtofastq -profile test_no_stats,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test no stats profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = "https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_bam_samplesheet.csv" + + // Genome references + genome = null + igenomes_ignore = true + + // Other parameters + no_stats = true +} diff --git a/docs/README.md b/docs/README.md index 1c5f1883..3e062171 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,11 +1,10 @@ -# qbic-pipelines/bamtofastq: Documentation +# nf-core/bamtofastq: Documentation -The qbic-pipelines/bamtofastq documentation is split into the following files: +The nf-core/bamtofastq documentation is split into the following pages: -1. [Installation](https://nf-co.re/usage/installation) -2. Pipeline configuration - * [Local installation](https://nf-co.re/usage/local_installation) - * [Adding your own system config](https://nf-co.re/usage/adding_own_config) -3. [Running the pipeline](usage.md) -4. [Output and how to interpret the results](output.md) -5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them.
+ +You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png new file mode 100755 index 00000000..361d0e47 Binary files /dev/null and b/docs/images/mqc_fastqc_adapter.png differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png new file mode 100755 index 00000000..cb39ebb8 Binary files /dev/null and b/docs/images/mqc_fastqc_counts.png differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png new file mode 100755 index 00000000..a4b89bf5 Binary files /dev/null and b/docs/images/mqc_fastqc_quality.png differ diff --git a/docs/images/nf-core-bamtofastq-subway.png b/docs/images/nf-core-bamtofastq-subway.png new file mode 100644 index 00000000..630c8ea1 Binary files /dev/null and b/docs/images/nf-core-bamtofastq-subway.png differ diff --git a/docs/images/nf-core-bamtofastq-subway.svg b/docs/images/nf-core-bamtofastq-subway.svg new file mode 100644 index 00000000..fc017716 --- /dev/null +++ b/docs/images/nf-core-bamtofastq-subway.svg @@ -0,0 +1,1480 @@ + + + +bambambambaibamfastabamfaibamcrambamcraiindexpaired-endsingle-endoptionalsamtoolsPre-conversion QCAlignment to FastQPre-processingfaidxfastqcfastqcmultiQCflagstatidxstatsstatsextract chromosomecheck if paired-endviewcollate fastqcat fastqhtmlfastq diff --git a/docs/images/nf-core-bamtofastq_logo.png b/docs/images/nf-core-bamtofastq_logo.png deleted file mode 100644 index f8c2399c..00000000 Binary files a/docs/images/nf-core-bamtofastq_logo.png and /dev/null differ diff --git a/docs/images/nf-core-bamtofastq_logo_dark.png b/docs/images/nf-core-bamtofastq_logo_dark.png new file mode 100644 index 00000000..5f8f2bff Binary files /dev/null and b/docs/images/nf-core-bamtofastq_logo_dark.png differ diff --git a/docs/images/nf-core-bamtofastq_logo_light.png 
b/docs/images/nf-core-bamtofastq_logo_light.png new file mode 100644 index 00000000..a0564742 Binary files /dev/null and b/docs/images/nf-core-bamtofastq_logo_light.png differ diff --git a/docs/images/qbic-pipelines-bamtofastq_logo.png b/docs/images/qbic-pipelines-bamtofastq_logo.png deleted file mode 100644 index fcd011a7..00000000 Binary files a/docs/images/qbic-pipelines-bamtofastq_logo.png and /dev/null differ diff --git a/docs/images/qbic-pipelines-bamtofastq_logo.svg b/docs/images/qbic-pipelines-bamtofastq_logo.svg deleted file mode 100644 index 866974a8..00000000 --- a/docs/images/qbic-pipelines-bamtofastq_logo.svg +++ /dev/null @@ -1,489 +0,0 @@ - - - - - - - - image/svg+xml - - - - - - - - - diff --git a/docs/output.md b/docs/output.md index f525d000..137c944e 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,39 +1,92 @@ -# qbic-pipelines/bamtofastq: Output +# nf-core/bamtofastq: Output + +## Introduction This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. +The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. + ## Pipeline overview -The pipeline is built using [Nextflow](https://www.nextflow.io/) -and processes data using the following steps: +The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: + +- [FastQC](#fastqc) - Raw read QC +- [Samtools](#samtools) - collate, extract reads and compute bam/cram stats +- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline +- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution + +### FastQC + +
+Output files -* [FastQC](#fastqc) - bam and read quality control -* [Samtools](#samtools) - collate, extract reads and compute bam stats -* [MultiQC](#multiqc) - aggregate report, describing results of the whole pipeline +- `fastqc/` + - `*.pre_conversion_fastqc.html`: FastQC report containing quality metrics of the input BAM files before conversion (Not available for CRAM input). + - `*.pre_conversion_fastqc.zip`: Zip archive containing the FastQC report of the input BAM file, tab-delimited data file and plot images. + - `*.post_conversion_fastqc.html`: FastQC report containing quality metrics of the converted fastq reads. + - `*.post_conversion_fastqc.zip`: Zip archive containing the FastQC report of the converted fastq reads, tab-delimited data file and plot images. -## FastQC +
- [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%T/A/G/C). You get information about adapter contamination and other overrepresented sequences. +[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). - For further reading and documentation see the [FastQC help](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) + +![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) + +![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) + +> **NB:** The FastQC plots displayed in the MultiQC report show _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. ## Samtools -[Samtools](https://www.htslib.org) is used to extract reads from the bam files and to compute some bam statistics. +
+Output files + +- `samtools/` + - `*.flagstat`, `*.idxstats`, `*.stats`: samtools statistics files + +
+ +[Samtools](https://www.htslib.org) is used to extract reads from the bam files and to compute some BAM/CRAM statistics. + +The converted and gzipped fastq output reads are written to the directory `results/reads/`. + +
+Read files + +- `reads/` + - `*.merged.fastq.gz`: Paired-end read files + - `*.other.fq.gz` : Single-end read files + +
+ +### MultiQC + +
+Output files + +- `multiqc/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + +
-The extracted reads are written to fastq files in `results/reads`. +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. -## MultiQC +Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see <http://multiqc.info>. -[MultiQC](http://multiqc.info) is a visualisation tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in within the report data directory. +### Pipeline information -The pipeline has special steps which allow the software versions used to be reported in the MultiQC output for future traceability. +
+Output files -**Output directory: `results/MultiQC`** +- `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline. + - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. -* `Project_multiqc_report.html` - * MultiQC report - a standalone HTML file that can be viewed in your web browser -* `Project_multiqc_data/` - * Directory containing parsed statistics from the different tools used in the pipeline +
-For more information about how to use MultiQC reports, see [http://multiqc.info](http://multiqc.info) +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/usage.md b/docs/usage.md index 03cf725d..9d90e730 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,65 +1,46 @@ -# qbic-pipelines/bamtofastq: Usage - -## Table of contents - - - -- [qbic-pipelines/bamtofastq: Usage](#qbic-pipelinesbamtofastq-usage) - - [Table of contents](#table-of-contents) - - [Introduction](#introduction) - - [Running the pipeline](#running-the-pipeline) - - [Updating the pipeline](#updating-the-pipeline) - - [Reproducibility](#reproducibility) - - [Main arguments](#main-arguments) - - [`-profile`](#-profile) - - [`--input`](#--input) - - [`--index_files`](#--index_files) - - [`--cram_files`](#--cram_files) - - [`--reference_fasta`](#--reference_fasta) - - [`--chr` (optional)](#--chr-optional) - - [`--no_read_QC` (optional)](#--no_read_qc-optional) - - [`--samtools_collate_fast` (optional)](#--samtools_collate_fast-optional) - - [`--reads_in_memory` (optional)](#--reads_in_memory-optional) - - [`--no_stats` (optional)](#--no_stats-optional) - - [Job resources](#job-resources) - - [Automatic resubmission](#automatic-resubmission) - - [Custom resource requests](#custom-resource-requests) - - [AWS Batch specific parameters](#aws-batch-specific-parameters) - - [`--awsqueue`](#--awsqueue) - - [`--awsregion`](#--awsregion) - - [Other command line parameters](#other-command-line-parameters) - - [`--outdir`](#--outdir) - - [`--email`](#--email) - - [`--email_on_fail`](#--email_on_fail) - - [`-name`](#-name) - - [`-resume`](#-resume) - - [`-c`](#-c) - - 
[`--custom_config_version`](#--custom_config_version) - - [`--custom_config_base`](#--custom_config_base) - - [`--max_memory`](#--max_memory) - - [`--max_time`](#--max_time) - - [`--max_cpus`](#--max_cpus) - - [`--plaintext_email`](#--plaintext_email) - - [`--monochrome_logs`](#--monochrome_logs) - - [`--multiqc_config`](#--multiqc_config) - +# nf-core/bamtofastq: Usage + +## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/bamtofastq/usage](https://nf-co.re/bamtofastq/usage) + +> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ ## Introduction -Nextflow handles job submissions on SLURM or other environments, and supervises running the jobs. Thus the Nextflow process must run until the pipeline is finished. We recommend that you put the process running in the background through `screen` / `tmux` or similar tool. Alternatively you can run nextflow within a cluster job submitted your job scheduler. +Bamtofastq is a workflow designed to convert one or multiple bam/cram files into fastq format. -It is recommended to limit the Nextflow Java virtual machines memory. We recommend adding the following line to your environment (typically in `~/.bashrc` or `~./bash_profile`): +## Samplesheet input + +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row as shown in the examples below. ```bash -NXF_OPTS='-Xms1g -Xmx4g' +--input '[path to samplesheet file]' +``` + +### Full samplesheet + +The pipeline will auto-detect whether a sample is single- or paired-end. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 4 columns to match those defined in the table below.
If the index files are not available, the files will be automatically indexed during the pipeline run which can have an effect on the runtime. + +```console +sample_id,mapped,index,file_type +test1,test1.cram,test1.cram.crai,cram +test2,test2.cram,test2.cram.crai,cram ``` +| Column | Description | +| ----------- | ----------------------------------------------------------------------------------------------------------------- | +| `sample_id` | Custom sample name. | +| `mapped` | Absolute path to input BAM/CRAM file. Allowed file extensions: ".bam" or ".cram". | +| `index` | If available, provide full path to input BAI/CRAI index file. File extensions must be ".bam.bai" or ".cram.crai". | +| `file_type` | Type of input file. Options: "bam" or "cram". | + +An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. + ## Running the pipeline The typical command for running the pipeline is as follows: ```bash -nextflow run qbic-pipelines/bamtofastq --input '*bam' -profile docker +nextflow run nf-core/bamtofastq --input 'samplesheet.csv' --outdir <OUTDIR> -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -67,105 +48,119 @@ This will launch the pipeline with the `docker` configuration profile. See below Note that the pipeline will create the following files in your working directory: ```bash -work # Directory containing the nextflow working files -results # Finished results (configurable, see below) -.nextflow_log # Log file from Nextflow +work # Directory containing the nextflow working files +<OUTDIR> # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` -### Updating the pipeline +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file.
-When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`. + +> ⚠️ Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +> The above pipeline run specified with a params file in yaml format: ```bash -nextflow pull qbic-pipelines/bamtofastq +nextflow run nf-core/bamtofastq -profile docker -params-file params.yaml ``` -### Reproducibility +with `params.yaml` containing: -It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. +```yaml +input: './samplesheet.csv' +outdir: './results/' +fasta: './reference.fasta' +<...> +``` -First, go to the [qbic-pipelines/bamtofastq releases page](https://github.com/qbic-pipelines/bamtofastq/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). 
-This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. +### Updating the pipeline -## Main arguments +When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: -### `-profile` +```bash +nextflow pull nf-core/bamtofastq +``` -Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. Note that multiple profiles can be loaded, for example: `-profile docker` - the order of arguments is important! +### Reproducibility -If `-profile` is not specified at all the pipeline will be run locally and expects all software to be installed and available on the `PATH`. +It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -- `awsbatch` - - A generic configuration profile to be used with AWS Batch. 
-- `conda` - - A generic configuration profile to be used with [conda](https://conda.io/docs/) - - Pulls most software from [Bioconda](https://bioconda.github.io/) -- `docker` - - A generic configuration profile to be used with [Docker](http://docker.com/) - - Pulls software from dockerhub: [`nfcore/bamtofastq`](http://hub.docker.com/r/nfcore/bamtofastq/) -- `singularity` - - A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) - - Pulls software from DockerHub: [`nfcore/bamtofastq`](http://hub.docker.com/r/nfcore/bamtofastq/) -- `test` - - A profile with a complete configuration for automated testing - - Includes links to test data so needs no other parameters +First, go to the [nf-core/bamtofastq releases page](https://github.com/nf-core/bamtofastq/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. -### `--input` +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. -Use this to specify the location of your input Bam files (or CRAM files if used with [`--cram_files`](#--cram_files)). For example: +To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -```bash ---input 'path/to/data/sample_*.bam' -``` +> 💡 If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. -Please note the following requirements: +## Core Nextflow arguments -1. The path must be enclosed in quotes -2. 
The path must have at least one `*`/`**` wildcard character +> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). -### `--index_files` +### `-profile` -Use this to indicate that bam index files are present alongside the input bam files. `--input` then has to contain a regex with a wildcard parameter to allow for both inputs. For example: +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -```bash ---index_files --input 'path/to/data/sample_*.{bam,bai}' -``` +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -Please note the following requirements: +> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. -1. The path must be enclosed in quotes -2. The path must have at least one `*` wildcard character +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). -### `--cram_files` +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! +They are loaded in sequence, so later profiles can overwrite earlier profiles. -Use this to indicate that **all** of the files listed in `--input` are CRAM files instead of BAM files. This enabled a step at the beginning of the workflow that converts each CRAM file to BAM format on the fly. 
Note that this option is incompatible with [`--index_files`](#--index_files). For example: +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment. -```bash ---cram_files --input 'path/to/data/sample_*.cram' -``` +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. + +### `--input` -While the above command is valid, it will only work if the reference genome FASTA file listed in the CRAM header is available (_e.g._ via HTTP/FTP or on the local file system). Otherwise, you will need to use the [`--reference_fasta` option](#--reference_fasta). You can check which reference FASTA file is indicated in the CRAM header with the following command: +Use this to specify the location of your input BAM/CRAM files. 
For example: ```bash -samtools view -H path/to/sample.cram | grep '@SQ' +--input 'path/to/samplesheet.csv' ``` -Unfortunately, at the time of writing, FastQC [doesn't support](https://github.com/s-andrews/FastQC/issues/54) CRAM files as input. Hence, a benefit of converting CRAM files to BAM format as opposed to converting directly to FASTQ format is that you can perform QC before the final conversion. +### `--fasta` + +When converting a CRAM file the fasta file specified in the CRAM header should be used to decompress the file. If that file is not available, you will need to specify an alternative path using the [`--fasta`](#--fasta) option. +You can check which reference FASTA file should be used by inspecting the CRAM file with the following command: -### `--reference_fasta` +```bash +samtools view -H path/to/sample.cram | grep '@SQ' +``` -Use this option to indicate which reference genome FASTA file to use when decompressing CRAM files. This is useful if the FASTA file indicated in the CRAM header (see [`--cram_files`](#--cram_files) for more information). For example: +To specify a reference genome FASTA you can follow the command below: ```bash ---cram_files --input 'path/to/data/sample_*.cram' --reference_fasta 'ftp://ftp.broadinstitute.org/pub/seq/references/Homo_sapiens_assembly19.fasta' +--input 'path/to/samplesheet.csv' --fasta 'ftp://ftp.broadinstitute.org/pub/seq/references/Homo_sapiens_assembly19.fasta' ``` ### `--chr` (optional) Use to only obtain reads mapping to a specific chromosome or region. -> It is important to specify the chromosome or region name **exactly** as set in the bam file. Otherwise no reads may be extracted! + +> It is important to specify the chromosome or region name **exactly** as set in the bam/cram file. Otherwise no reads may be extracted! For example: @@ -173,7 +168,12 @@ For example: --chr 'X chrX' ``` -This extracts reads mapping to `X` as well as `chrX` +This extracts reads mapping to `X` as well as `chrX`. 
+To check beforehand which chromosome notation is used in your bam/cram file you can use samtools. + +```bash +samtools idxstats your_input.[bam|cram] | head -n 25 +``` ### `--no_read_QC` (optional) @@ -185,11 +185,11 @@ Use to skip `FastQC` on obtained reads. This is useful, when the reads are used ### `--samtools_collate_fast` (optional) -Use to specify the fast mode for the `samtools collate` command in the processes `sortExtractMapped`, `sortExtractUnmapped` and `sortExtractSingleEnd`. This option relies on the samtools command line flags `-f -r INT` and will output primary alignments only. For full documentation of this mode please refer to the [samtools documentation](http://www.htslib.org/doc/samtools-collate.html#OPTIONS). +Use to specify the fast mode for the `samtools collate` command in the processes `COLLATE_FASTQ_MAP`, `COLLATE_FASTQ_UNMAP` and `SAMTOOLS_COLLATEFASTQ_SINGLE_END`. This option relies on the samtools command line flags `-f -r INT` and will output primary alignments only. For full documentation of this mode please refer to the [samtools documentation](http://www.htslib.org/doc/samtools-collate.html#OPTIONS). ### `--reads_in_memory` (optional) -Only relevant in combination with `--samtools_collate_fast`. It specifies how many alignment reads are kept in memory [default = '100000']. This is useful for speeding up the processes `sortExtractMapped`, `sortExtractUnmapped` and `sortExtractSingleEnd`. +Only relevant in combination with `--samtools_collate_fast`. It specifies how many alignment reads are kept in memory [default = '100000']. This is useful for speeding up the processes `COLLATE_FASTQ_MAP`, `COLLATE_FASTQ_UNMAP` and `SAMTOOLS_COLLATEFASTQ_SINGLE_END`. Example: @@ -199,7 +199,7 @@ Example: ### `--no_stats` (optional) -Use to skip `FastQC` on both input bam and output reads, as well as all `samtools flagstat`, `samtools idxstats`, and `samtools stats`. 
This is useful for large datasets, since the quality metrics processes require a significant amount of time and resources. +Use to skip `FastQC` on both input bam/cram and output reads, as well as all processes that compute statistics `samtools flagstat`, `samtools idxstats`, and `samtools stats`. This is useful for large datasets, since the quality metrics processes require a significant amount of time and resources. :exclamation: Use this at own risk. You won't be able to quickly sanity check the results. @@ -259,73 +259,64 @@ This is used in the MultiQC report (if not default) and in the summary HTML / e- ### `-resume` -Specify this when restarting a pipeline. Nextflow will used cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. -**NB:** Single hyphen (core Nextflow option) - ### `-c` -Specify the path to a specific config file (this is a core NextFlow command). +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. -**NB:** Single hyphen (core Nextflow option) +## Custom configuration -Note - you can use this to override pipeline defaults. 
+### Resource requests -### `--custom_config_version` +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default is set to `master`. +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. -```bash -## Download and use config file with following git commid id ---custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96 -``` +### Custom Containers -### `--custom_config_base` +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version may be out of date. -If you're running offline, nextflow will not be able to fetch the institutional config files -from the internet. If you don't need them, then this is not a problem. 
If you do need them, -you should download the files from the repo and tell nextflow where to find them with the -`custom_config_base` option. For example: +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. -```bash -## Download and unzip the config files -cd /path/to/my/configs -wget https://github.com/nf-core/configs/archive/master.zip -unzip master.zip - -## Run the pipeline -cd /path/to/my/data -nextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs-master/ -``` +### Custom Tool Arguments + +A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. + +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. + +### nf-core/configs -> Note that the nf-core/tools helper package has a `download` command to download all required pipeline -> files + singularity containers + institutional configs in one go for you, to make this process easier. +In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. 
You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. -### `--max_memory` +See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. -Use to set a top-limit for the default memory requirement for each process. -Should be a string in the format integer-unit. eg. `--max_memory '8.GB'` +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). -### `--max_time` +## Azure Resource Requests -Use to set a top-limit for the default time requirement for each process. -Should be a string in the format integer-unit. eg. `--max_time '2.h'` +To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. -### `--max_cpus` +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer to the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). -Use to set a top-limit for the default CPU requirement for each process. -Should be a string in the format integer-unit. eg. `--max_cpus 1` +## Running in the background -### `--plaintext_email` +Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. -Set to receive plain-text e-mails instead of HTML formatted. 
+The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file. -### `--monochrome_logs` +Alternatively, you can use `screen` / `tmux` or similar tool to create a detached session which you can log back into at a later time. +Some HPC setups also allow you to run nextflow within a cluster job submitted to your job scheduler (from where it submits more jobs). -Set to disable colourful command line output and live life in monochrome. +## Nextflow memory requirements -### `--multiqc_config` +In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. +We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`): -Specify a path to a custom MultiQC configuration file. +```bash +NXF_OPTS='-Xms1g -Xmx4g' +``` diff --git a/environment.yml b/environment.yml deleted file mode 100644 index e802043a..00000000 --- a/environment.yml +++ /dev/null @@ -1,13 +0,0 @@ -# You can use this file to create a conda environment for this pipeline: -# conda env create -f environment.yml -name: qbic-pipelines-bamtofastq-1.2.0 -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::multiqc=1.9 - - bioconda::samtools=1.10 - - bioconda::fastqc=0.11.9 - - conda-forge::r-markdown=1.1 - - conda-forge::r-base=3.6.1 diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy new file mode 100755 index 00000000..9b34804d --- /dev/null +++ b/lib/NfcoreSchema.groovy @@ -0,0 +1,530 @@ +// +// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. 
+// + +import nextflow.Nextflow +import org.everit.json.schema.Schema +import org.everit.json.schema.loader.SchemaLoader +import org.everit.json.schema.ValidationException +import org.json.JSONObject +import org.json.JSONTokener +import org.json.JSONArray +import groovy.json.JsonSlurper +import groovy.json.JsonBuilder + +class NfcoreSchema { + + // + // Resolve Schema path relative to main workflow directory + // + public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { + return "${workflow.projectDir}/${schema_filename}" + } + + // + // Function to loop over all parameters defined in schema and check + // whether the given parameters adhere to the specifications + // + /* groovylint-disable-next-line UnusedPrivateMethodParameter */ + public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { + def has_error = false + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + // Check for nextflow core params and unexpected params + def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text + def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') + def nf_params = [ + // Options for base `nextflow` command + 'bg', + 'c', + 'C', + 'config', + 'd', + 'D', + 'dockerize', + 'h', + 'log', + 'q', + 'quiet', + 'syslog', + 'v', + + // Options for `nextflow run` command + 'ansi', + 'ansi-log', + 'bg', + 'bucket-dir', + 'c', + 'cache', + 'config', + 'dsl2', + 'dump-channels', + 'dump-hashes', + 'E', + 'entry', + 'latest', + 'lib', + 'main-script', + 'N', + 'name', + 'offline', + 'params-file', + 'pi', + 'plugins', + 'poll-interval', + 'pool-size', + 'profile', + 'ps', + 'qs', + 'queue-size', + 'r', + 'resume', + 'revision', + 'stdin', + 'stub', + 'stub-run', + 'test', + 'w', + 'with-apptainer', + 'with-charliecloud', + 'with-conda', + 'with-dag', + 'with-docker', + 'with-mpi', + 'with-notification', + 'with-podman', + 'with-report', + 
'with-singularity', + 'with-timeline', + 'with-tower', + 'with-trace', + 'with-weblog', + 'without-docker', + 'without-podman', + 'work-dir' + ] + def unexpectedParams = [] + + // Collect expected parameters from the schema + def expectedParams = [] + def enums = [:] + for (group in schemaParams) { + for (p in group.value['properties']) { + expectedParams.push(p.key) + if (group.value['properties'][p.key].containsKey('enum')) { + enums[p.key] = group.value['properties'][p.key]['enum'] + } + } + } + + for (specifiedParam in params.keySet()) { + // nextflow params + if (nf_params.contains(specifiedParam)) { + log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. Please resubmit with '-${specifiedParam}'" + has_error = true + } + // unexpected params + def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' + def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } + def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() + def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) + if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { + // Temporarily remove camelCase/camel-case params #1035 + def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} + if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ + unexpectedParams.push(specifiedParam) + } + } + } + + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + // Validate parameters against the schema + InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() + JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) + + // Remove anything that's in params.schema_ignore_params + raw_schema = 
removeIgnoredParams(raw_schema, params) + + Schema schema = SchemaLoader.load(raw_schema) + + // Clean the parameters + def cleanedParams = cleanParameters(params) + + // Convert to JSONObject + def jsonParams = new JsonBuilder(cleanedParams) + JSONObject params_json = new JSONObject(jsonParams.toString()) + + // Validate + try { + schema.validate(params_json) + } catch (ValidationException e) { + println '' + log.error 'ERROR: Validation of pipeline parameters failed!' + JSONObject exceptionJSON = e.toJSON() + printExceptions(exceptionJSON, params_json, log, enums) + println '' + has_error = true + } + + // Check for unexpected parameters + if (unexpectedParams.size() > 0) { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) + println '' + def warn_msg = 'Found unexpected parameters:' + for (unexpectedParam in unexpectedParams) { + warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" + } + log.warn warn_msg + log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" + println '' + } + + if (has_error) { + Nextflow.error('Exiting!') + } + } + + // + // Beautify parameters for --help + // + public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) + Integer num_hidden = 0 + String output = '' + output += 'Typical pipeline command:\n\n' + output += " ${colors.cyan}${command}${colors.reset}\n\n" + Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) + Integer max_chars = paramsMaxChars(params_map) + 1 + Integer desc_indent = max_chars + 14 + Integer dec_linewidth = 160 - desc_indent + for (group in params_map.keySet()) { + Integer num_params = 0 + String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' + def group_params = params_map.get(group) // This gets the parameters of that 
particular group + for (param in group_params.keySet()) { + if (group_params.get(param).hidden && !params.show_hidden_params) { + num_hidden += 1 + continue; + } + def type = '[' + group_params.get(param).type + ']' + def description = group_params.get(param).description + def defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' + def description_default = description + colors.dim + defaultValue + colors.reset + // Wrap long description texts + // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap + if (description_default.length() > dec_linewidth){ + List olines = [] + String oline = "" // " " * indent + description_default.split(" ").each() { wrd -> + if ((oline.size() + wrd.size()) <= dec_linewidth) { + oline += wrd + " " + } else { + olines += oline + oline = wrd + " " + } + } + olines += oline + description_default = olines.join("\n" + " " * desc_indent) + } + group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' + num_params += 1 + } + group_output += '\n' + if (num_params > 0){ + output += group_output + } + } + if (num_hidden > 0){ + output += colors.dim + "!! 
Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset + } + output += NfcoreTemplate.dashedLine(params.monochrome_logs) + return output + } + + // + // Groovy Map summarising parameters/workflow options used by the pipeline + // + public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { + // Get a selection of core Nextflow workflow options + def Map workflow_summary = [:] + if (workflow.revision) { + workflow_summary['revision'] = workflow.revision + } + workflow_summary['runName'] = workflow.runName + if (workflow.containerEngine) { + workflow_summary['containerEngine'] = workflow.containerEngine + } + if (workflow.container) { + workflow_summary['container'] = workflow.container + } + workflow_summary['launchDir'] = workflow.launchDir + workflow_summary['workDir'] = workflow.workDir + workflow_summary['projectDir'] = workflow.projectDir + workflow_summary['userName'] = workflow.userName + workflow_summary['profile'] = workflow.profile + workflow_summary['configFiles'] = workflow.configFiles.join(', ') + + // Get pipeline parameters defined in JSON Schema + def Map params_summary = [:] + def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) + for (group in params_map.keySet()) { + def sub_params = new LinkedHashMap() + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (params.containsKey(param)) { + def params_value = params.get(param) + def schema_value = group_params.get(param).default + def param_type = group_params.get(param).type + if (schema_value != null) { + if (param_type == 'string') { + if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { + def sub_string = schema_value.replace('\$projectDir', '') + sub_string = sub_string.replace('\${projectDir}', '') + if (params_value.contains(sub_string)) { + schema_value = 
params_value + } + } + if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { + def sub_string = schema_value.replace('\$params.outdir', '') + sub_string = sub_string.replace('\${params.outdir}', '') + if ("${params.outdir}${sub_string}" == params_value) { + schema_value = params_value + } + } + } + } + + // We have a default in the schema, and this isn't it + if (schema_value != null && params_value != schema_value) { + sub_params.put(param, params_value) + } + // No default in the schema, and this isn't empty + else if (schema_value == null && params_value != "" && params_value != null && params_value != false) { + sub_params.put(param, params_value) + } + } + } + params_summary.put(group, sub_params) + } + return [ 'Core Nextflow options' : workflow_summary ] << params_summary + } + + // + // Beautify parameters for summary and return as string + // + public static String paramsSummaryLog(workflow, params) { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) + String output = '' + def params_map = paramsSummaryMap(workflow, params) + def max_chars = paramsMaxChars(params_map) + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + if (group_params) { + output += colors.bold + group + colors.reset + '\n' + for (param in group_params.keySet()) { + output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' + } + output += '\n' + } + } + output += "!! 
Only displaying parameters that differ from the pipeline defaults !!\n" + output += NfcoreTemplate.dashedLine(params.monochrome_logs) + return output + } + + // + // Loop over nested exceptions and print the causingException + // + private static void printExceptions(ex_json, params_json, log, enums, limit=5) { + def causingExceptions = ex_json['causingExceptions'] + if (causingExceptions.length() == 0) { + def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ + // Missing required param + if (m.matches()) { + log.error "* Missing required parameter: --${m[0][1]}" + } + // Other base-level error + else if (ex_json['pointerToViolation'] == '#') { + log.error "* ${ex_json['message']}" + } + // Error with specific param + else { + def param = ex_json['pointerToViolation'] - ~/^#\// + def param_val = params_json[param].toString() + if (enums.containsKey(param)) { + def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" + if (enums[param].size() > limit) { + log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" + } else { + log.error "${error_msg}: ${enums[param].join(', ')})" + } + } else { + log.error "* --${param}: ${ex_json['message']} (${param_val})" + } + } + } + for (ex in causingExceptions) { + printExceptions(ex, params_json, log, enums) + } + } + + // + // Remove an element from a JSONArray + // + private static JSONArray removeElement(json_array, element) { + def list = [] + int len = json_array.length() + for (int i=0;i + if(raw_schema.keySet().contains('definitions')){ + raw_schema.definitions.each { definition -> + for (key in definition.keySet()){ + if (definition[key].get("properties").keySet().contains(ignore_param)){ + // Remove the param to ignore + definition[key].get("properties").remove(ignore_param) + // If the param was required, change this + if (definition[key].has("required")) { + def cleaned_required = removeElement(definition[key].required, ignore_param) + definition[key].put("required", cleaned_required) + } + } + } + } + } + if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { + raw_schema.get("properties").remove(ignore_param) + } + if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { + def cleaned_required = removeElement(raw_schema.required, ignore_param) + raw_schema.put("required", cleaned_required) + } + } + return raw_schema + } + + // + // Clean and check parameters relative to Nextflow native classes + // + private static Map cleanParameters(params) { + def new_params = params.getClass().newInstance(params) + for (p in params) { + // remove anything evaluating to false + if (!p['value']) { + new_params.remove(p.key) + } + // Cast MemoryUnit to String + if (p['value'].getClass() == nextflow.util.MemoryUnit) { + new_params.replace(p.key, p['value'].toString()) + } + // Cast Duration to String + if (p['value'].getClass() == nextflow.util.Duration) { + new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) + } 
+ // Cast LinkedHashMap to String + if (p['value'].getClass() == LinkedHashMap) { + new_params.replace(p.key, p['value'].toString()) + } + } + return new_params + } + + // + // This function tries to read a JSON params file + // + private static LinkedHashMap paramsLoad(String json_schema) { + def params_map = new LinkedHashMap() + try { + params_map = paramsRead(json_schema) + } catch (Exception e) { + println "Could not read parameters settings from JSON. $e" + params_map = new LinkedHashMap() + } + return params_map + } + + // + // Method to actually read in JSON file using Groovy. + // Group (as Key), values are all parameters + // - Parameter1 as Key, Description as Value + // - Parameter2 as Key, Description as Value + // .... + // Group + // - + private static LinkedHashMap paramsRead(String json_schema) throws Exception { + def json = new File(json_schema).text + def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') + def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') + /* Tree looks like this in nf-core schema + * definitions <- this is what the first get('definitions') gets us + group 1 + title + description + properties + parameter 1 + type + description + parameter 2 + type + description + group 2 + title + description + properties + parameter 1 + type + description + * properties <- parameters can also be ungrouped, outside of definitions + parameter 1 + type + description + */ + + // Grouped params + def params_map = new LinkedHashMap() + schema_definitions.each { key, val -> + def Map group = schema_definitions."$key".properties // Gets the property object of the group + def title = schema_definitions."$key".title + def sub_params = new LinkedHashMap() + group.each { innerkey, value -> + sub_params.put(innerkey, value) + } + params_map.put(title, sub_params) + } + + // Ungrouped params + def ungrouped_params = new LinkedHashMap() + schema_properties.each { innerkey, value -> + 
ungrouped_params.put(innerkey, value) + } + params_map.put("Other parameters", ungrouped_params) + + return params_map + } + + // + // Get maximum number of characters across all parameter names + // + private static Integer paramsMaxChars(params_map) { + Integer max_chars = 0 + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (param.size() > max_chars) { + max_chars = param.size() + } + } + } + return max_chars + } +} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy new file mode 100755 index 00000000..25a0a74a --- /dev/null +++ b/lib/NfcoreTemplate.groovy @@ -0,0 +1,336 @@ +// +// This file holds several functions used within the nf-core pipeline template. +// + +import org.yaml.snakeyaml.Yaml + +class NfcoreTemplate { + + // + // Check AWS Batch related parameters have been specified correctly + // + public static void awsBatch(workflow, params) { + if (workflow.profile.contains('awsbatch')) { + // Check params.awsqueue and params.awsregion have been set if running on AWSBatch + assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" + } + } + + // + // Warn if a -profile or Nextflow config has not been provided to run the pipeline + // + public static void checkConfigProvided(workflow, log) { + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. 
`-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + } + } + + // + // Generate version string + // + public static String version(workflow) { + String version_string = "" + + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string + } + + // + // Construct and send completion email + // + public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + 
email_fields['version'] = NfcoreTemplate.version(workflow) + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + + // Check if we are only sending emails on failure + def email_address = params.email + if (!params.email && params.email_on_fail && !workflow.success) { + email_address = params.email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("$projectDir/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("$projectDir/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: 
email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("$projectDir/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(params.monochrome_logs) + if (email_address) { + try { + if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + mail_cmd += [ '-A', mqc_report ] + } + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + def output_hf = new File(output_d, "pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + def output_tf = new File(output_d, "pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + } + + // + // Construct and send a notification to a web server as JSON + // e.g. 
Microsoft Teams and Slack + // + public static void IM_notification(workflow, params, summary_params, projectDir, log) { + def hook_url = params.hook_url + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = NfcoreTemplate.version(workflow) + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? 
"slackreport.json" : "adaptivecard.json" + def hf = new File("$projectDir/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } + } + + // + // Print pipeline summary on completion + // + public static void summary(workflow, params, log) { + Map colors = logColours(params.monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } + } + + // + // ANSII Colours used for terminal logging + // + public static Map logColours(Boolean monochrome_logs) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? 
'' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? 
'' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes + } + + // + // Does what is says on the tin + // + public static String dashedLine(monochrome_logs) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" + } + + // + // nf-core logo + // + public static String logo(workflow, monochrome_logs) { + Map colors = logColours(monochrome_logs) + String workflow_version = NfcoreTemplate.version(workflow) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) + } +} diff --git a/lib/Utils.groovy b/lib/Utils.groovy new file mode 100644 index 00000000..8d030f4e --- /dev/null +++ b/lib/Utils.groovy @@ -0,0 +1,47 @@ +// +// This file holds several Groovy functions that could be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml + +class Utils { + + // + // When running with -profile conda, warn if channels have not been set-up appropriately + // + public static void checkCondaChannels(log) { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels 
= config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } + } +} diff --git a/lib/WorkflowBamtofastq.groovy b/lib/WorkflowBamtofastq.groovy new file mode 100755 index 00000000..b815c6ca --- /dev/null +++ b/lib/WorkflowBamtofastq.groovy @@ -0,0 +1,75 @@ +// +// This file holds several functions specific to the workflow/bamtofastq.nf in the nf-core/bamtofastq pipeline +// + +import nextflow.Nextflow +import groovy.text.SimpleTemplateEngine + +class WorkflowBamtofastq { + + // + // Check and validate parameters + // + public static void initialise(params, log) { + genomeExistsError(params, log) + + } + + // + // Get workflow summary for MultiQC + // + public static String paramsSummaryMultiqc(workflow, summary) { + String 
summary_section = '' + for (group in summary.keySet()) { + def group_params = summary.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

$group

\n" + summary_section += "
\n" + for (param in group_params.keySet()) { + summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" + } + summary_section += "
\n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + return yaml_file_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = run_workflow.toMap() + meta["manifest_map"] = run_workflow.manifest.toMap() + + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + + def methods_text = mqc_methods_yaml.text + + def engine = new SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html + } + + // + // Exit pipeline if incorrect --genome key provided + // + private static void genomeExistsError(params, log) { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.error(error_string) + } + } +} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy new file mode 100755 index 00000000..9a30d857 --- /dev/null +++ b/lib/WorkflowMain.groovy @@ -0,0 +1,99 @@ +// +// This file holds several functions specific to the main.nf workflow in the nf-core/bamtofastq pipeline +// + +import nextflow.Nextflow + +class WorkflowMain { + + // + // Citation string for pipeline + // + public static String citation(workflow) { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " https://doi.org/10.5281/zenodo.284730479" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" + } + + // + // Generate help string + // + public static String help(workflow, params) { + def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + def help_string = '' + help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) + help_string += NfcoreSchema.paramsHelp(workflow, params, command) + help_string += '\n' + 
citation(workflow) + '\n' + help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) + return help_string + } + + // + // Generate parameter summary log string + // + public static String paramsSummaryLog(workflow, params) { + def summary_log = '' + summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) + summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) + summary_log += '\n' + citation(workflow) + '\n' + summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) + return summary_log + } + + // + // Validate parameters and print summary to screen + // + public static void initialise(workflow, params, log) { + // Print help to screen if required + if (params.help) { + log.info help(workflow, params) + System.exit(0) + } + + // Print workflow version and exit on --version + if (params.version) { + String workflow_version = NfcoreTemplate.version(workflow) + log.info "${workflow.manifest.name} ${workflow_version}" + System.exit(0) + } + + // Print parameter summary log to screen + log.info paramsSummaryLog(workflow, params) + + // Validate workflow parameters via the JSON schema + if (params.validate_params) { + NfcoreSchema.validateParameters(workflow, params, log) + } + + // Check that a -profile or Nextflow config has been provided to run the pipeline + NfcoreTemplate.checkConfigProvided(workflow, log) + + // Check that conda channels are set-up correctly + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + Utils.checkCondaChannels(log) + } + + // Check AWS batch settings + NfcoreTemplate.awsBatch(workflow, params) + + // Check input has been provided + if (!params.input) { + Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") + } + } + // + // Get attribute from genome config file e.g. 
fasta + // + public static Object getGenomeAttribute(params, attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null + } +} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar new file mode 100644 index 00000000..805c8bb5 Binary files /dev/null and b/lib/nfcore_external_java_deps.jar differ diff --git a/main.nf b/main.nf index 31daa7c0..9c24caf0 100644 --- a/main.nf +++ b/main.nf @@ -1,861 +1,64 @@ #!/usr/bin/env nextflow /* -======================================================================================== - qbic-pipelines/bamtofastq -======================================================================================== - qbic-pipelines/bamtofastq Analysis Pipeline. - An open-source analysis pipeline to convert mapped or unmapped single-end or paired-end - reads from bam format to fastq format - #### Homepage / Documentation - https://github.com/qbic-pipelines/bamtofastq +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/bamtofastq +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Github : https://github.com/nf-core/bamtofastq + Website: https://nf-co.re/bamtofastq + Slack : https://nfcore.slack.com/channels/bamtofastq ---------------------------------------------------------------------------------------- */ -def helpMessage() { - log.info nfcoreHeader() - log.info""" - - Usage: - - The typical command for running the pipeline is as follows: - - nextflow run qbic-pipelines/bamtofastq --input '*bam' -profile cfc - - Mandatory arguments: - --input [file] Path to input data, multiple files can be specified by using wildcard characters - -profile [str] Configuration profile to use. 
Can use multiple (comma separated) - Available: conda, docker, singularity, awsbatch, test and more. - - Other options: - --outdir [file] The output directory where the results will be saved - --chr [str] Only use reads mapping to a specific chromosome/region. Has to be specified as in bam: i.e chr1, chr{1..22} (gets all reads mapping to chr1 to 22), 1, "X Y", incorrect naming will lead to a potentially silent error - --index_files [bool] Index files are provided (incompatible with cram_files) - --cram_files [bool] CRAM files (and not BAM files) are provided (incompatible with index_files) - --reference_fasta [file] Reference genome FASTA file used for CRAM compression (can be omitted if the reference in the CRAM header is available) - --samtools_collate_fast [bool] Uses fast mode for samtools collate in `sortExtractMapped`, `sortExtractUnmapped` and `sortExtractSingleEnd` - --reads_in_memory [str] Reads to store in memory [default = '100000']. Only relevant for use with `--samtools_collate_fast`. - --no_read_QC [bool] If specified, no quality control will be performed on extracted reads. Useful, if this is done anyways in the subsequent workflow - --no_stats [bool] If specified, skips all quality control and stats computation, including `FastQC` on both input bam and output reads, `samtools flagstat`, `samtools idxstats`, and `samtools stats` - --email [str] Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits - --email_on_fail [str] Same as --email, except only send mail if the workflow is not successful - --maxMultiqcEmailFileSize [str] Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) - -name [str] Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. 
- - AWSBatch options: - --awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch - --awsregion [str] The AWS Region for your AWS Batch job to run on - """.stripIndent() -} - -// Show help message -if (params.help) { - helpMessage() - exit 0 -} - -/* - * SET UP CONFIGURATION VARIABLES - */ - -// Has the run name been specified by the user? -// this has the bonus effect of catching both -name and --name -custom_runName = params.name -if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { - custom_runName = workflow.runName -} - -if ( workflow.profile == 'awsbatch') { - // AWSBatch sanity checking - if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - // related: https://github.com/nextflow-io/nextflow/issues/813 - if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - // Prevent trace files to be stored on S3 since S3 does not support rolling files. - if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." 
-} - -// Stage config files -ch_multiqc_config = file("$baseDir/assets/multiqc_config.yaml", checkIfExists: true) -ch_output_docs = file("$baseDir/docs/output.md", checkIfExists: true) - - -/* - * Create a channel for input files - */ -if (params.input_paths){ - - Channel - .from( params.input_paths ) - .map { row -> [ row[0], file(row[1][0]), file(row[1][1])] } - .into{ ch_idxstats; - ch_flagstats; - ch_stats; - ch_input_fastqc; - ch_processing; - } -} else { - - if(params.index_files){ //Index files are provided - - if (params.cram_files) { - exit 1, "Parameter 'params.cram_files' isn't compatible with '--index_files'!\n" - } - - Channel.fromFilePairs(params.input, flat:true, checkIfExists:true) { file -> file.name.replaceAll(/.bam|.bai$/,'') } - .map { name, file1, file2 -> - //Ensure second element in ma will be bam, and third bai - if(file2.extension.toString() == 'bai'){ - bam = file1 - bai = file2 - }else{ - bam = file2 - bai = file1 - } - [name, bam, bai]} // Map: [ name, name.bam, name.bam.bai ] - .into { ch_idxstats; - ch_flagstats; - ch_stats; - ch_input_fastqc; - ch_processing } - - } else if(!params.index_files) { //Index files need to be computed - - if (params.cram_files) { - - ch_reference_fasta = params.reference_fasta ? 
Channel.value(file(params.reference_fasta)) : "null" - - Channel - .fromPath( params.input, checkIfExists: true) - .map { file -> tuple(file.name.replaceAll(".cram", ''), file) } // Map: [name, name.cram] (map cram file name w/o cram to file) - .set { ch_cram_files } - - } else { - - Channel - .fromPath(params.input, checkIfExists: true) - .map { file -> tuple(file.name.replaceAll(".bam",''), file) } // Map: [name, name.bam] (map bam file name w/o bam to file) - .set { bam_files_index } - - } - - }else{ - exit 1, "Parameter 'params.input' was not specified!\n" - } -} - -// Header log info -log.info nfcoreHeader() -def summary = [:] -if (workflow.revision) summary['Pipeline Release'] = workflow.revision -summary['Run Name'] = custom_runName ?: workflow.runName -summary['Input'] = params.input -summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" -if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" -summary['Output dir'] = params.outdir -if (params.chr) summary['Only reads mapped to chr'] = params.chr -if (params.index_files) summary['Index files available'] = params.index_files -summary['Read QC'] = params.no_read_QC ? 'No' : 'Yes' -summary['Stats'] = params.no_stats ? 
'No' : 'Yes' -summary['Launch dir'] = workflow.launchDir -summary['Working dir'] = workflow.workDir -summary['Script dir'] = workflow.projectDir -summary['User'] = workflow.userName -if (workflow.profile == 'awsbatch') { - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue -} -summary['Config Profile'] = workflow.profile -if (params.config_profile_description) summary['Config Description'] = params.config_profile_description -if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact -if (params.config_profile_url) summary['Config URL'] = params.config_profile_url -if (params.email || params.email_on_fail) { - summary['E-mail Address'] = params.email - summary['E-mail on failure'] = params.email_on_fail - summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize -} -log.info summary.collect { k,v -> "${k.padRight(26)}: $v" }.join("\n") -log.info "-\033[2m--------------------------------------------------\033[0m-" - -// Check the hostnames against configured profiles -checkHostname() - -def create_workflow_summary(summary) { - def yaml_file = workDir.resolve('workflow_summary_mqc.yaml') - yaml_file.text = """ - id: 'qbic-pipelines-bamtofastq-summary' - description: " - this information is collected when the pipeline is started." - section_name: 'qbic-pipelines/bamtofastq Workflow Summary' - section_href: 'https://github.com/qbic-pipelines/bamtofastq' - plot_type: 'html' - data: | -
    -${summary.collect { k,v -> "
    $k
    ${v ?: 'N/A'}
    " }.join("\n")} -
    - """.stripIndent() - - return yaml_file -} - -/* - * Parse software version numbers - */ -process get_software_versions { - publishDir "${params.outdir}/pipeline_info", mode: 'copy', - saveAs: { filename -> - if (filename.indexOf(".csv") > 0) filename - else null - } - label 'process_low' - - - output: - file 'software_versions_mqc.yaml' into software_versions_yaml - file "software_versions.csv" - file "*.txt" - - script: - """ - echo $workflow.manifest.version > v_pipeline.txt - echo $workflow.nextflow.version > v_nextflow.txt - fastqc --version &> v_fastqc.txt - samtools --version > v_samtools.txt - multiqc --version > v_multiqc.txt - scrape_software_versions.py &> software_versions_mqc.yaml - """ -} - -/* - * Generate BAM files if input files are CRAM files - */ - -if ( params.cram_files ) { - - process cramToBamWithReference { - tag "$name" - label 'process_medium' - - input: - set val(name), file(cram) from ch_cram_files - file fasta from ch_reference_fasta - - output: - set val("$name"), file("${name}.bam") into bam_files_index - - script: - refOptions = params.reference_fasta ? 
"-T ${fasta}" : "" - """ - samtools view -b -@${task.cpus} ${refOptions} ${cram} -o ${name}.bam - """ - } - -} - +nextflow.enable.dsl = 2 /* - * Step 0: If index_files not provided as input compute them first - */ -if(!params.index_files){ - process IndexBAM { - tag "$name" - label 'process_medium' - - input: - set val(name), file(bam) from bam_files_index - - output: - set val(name), file(bam), file("*.bai") into (ch_idxstats, ch_flagstats, ch_stats, ch_input_fastqc, ch_processing) - - when: - !params.index_files //redundant, since the input channel only exists, if no indices are provided - - script: - """ - samtools index ${bam} - """ - } -} - - -/* - * Step 0: Compute statistics on the input bam files - */ -process computeIdxstatsInput { - tag "$name" - label 'process_medium' - - input: - set val(name), file(bam), file(bai) from ch_idxstats - - output: - file "*.idxstats" into ch_bam_idxstat_mqc - - when: - !params.no_stats - - script: - """ - samtools idxstats $bam > "${bam}.idxstats" - """ -} - - -process computeFlagstatInput{ - tag "$name" - label 'process_medium' - - input: - set val(name), file(bam), file(bai) from ch_flagstats - - output: - file "*.flagstat" into ch_bam_flagstat_mqc - - when: - !params.no_stats - - script: - """ - samtools flagstat -@$task.cpus ${bam} > ${bam}.flagstat - """ -} - - -process computeStatsInput{ - - tag "$name" - label 'process_medium' - - input: - set val(name), file(bam), file(bai) from ch_stats - - output: - file "*.stats" into ch_bam_stats_mqc - - when: - !params.no_stats - - script: - """ - samtools stats -@$task.cpus ${bam} > ${bam}.stats - """ -} - - -process computeFastQCInput{ - tag "$name" - label 'process_medium' - - input: - set val(name), file(bam), file(bai) from ch_input_fastqc - - output: - file "*.{zip,html}" into ch_fastqc_reports_mqc_input_bam - - when: - !params.no_stats - - script: - """ - fastqc --quiet --threads $task.cpus ${bam} - """ -} - - -// Extract reads mapping to specific chromosome(s) -if 
(params.chr){ - process extractReadsMappingToChromosome{ - tag "${name}.${chr_list_joined}" - label 'process_medium' - - input: - set val(name), file(bam), file(bai) from ch_processing - - output: - set val("${name}.${chr_list_joined}"), file("${name}.${chr_list_joined}.bam"), file("${name}.${chr_list_joined}.bam.bai") into bam_files_check - - script: - //If multiple chr were specified, then join space separated list for naming: chr1 chr2 -> chr1_chr2, also resolve region specification with format chr:start-end - chr_list_joined = params.chr.split(' |-|:').size() > 1 ? params.chr.split(' |-|:').join('_') : params.chr - """ - samtools view -hb $bam ${params.chr} -@$task.cpus -o "${name}.${chr_list_joined}.bam" - samtools index "${name}.${chr_list_joined}.bam" - """ - } -} else{ - bam_files_check = ch_processing -} - - -/* - * STEP 1: Check for paired-end or single-end bam - */ -process checkIfPairedEnd{ - tag "$name" - label 'process_low' - input: - set val(name), file(bam), file(bai) from bam_files_check - - output: - set val(name), file(bam), file(bai), file('*paired.txt') optional true into bam_files_paired_map_map, - bam_files_paired_unmap_unmap, - bam_files_paired_unmap_map, - bam_files_paired_map_unmap - set val(name), file(bam), file(bai), file('*single.txt') optional true into bam_file_single_end // = is not paired end - - //Take samtools header + the first 1000 reads (to safe time, otherwise also all can be used) and check whether for - //all, the flag for paired-end is set. Compare: https://www.biostars.org/p/178730/ . 
- script: - """ - if [ \$({ samtools view -H $bam -@$task.cpus ; samtools view $bam -@$task.cpus | head -n1000; } | samtools view -c -f 1 -@$task.cpus | awk '{print \$1/1000}') = "1" ]; then - echo 1 > ${name}.paired.txt - else - echo 0 > ${name}.single.txt - fi - """ -} - - -/* - * Step 2a: Handle paired-end bams - */ -process pairedEndMapMap{ - tag "$name" - label 'process_low' - input: - set val(name), file(bam), file(bai), file(txt) from bam_files_paired_map_map - - output: - set val(name), file( '*.map_map.bam') into map_map_bam - - when: - txt.exists() - - script: - """ - samtools view -b -f1 -F12 $bam -@$task.cpus -o ${name}.map_map.bam - """ -} - -process pairedEndUnmapUnmap{ - tag "$name" - label 'process_low' - input: - set val(name), file(bam), file(bai), file(txt) from bam_files_paired_unmap_unmap - - output: - set val(name), file('*.unmap_unmap.bam') into unmap_unmap_bam - - when: - txt.exists() - - script: - """ - samtools view -b -f12 -F256 $bam -@${task.cpus} -o ${name}.unmap_unmap.bam - """ -} - -process pairedEndUnmapMap{ - tag "$name" - label 'process_low' - input: - set val(name), file(bam), file(bai), file(txt) from bam_files_paired_unmap_map - - output: - set val(name), file( '*.unmap_map.bam') into unmap_map_bam - - when: - txt.exists() - - script: - """ - samtools view -b -f4 -F264 $bam -@${task.cpus} -o ${name}.unmap_map.bam - """ -} - -process pairedEndMapUnmap{ - tag "$name" - label 'process_low' - input: - set val(name), file(bam), file(bai), file(txt) from bam_files_paired_map_unmap - - output: - set val(name), file( '*.map_unmap.bam') into map_unmap_bam - - when: - txt.exists() - - script: - """ - samtools view -b -f8 -F260 $bam -@${task.cpus} -o ${name}.map_unmap.bam - """ -} - -unmap_unmap_bam.join(map_unmap_bam, remainder: true) - .join(unmap_map_bam, remainder: true) - .set{ all_unmapped_bam } - -process mergeUnmapped{ - tag "$name" - label 'process_low' - input: - set val(name), file(unmap_unmap), file (map_unmap), file(unmap_map) 
from all_unmapped_bam - - output: - set val(name), file('*.merged_unmapped.bam') into merged_unmapped - - script: - """ - samtools merge ${name}.merged_unmapped.bam $unmap_unmap $map_unmap $unmap_map -@$task.cpus - """ -} - -process sortExtractMapped{ - tag "$name" - label 'process_medium' - - input: - set val(name), file(all_map_bam) from map_map_bam - - output: - set val(name), file('*_mapped.fq.gz') into reads_mapped - - script: - def collate_fast = params.samtools_collate_fast ? "-f -r " + params.reads_in_memory : "" - """ - samtools collate -O -@$task.cpus $collate_fast $all_map_bam . \ - | samtools fastq -1 ${name}_R1_mapped.fq.gz -2 ${name}_R2_mapped.fq.gz -s ${name}_mapped_singletons.fq.gz -N -@$task.cpus - """ -} - -process sortExtractUnmapped{ - label 'process_medium' - tag "$name" - - input: - set val(name), file(all_unmapped) from merged_unmapped - - output: - set val(name), file('*_unmapped.fq.gz') into reads_unmapped - - script: - def collate_fast = params.samtools_collate_fast ? "-f -r " + params.reads_in_memory : "" - """ - samtools collate -O -@$task.cpus $collate_fast $all_unmapped . 
\ - | samtools fastq -1 ${name}_R1_unmapped.fq.gz -2 ${name}_R2_unmapped.fq.gz -s ${name}_unmapped_singletons.fq.gz -N -@$task.cpus - """ -} - -reads_mapped.join(reads_unmapped, remainder: true) - .map{ - row -> tuple(row[0], row[1][0], row[1][1], row[2][0], row[2][1]) - } - .set{ all_fastq } - -process joinMappedAndUnmappedFastq{ - label 'process_low' - tag "$name" - publishDir "${params.outdir}/reads", mode: 'copy', - saveAs: { filename -> - if (filename.indexOf(".fq.gz") > 0) filename - else null - } - - input: - set val(name), file(mapped_fq1), file(mapped_fq2), file(unmapped_fq1), file(unmapped_fq2) from all_fastq.filter{ it.size()>0 } - - output: - set file('*1.fq.gz'), file('*2.fq.gz') into read_qc - - - script: - """ - cat $unmapped_fq1 >> $mapped_fq1 - mv $mapped_fq1 ${name}.1.fq.gz - cat $unmapped_fq2 >> $mapped_fq2 - mv $mapped_fq2 ${name}.2.fq.gz - """ -} - - -process pairedEndReadsQC{ - label 'process_medium' - tag "$read1" - - input: - set file(read1), file(read2) from read_qc - - output: - file "*.{zip,html}" into ch_fastqc_reports_mqc_pe - - when: - !params.no_read_QC && !params.no_stats - - script: - """ - fastqc --quiet --threads $task.cpus $read1 $read2 - """ -} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + GENOME PARAMETER VALUES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.fasta_fai = WorkflowMain.getGenomeAttribute(params, 'fasta_fai') /* - * STEP 2b: Handle single-end bams - */ -process sortExtractSingleEnd{ - tag "$name" - label 'process_medium' - - publishDir "${params.outdir}/reads", mode: 'copy', - saveAs: { filename -> - if (filename.indexOf(".fq.gz") > 0) filename - else null - } - - input: - set val(name), file(bam), file(bai), file(txt) from bam_file_single_end - - output: - set val(name), file ('*.singleton.fq.gz') into single_end_reads - - when: - txt.exists() - - 
script: - def collate_fast = params.samtools_collate_fast ? "-f -r " + params.reads_in_memory : "" - """ - samtools collate -O -@$task.cpus $collate_fast $bam . \ - | samtools fastq -0 ${name}.singleton.fq.gz -N -@$task.cpus - """ - } - - -process singleEndReadQC{ - tag "$name" - label 'process_medium' - - - input: - set val(name), file(reads) from single_end_reads - - output: - file "*.{zip,html}" into ch_fastqc_reports_mqc_se - - when: - !params.no_read_QC && !params.no_stats - - script: - """ - fastqc --quiet --threads $task.cpus ${reads} - """ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE & PRINT PARAMETER SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ -} +WorkflowMain.initialise(workflow, params, log) /* - * STEP 3 - Output Description HTML - */ -process output_documentation { - publishDir "${params.outdir}/pipeline_info", mode: 'copy' - label 'process_low' - - input: - file output_docs from ch_output_docs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOW FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - output: - file "results_description.html" +include { BAMTOFASTQ } from './workflows/bamtofastq' - script: - """ - markdown_to_html.r $output_docs results_description.html - """ +// +// WORKFLOW: Run main nf-core/bamtofastq analysis pipeline +// +workflow NFCORE_BAMTOFASTQ { + BAMTOFASTQ () } /* - * STEP 4 - MultiQC - */ -process multiqc { - publishDir "${params.outdir}/MultiQC", mode: 'copy' - label 'process_low' - - input: - file multiqc_config from ch_multiqc_config - - file ('software_versions/*') from software_versions_yaml.collect() - file workflow_summary from create_workflow_summary(summary) - file flagstats from ch_bam_flagstat_mqc.collect() - file stats from ch_bam_stats_mqc.collect() - file idxstats from 
ch_bam_idxstat_mqc.collect() - file fastqc_bam from ch_fastqc_reports_mqc_input_bam.collect().ifEmpty([]) - file fastqc_se from ch_fastqc_reports_mqc_se.collect().ifEmpty([]) - file fastqc_pe from ch_fastqc_reports_mqc_pe.collect().ifEmpty([]) - - output: - file "*multiqc_report.html" - file "*_data" - file "multiqc_plots" - - script: - rtitle = custom_runName ? "--title \"$custom_runName\"" : '' - rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' - """ - multiqc -f -s $rtitle $rfilename $multiqc_config . - """ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN ALL WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// +// WORKFLOW: Execute a single named workflow for the pipeline +// See: https://github.com/nf-core/rnaseq/issues/619 +// +workflow { + NFCORE_BAMTOFASTQ () } /* - * Completion e-mail notification - */ -workflow.onComplete { - - // Set up the e-mail variables - def subject = "[qbic-pipelines/bamtofastq] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[qbic-pipelines/bamtofastq] FAILED: $workflow.runName" - } - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = custom_runName ?: workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary - email_fields['summary']['Date Started'] = workflow.start - email_fields['summary']['Date Completed'] = workflow.complete - 
email_fields['summary']['Pipeline script file path'] = workflow.scriptFile - email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision - if (workflow.container) email_fields['summary']['Docker image'] = workflow.container - email_fields['summary']['Nextflow Version'] = workflow.nextflow.version - email_fields['summary']['Nextflow Build'] = workflow.nextflow.build - email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList) { - log.warn "[qbic-pipelines/bamtofastq] Found multiple reports from process 'multiqc', will use only one" - mqc_report = mqc_report[0] - } - } - } catch (all) { - log.warn "[qbic-pipelines/bamtofastq] Could not attach MultiQC report to summary email" - } - - // Check if we are only sending emails on failure - email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$baseDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$baseDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: 
email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.maxMultiqcEmailFileSize.toBytes() ] - def sf = new File("$baseDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - if (email_address) { - try { - if ( params.plaintext_email ){ throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[qbic-pipelines/bamtofastq] Sent summary e-mail to $email_address (sendmail)" - } catch (all) { - // Catch failures and try with plaintext - [ 'mail', '-s', subject, email_address ].execute() << email_txt - log.info "[qbic-pipelines/bamtofastq] Sent summary e-mail to $email_address (mail)" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File( "${params.outdir}/pipeline_info/" ) - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File( output_d, "pipeline_report.html" ) - output_hf.withWriter { w -> w << email_html } - def output_tf = new File( output_d, "pipeline_report.txt" ) - output_tf.withWriter { w -> w << email_txt } - - c_reset = params.monochrome_logs ? '' : "\033[0m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_red = params.monochrome_logs ? 
'' : "\033[0;31m"; - - if (workflow.stats.ignoredCount > 0 && workflow.success) { - log.info "${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}" - log.info "${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}" - log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}" - } - - if (workflow.success) { - log.info "${c_purple}[qbic-pipelines/bamtofastq]${c_green} Pipeline completed successfully${c_reset}" - } else { - checkHostname() - log.info "${c_purple}[qbic-pipelines/bamtofastq]${c_red} Pipeline completed with errors${c_reset}" - } - -} - - -def nfcoreHeader(){ - // Log colors ANSI codes - c_reset = params.monochrome_logs ? '' : "\033[0m"; - c_dim = params.monochrome_logs ? '' : "\033[2m"; - c_black = params.monochrome_logs ? '' : "\033[0;30m"; - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; - c_blue = params.monochrome_logs ? '' : "\033[0;34m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; - c_white = params.monochrome_logs ? '' : "\033[0;37m"; - - return """ -${c_dim}--------------------------------------------------${c_reset}- - ${c_green},--.${c_black}/${c_green},-.${c_reset} - ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} - ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} - ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} - ${c_green}`._,._,\'${c_reset} - ${c_purple} qbic-pipelines/bamtofastq v${workflow.manifest.version}${c_reset} - -${c_dim}--------------------------------------------------${c_reset}- - """.stripIndent() -} - -def checkHostname(){ - def c_reset = params.monochrome_logs ? '' : "\033[0m" - def c_white = params.monochrome_logs ? '' : "\033[0;37m" - def c_red = params.monochrome_logs ? '' : "\033[1;91m" - def c_yellow_bold = params.monochrome_logs ? 
'' : "\033[1;93m" - if (params.hostnames) { - def hostname = "hostname".execute().text.trim() - params.hostnames.each { prof, hnames -> - hnames.each { hname -> - if (hostname.contains(hname) && !workflow.profile.contains(prof)) { - log.error "====================================================\n" + - " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + - " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + - " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + - "============================================================" - } - } - } - } -} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/modules.json b/modules.json new file mode 100644 index 00000000..51f0f25f --- /dev/null +++ b/modules.json @@ -0,0 +1,72 @@ +{ + "name": "nf-core/bamtofastq", + "homePage": "https://github.com/nf-core/bamtofastq", + "repos": { + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "cat/fastq": { + "branch": "master", + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "installed_by": ["modules"] + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "fastqc": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "multiqc": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "samtools/collatefastq": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "samtools/flagstat": { + "branch": 
"master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "samtools/merge": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "samtools/stats": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "samtools/view": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + } + } + } + } + } +} diff --git a/modules/local/check_paired_end.nf b/modules/local/check_paired_end.nf new file mode 100644 index 00000000..7d757ee8 --- /dev/null +++ b/modules/local/check_paired_end.nf @@ -0,0 +1,38 @@ +process CHECK_IF_PAIRED_END { + tag "$meta.id" + label 'process_low' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input), path(index) + path (fasta) + + output: + tuple val(meta), path("*single.txt"), emit: single_end, optional: true + tuple val(meta), path("*paired.txt"), emit: paired_end, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = meta.filetype == "cram" ? 
"--reference ${fasta}" : "" + """ + if [ \$({ samtools view -H $reference $input -@$task.cpus ; samtools view $reference $input -@$task.cpus | head -n1000; } | samtools view $reference -c -f 1 -@$task.cpus | awk '{print \$1/1000}') = "1" ]; then + echo 1 > ${prefix}.paired.txt + else + echo 1 > ${prefix}.single.txt + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf new file mode 100644 index 00000000..5021e6fc --- /dev/null +++ b/modules/nf-core/cat/fastq/main.nf @@ -0,0 +1,80 @@ +process CAT_FASTQ { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + + output: + tuple val(meta), path("*.merged.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size >= 2) { + def read1 = [] + def read2 = [] + readList.eachWithIndex{ v, ix -> ( ix & 1 ? 
read2 : read1 ) << v } + """ + cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz + cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size > 1) { + """ + touch ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size > 2) { + """ + touch ${prefix}_1.merged.fastq.gz + touch ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + +} diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml new file mode 100644 index 00000000..8a39e309 --- /dev/null +++ b/modules/nf-core/cat/fastq/meta.yml @@ -0,0 +1,40 @@ +name: cat_fastq +description: Concatenates fastq files +keywords: + - cat + - fastq + - concatenate +tools: + - cat: + description: | + The cat utility reads files sequentially, writing them to the standard output. + documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files to be concatenated. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: Merged fastq file + pattern: "*.{merged.fastq.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf new file mode 100644 index 00000000..ebc87273 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -0,0 +1,24 @@ +process CUSTOM_DUMPSOFTWAREVERSIONS { + label 'process_single' + + // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container + conda "bioconda::multiqc=1.14" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + + input: + path versions + + output: + path "software_versions.yml" , emit: yml + path "software_versions_mqc.yml", emit: mqc_yml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + template 'dumpsoftwareversions.py' +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml new file mode 100644 index 00000000..c32657de --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -0,0 +1,36 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: custom_dumpsoftwareversions +description: Custom module used to dump software versions within the nf-core pipeline template +keywords: + - custom + - dump + - version +tools: + - custom: + description: Custom module used to dump software versions within the nf-core pipeline template + homepage: https://github.com/nf-core/tools + documentation: 
https://github.com/nf-core/tools + licence: ["MIT"] +input: + - versions: + type: file + description: YML file containing software versions + pattern: "*.yml" + +output: + - yml: + type: file + description: Standard YML file containing software versions + pattern: "software_versions.yml" + - mqc_yml: + type: file + description: MultiQC custom content YML file containing software versions + pattern: "software_versions_mqc.yml" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py new file mode 100755 index 00000000..da033408 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python + + +"""Provide functions to merge multiple versions.yml files.""" + + +import yaml +import platform +from textwrap import dedent + + +def _make_versions_html(versions): + """Generate a tabular HTML output of all versions for MultiQC.""" + html = [ + dedent( + """\\ + + + + + + + + + + """ + ) + ] + for process, tmp_versions in sorted(versions.items()): + html.append("") + for i, (tool, version) in enumerate(sorted(tmp_versions.items())): + html.append( + dedent( + f"""\\ + + + + + + """ + ) + ) + html.append("") + html.append("
    Process Name Software Version
    {process if (i == 0) else ''}{tool}{version}
    ") + return "\\n".join(html) + + +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf new file mode 100644 index 00000000..07d5e433 --- /dev/null +++ b/modules/nf-core/fastqc/main.nf @@ -0,0 +1,51 @@ +process FASTQC { + tag "$meta.id" + 
label 'process_medium' + + conda "bioconda::fastqc=0.11.9" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : + 'biocontainers/fastqc:0.11.9--0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + fastqc $args --threads $task.cpus $renamed_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml new file mode 100644 index 00000000..4da5bb5a --- /dev/null +++ b/modules/nf-core/fastqc/meta.yml @@ -0,0 +1,52 @@ +name: fastqc +description: Run FastQC on sequenced reads +keywords: + - quality control + - qc + - adapters + - fastq +tools: + - fastqc: + description: | + FastQC gives general quality 
metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + licence: ["GPL-2.0-only"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf new file mode 100644 index 00000000..1fc387be --- /dev/null +++ b/modules/nf-core/multiqc/main.nf @@ -0,0 +1,53 @@ +process MULTIQC { + label 'process_single' + + conda "bioconda::multiqc=1.14" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + """ + multiqc \\ + --force \\ + $args \\ + $config \\ + $extra_config \\ + . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + touch multiqc_data + touch multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml new file mode 100644 index 00000000..f93b5ee5 --- /dev/null +++ b/modules/nf-core/multiqc/meta.yml @@ -0,0 +1,56 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: MultiQC +description: Aggregate results from bioinformatics analyses across many samples into a single report +keywords: + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report +tools: + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. 
+ homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: ["GPL-3.0-or-later"] + +input: + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. + pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + +output: + - report: + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + type: file + description: Plots created by MultiQC + pattern: "*_plots" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/samtools/collatefastq/main.nf b/modules/nf-core/samtools/collatefastq/main.nf new file mode 100644 index 00000000..4469fafc --- /dev/null +++ b/modules/nf-core/samtools/collatefastq/main.nf @@ -0,0 +1,55 @@ +process SAMTOOLS_COLLATEFASTQ { + tag "$meta.id" + label 'process_low' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input) + tuple val(meta2), path(fasta) + val(interleave) + + output: + tuple val(meta), path("*_{1,2}.fq.gz") , optional:true, emit: fastq + tuple val(meta), path("*_interleaved.fq.gz") , optional:true, emit: fastq_interleaved + tuple val(meta), path("*_other.fq.gz") , emit: fastq_other + tuple val(meta), path("*_singleton.fq.gz") , optional:true, emit: fastq_singleton + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + def output = (interleave && ! meta.single_end) ? "> ${prefix}_interleaved.fq.gz" : + meta.single_end ? "-1 ${prefix}_1.fq.gz -s ${prefix}_singleton.fq.gz" : + "-1 ${prefix}_1.fq.gz -2 ${prefix}_2.fq.gz -s ${prefix}_singleton.fq.gz" + + """ + samtools collate \\ + $args \\ + --threads $task.cpus \\ + ${reference} \\ + -O \\ + $input \\ + . 
| + + samtools fastq \\ + $args2 \\ + --threads $task.cpus \\ + ${reference} \\ + -0 ${prefix}_other.fq.gz \\ + $output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/collatefastq/meta.yml b/modules/nf-core/samtools/collatefastq/meta.yml new file mode 100644 index 00000000..b647cba4 --- /dev/null +++ b/modules/nf-core/samtools/collatefastq/meta.yml @@ -0,0 +1,76 @@ +name: samtools_collatefastq +description: | + The module uses collate and then fastq methods from samtools to + convert a SAM, BAM or CRAM file to FASTQ format +keywords: + - bam2fq + - samtools + - fastq +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files + + documentation: http://www.htslib.org/doc/1.1/samtools.html + + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" + - interleave: + type: boolean + description: | + If true, the output is a single interleaved paired-end FASTQ + If false, the output is split paired-end FASTQ files + default: false +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastq: + type: file + description: | + R1 and R2 FASTQ files + pattern: "*_{1,2}.fq.gz" + - fastq_interleaved: + type: file + description: | + Interleaved paired end FASTQ files + pattern: "*_interleaved.fq.gz" + - fastq_other: + type: file + description: | + FASTQ files with reads where the READ1 and READ2 FLAG bits are either both set or both unset.
+ pattern: "*_other.fq.gz" + - fastq_singleton: + type: file + description: | + FASTQ files with singleton reads. + pattern: "*_singleton.fq.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@lescai" + - "@maxulysse" + - "@matthdsm" diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 00000000..4dd0e5b0 --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,44 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path ("*.fai"), emit: fai + tuple val(meta), path ("*.gzi"), emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + faidx \\ + $args \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta}.fai + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 00000000..fe2fe9a1 --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,47 @@ +name: samtools_faidx +description: Index FASTA file +keywords: + - index + - fasta +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + 
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf new file mode 100644 index 00000000..eb7e72fc --- /dev/null +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -0,0 +1,35 @@ +process SAMTOOLS_FLAGSTAT { + tag "$meta.id" + label 'process_single' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.flagstat"), emit: flagstat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools \\ + flagstat \\ + --threads ${task.cpus} \\ + $bam \\ + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml new file mode 100644 index 00000000..954225df --- /dev/null +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -0,0 +1,49 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +keywords: + - stats + - mapping + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf new file mode 100644 index 00000000..a257d700 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -0,0 +1,36 @@ +process SAMTOOLS_IDXSTATS { + tag "$meta.id" + label 'process_single' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.idxstats"), emit: idxstats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + samtools \\ + idxstats \\ + --threads ${task.cpus-1} \\ + $bam \\ + > ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml new file mode 100644 index 00000000..dda87e1e --- /dev/null +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -0,0 +1,50 @@ +name: samtools_idxstats +description: Reports alignment summary statistics for a BAM/CRAM/SAM file +keywords: + - stats + - mapping + - counts + - chromosome + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, 
written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 00000000..0b20aa4b --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${input}.bai + touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 00000000..8bd2fa6f --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,53 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf new file mode 100644 index 00000000..c0a6ecda --- /dev/null +++ b/modules/nf-core/samtools/merge/main.nf @@ -0,0 +1,56 @@ +process SAMTOOLS_MERGE { + tag "$meta.id" + label 'process_low' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input_files, stageAs: "?/*") + path fasta + path fai + + output: + tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam + tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram + tuple val(meta), path("*.csi") , optional:true, emit: csi + path "versions.yml" , emit: versions + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def reference = fasta ? 
"--reference ${fasta}" : "" + """ + samtools \\ + merge \\ + --threads ${task.cpus-1} \\ + $args \\ + ${reference} \\ + ${prefix}.${file_type} \\ + $input_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + """ + touch ${prefix}.${file_type} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml new file mode 100644 index 00000000..644b768b --- /dev/null +++ b/modules/nf-core/samtools/merge/meta.yml @@ -0,0 +1,62 @@ +name: samtools_merge +description: Merge BAM or CRAM file +keywords: + - merge + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input_files: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" + - fai: + type: optional file + description: Index of the reference file the CRAM was created with + pattern: "*.fai" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - cram: + type: file + description: CRAM file + pattern: "*.{cram}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" +authors: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf new file mode 100644 index 00000000..eb7f098b --- /dev/null +++ b/modules/nf-core/samtools/stats/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input), path(input_index) + path fasta + + output: + tuple val(meta), path("*.stats"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? 
"--reference ${fasta}" : "" + """ + samtools \\ + stats \\ + --threads ${task.cpus} \\ + ${reference} \\ + ${input} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 00000000..1d68a5d8 --- /dev/null +++ b/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,53 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@FriederikeHanssen" diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf new file mode 100644 index 00000000..b87369e5 --- /dev/null +++ b/modules/nf-core/samtools/view/main.nf @@ -0,0 +1,66 @@ +process SAMTOOLS_VIEW { + tag "$meta.id" + label 'process_low' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input), path(index) + path fasta + path qname + + output: + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.sam"), emit: sam, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + def readnames = qname ? "--qname-file ${qname}": "" + def file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + samtools \\ + view \\ + --threads ${task.cpus-1} \\ + ${reference} \\ + ${readnames} \\ + $args \\ + -o ${prefix}.${file_type} \\ + $input \\ + $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.cram + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml new file mode 100644 index 00000000..76916033 --- /dev/null +++ b/modules/nf-core/samtools/view/meta.yml @@ -0,0 +1,79 @@ +name: samtools_view +description: filter/convert SAM/BAM/CRAM file +keywords: + - view + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index: + type: optional file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file + pattern: "*.{.bai,.csi,.crai}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" + - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: optional filtered/converted BAM file + pattern: "*.{bam}" + - cram: + type: file + description: optional filtered/converted CRAM file + pattern: "*.{cram}" + - sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" + # bai, csi, and crai are created with `--write-index` + - bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" + - csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" + - crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/nextflow.config b/nextflow.config index a284712b..a680b830 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,135 +1,273 @@ /* - * ------------------------------------------------- - * nf-core/bamtofastq Nextflow config file - * ------------------------------------------------- - * Default config options for all environments. 
- */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/bamtofastq Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ // Global default params, used in configs params { - // Workflow flags - input = false - chr = false - index_files = false - cram_files = false - reference_fasta = false - no_stats = false - no_read_QC = false //By default: QC is performed on extracted reads - samtools_collate_fast = false - reads_in_memory = '100000' - outdir = './results' - - // Boilerplate options - name = false - multiqc_config = "$baseDir/assets/multiqc_config.yaml" - email = false - email_on_fail = false - maxMultiqcEmailFileSize = 25.MB - plaintext_email = false - monochrome_logs = false - help = false - tracedir = "${params.outdir}/pipeline_info" - awsqueue = false - awsregion = 'eu-west-1' - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - hostnames = false - config_profile_description = false - config_profile_contact = false - config_profile_url = false -} + // Input options + input = null + + // Main options + chr = null + no_stats = false + no_read_QC = false // By default: QC is performed on extracted reads + samtools_collate_fast = false + reads_in_memory = 100000 + + // References + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_ignore = false + + // MultiQC options + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null + + // Boilerplate options + outdir = null + tracedir = "${params.outdir}/pipeline_info" + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs 
= false + hook_url = null + help = false + version = false + validate_params = true + show_hidden_params = false + schema_ignore_params = 'genomes' + + + // Config options + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_description = null + config_profile_contact = null + config_profile_url = null + config_profile_name = null + -// Container slug. Stable releases should specify release tag! -// Developmental code should specify :dev -process.container = 'qbicpipelines/bamtofastq:1.2.0' + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' + +} // Load base.config by default for all pipelines includeConfig 'conf/base.config' // Load nf-core custom profiles from different Institutions try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" + includeConfig "${params.custom_config_base}/nfcore_custom.config" } catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") } +// Load nf-core/bamtofastq custom profiles from different institutions. +// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs!
+// try { +// includeConfig "${params.custom_config_base}/pipeline/bamtofastq.config" +// } catch (Exception e) { +// System.err.println("WARNING: Could not load nf-core/config/bamtofastq profiles: ${params.custom_config_base}/pipeline/bamtofastq.config") +// } + + profiles { - awsbatch { includeConfig 'conf/awsbatch.config' } - conda { process.conda = "$baseDir/environment.yml" } - debug { process.beforeScript = 'echo $HOSTNAME' } - docker { docker.enabled = true } - singularity { singularity.enabled = true } - test { includeConfig 'conf/test.config' } - test_chr { includeConfig 'conf/test_chr.config' } - test_bai { includeConfig 'conf/test_bai.config' } - test_cram { includeConfig 'conf/test_cram.config' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + } + conda { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + mamba { + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + docker { + docker.enabled = true + docker.registry = 'quay.io' + docker.userEmulation = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + podman { + podman.enabled = true + podman.registry = 'quay.io' + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + 
charliecloud.enabled = false + apptainer.enabled = false + } + shifter { + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + charliecloud { + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + gitpod { + executor.name = 'local' + executor.cpus = 16 + executor.memory = 60.GB + } + test { includeConfig 'conf/test.config' } // default tests bam input + test_full { includeConfig 'conf/test_full.config' } + test_no_bai { includeConfig 'conf/test_no_bai.config' } + test_chr { includeConfig 'conf/test_chr.config' } + test_cram { includeConfig 'conf/test_cram.config' } + test_no_crai { includeConfig 'conf/test_no_crai.config' } + test_no_stats { includeConfig 'conf/test_no_stats.config' } + test_no_qc { includeConfig 'conf/test_no_qc.config' } + test_collate_fast { includeConfig 'conf/test_collate_fast.config' } } -// Avoid this error: -// WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. -// Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351, once this is established and works well, nextflow might implement this behavior as new default. 
-docker.runOptions = '-u \$(id -u):\$(id -g)' +// Load igenomes.config if required +if (!params.igenomes_ignore) { + includeConfig 'conf/igenomes.config' +} else { + params.genomes = [:] +} + + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. + +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" +} // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { - enabled = true - file = "${params.tracedir}/execution_timeline.html" + enabled = true + file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" } report { - enabled = true - file = "${params.tracedir}/execution_report.html" + enabled = true + file = "${params.tracedir}/execution_report_${trace_timestamp}.html" } trace { - enabled = true - file = "${params.tracedir}/execution_trace.txt" + enabled = true + file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" } dag { - enabled = true - file = "${params.tracedir}/pipeline_dag.svg" + enabled = true + file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" } manifest { - name = 'qbic-pipelines/bamtofastq' - author = 'Friederike Hanssen' - homePage = 'https://github.com/qbic-pipelines/bamtofastq' - description = 'Workflow converts one or multiple bam files back to the fastq format' - mainScript = 'main.nf' - nextflowVersion = '>=20.04.1' - version = '1.2.0' + name = 'nf-core/bamtofastq' + author = """Friederike Hanssen, Susanne Jodoin""" + homePage = 
'https://github.com/nf-core/bamtofastq' + description = """Workflow converts one or multiple bam/cram files to fastq format""" + mainScript = 'main.nf' + nextflowVersion = '!>=22.10.1' + version = '2.0.0' + doi = '' } +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + // Function to ensure that resource requirements don't go beyond // a maximum limit def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! 
Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" + return obj + } } - } } diff --git a/nextflow_schema.json b/nextflow_schema.json new file mode 100644 index 00000000..0e46c10d --- /dev/null +++ b/nextflow_schema.json @@ -0,0 +1,334 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/bamtofastq/master/nextflow_schema.json", + "title": "nf-core/bamtofastq pipeline parameters", + "description": "Workflow converts one or multiple bam/cram files to fastq format", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["input", "outdir"], + "properties": { + "input": { + "type": "string", + "format": "file-path", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "schema": "assets/schema_input.json", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/bamtofastq/usage#samplesheet-input).", + "fa_icon": "fas fa-file-csv" + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + } + } + }, + "main_options": { + "title": "Main options", + "type": "object", + "description": "Most common options used for the pipeline", + "default": "", + "properties": { + "chr": { + "type": "string", + "default": "None", + "fa_icon": "fas fa-clock", + "description": "Only use reads mapping to a specific chromosome/region. Has to be specified as in bam: i.e chr1, chr{1..22} (gets all reads mapping to chr1 to 22), 1, \"X Y\", incorrect naming will lead to a potentially silent error." + }, + "no_read_QC": { + "type": "boolean", + "fa_icon": "fas fa-clock", + "description": "If specified, no quality control will be performed on extracted reads. Useful, if this is done anyways in the subsequent workflow." + }, + "no_stats": { + "type": "boolean", + "fa_icon": "fas fa-clock", + "description": "If specified, skips all quality control and stats computation, including `FastQC` on both input bam and output reads, `samtools flagstat`, `samtools idxstats`, and `samtools stats`." + }, + "reads_in_memory": { + "type": "integer", + "default": 100000, + "fa_icon": "fas fa-clock", + "description": "Reads to store in memory [default = '100000']. Only relevant for use with `--samtools_collate_fast`." + }, + "samtools_collate_fast": { + "type": "boolean", + "fa_icon": "fas fa-clock", + "description": "Uses fast mode for samtools collate in `sortExtractMapped`, `sortExtractUnmapped` and `sortExtractSingleEnd`." 
+ } + } + }, + "reference_genome_options": { + "title": "Reference genome options", + "type": "object", + "fa_icon": "fas fa-dna", + "description": "Reference genome related files and options required for the workflow.", + "properties": { + "genome": { + "type": "string", + "description": "Name of iGenomes reference.", + "fa_icon": "fas fa-book", + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + }, + "fasta": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "description": "Path to FASTA genome file.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", + "fa_icon": "far fa-file-code" + }, + "fasta_fai": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta).fai?(\\.gz)?$", + "description": "Path to FASTA FAI genome index file.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. 
Combine with `--save_reference` to save BWA index for future runs.", + "fa_icon": "far fa-file-code" + }, + "igenomes_base": { + "type": "string", + "format": "directory-path", + "description": "Directory / URL base for iGenomes references.", + "default": "s3://ngi-igenomes/igenomes", + "fa_icon": "fas fa-cloud-download-alt", + "hidden": true + }, + "igenomes_ignore": { + "type": "boolean", + "description": "Do not load the iGenomes reference config.", + "fa_icon": "fas fa-ban", + "hidden": true, + "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } + }, + "max_job_request_options": { + "title": "Max job request options", + "type": "object", + "fa_icon": "fab fa-acquisitions-incorporated", + "description": "Set the top limit for requested resources for any single job.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "properties": { + "max_cpus": { + "type": "integer", + "description": "Maximum number of CPUs that can be requested for any single job.", + "default": 16, + "fa_icon": "fas fa-microchip", + "hidden": true, + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. 
`--max_cpus 1`" + }, + "max_memory": { + "type": "string", + "description": "Maximum amount of memory that can be requested for any single job.", + "default": "128.GB", + "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "hidden": true, + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + }, + "max_time": { + "type": "string", + "description": "Maximum amount of time that can be requested for any single job.", + "default": "240.h", + "fa_icon": "far fa-clock", + "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "hidden": true, + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", + "hidden": true + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true + }, + "max_multiqc_email_size": { + "type": "string", + "description": "File size limit when attaching MultiQC reports to summary emails.", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "default": "25.MB", + "fa_icon": "fas fa-file-upload", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. 
Currently, MS Teams and Slack are supported.", + "hidden": true + }, + "multiqc_title": { + "type": "string", + "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", + "fa_icon": "fas fa-file-signature" + }, + "multiqc_config": { + "type": "string", + "description": "Custom config file to supply to MultiQC.", + "fa_icon": "fas fa-cog", + "hidden": true + }, + "multiqc_logo": { + "type": "string", + "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", + "hidden": true + }, + "multiqc_methods_description": { + "type": "string", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, + "tracedir": { + "type": "string", + "description": "Directory to keep pipeline Nextflow logs and reports.", + "default": "${params.outdir}/pipeline_info", + "fa_icon": "fas fa-cogs", + "hidden": true + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "show_hidden_params": { + "type": "boolean", + "fa_icon": "far fa-eye-slash", + "description": "Show all params when using `--help`", + "hidden": true, + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." 
+ } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/main_options" + }, + { + "$ref": "#/definitions/reference_genome_options" + }, + { + "$ref": "#/definitions/institutional_config_options" + }, + { + "$ref": "#/definitions/max_job_request_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..0d62beb6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. +[tool.black] +line-length = 120 +target_version = ["py37", "py38", "py39", "py310"] + +[tool.isort] +profile = "black" +known_first_party = ["nf_core"] +multi_line_output = 3 diff --git a/subworkflows/local/alignment_to_fastq.nf b/subworkflows/local/alignment_to_fastq.nf new file mode 100644 index 00000000..4cf17688 --- /dev/null +++ b/subworkflows/local/alignment_to_fastq.nf @@ -0,0 +1,107 @@ +// +// BAM/CRAM to FASTQ conversion, paired end only +// + +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_MAP_MAP } from '../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_UNMAP_UNMAP } from '../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_UNMAP_MAP } from '../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_MAP_UNMAP } from '../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_UNMAP } from '../../modules/nf-core/samtools/merge/main' +include { SAMTOOLS_COLLATEFASTQ as COLLATE_FASTQ_UNMAP } from '../../modules/nf-core/samtools/collatefastq/main' +include { SAMTOOLS_COLLATEFASTQ as COLLATE_FASTQ_MAP } from '../../modules/nf-core/samtools/collatefastq/main' +include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' + +workflow 
ALIGNMENT_TO_FASTQ { + take: + input // channel: [meta, alignment (BAM or CRAM), index (optional)] + fasta // optional: reference file if CRAM format and reference not in header + fasta_fai + + main: + + ch_versions = Channel.empty() + // Index File if not PROVIDED -> this also requires updates to samtools view possibly URGH + + // MAP - MAP + SAMTOOLS_VIEW_MAP_MAP(input, fasta, []) + + // UNMAP - UNMAP + SAMTOOLS_VIEW_UNMAP_UNMAP(input, fasta, []) + + // UNMAP - MAP + SAMTOOLS_VIEW_UNMAP_MAP(input, fasta, []) + + // MAP - UNMAP + SAMTOOLS_VIEW_MAP_UNMAP(input, fasta, []) + + // Channel for merging UNMAPPED BAM + all_unmapped_bam = SAMTOOLS_VIEW_UNMAP_UNMAP.out.bam + .join(SAMTOOLS_VIEW_UNMAP_MAP.out.bam, remainder: true) + .join(SAMTOOLS_VIEW_MAP_UNMAP.out.bam, remainder: true) + .map{ meta, unmap_unmap, unmap_map, map_unmap -> + [meta, [unmap_unmap, unmap_map, map_unmap]] + } + + // Channel for merging UNMAPPED CRAM + all_unmapped_cram = SAMTOOLS_VIEW_UNMAP_UNMAP.out.cram + .join(SAMTOOLS_VIEW_UNMAP_MAP.out.cram, remainder: true) + .join(SAMTOOLS_VIEW_MAP_UNMAP.out.cram, remainder: true) + .map{ meta, unmap_unmap, unmap_map, map_unmap -> + [meta, [unmap_unmap, unmap_map, map_unmap]] + } + + // Combine UNMAPPED channels + ch_unmapped_bam_cram = Channel.empty().mix(all_unmapped_bam,all_unmapped_cram) + + // MERGE UNMAP + SAMTOOLS_MERGE_UNMAP(ch_unmapped_bam_cram, fasta, fasta_fai) + + def interleave = false + + // SortExtractUnmapped: Collate & convert unmapped + COLLATE_FASTQ_UNMAP(SAMTOOLS_MERGE_UNMAP.out.cram.mix(SAMTOOLS_MERGE_UNMAP.out.bam), fasta.map{ it -> + def new_id = "" + if(it) { + new_id = it[0].baseName + } + [[id:new_id], it] }, + interleave) + + // /SortExtractMapped: Collate & convert mapped + COLLATE_FASTQ_MAP(SAMTOOLS_VIEW_MAP_MAP.out.cram.mix(SAMTOOLS_VIEW_MAP_MAP.out.bam), fasta.map{ it -> + def new_id = "" + if(it) { + new_id = it[0].baseName + } + [[id:new_id], it] }, + interleave) + + // Channel for joining mapped & unmapped fastq + 
reads_to_concat = COLLATE_FASTQ_MAP.out.fastq + .join(COLLATE_FASTQ_UNMAP.out.fastq) + .map{ meta, mapped_reads, unmapped_reads -> + [meta, [ + mapped_reads[0], + mapped_reads[1], + unmapped_reads[0], + unmapped_reads[1]] + ] + } + + // Concatenate Mapped_R1 with Unmapped_R1 and Mapped_R2 with Unmapped_R2 + CAT_FASTQ(reads_to_concat) + + // Gather versions of all tools used + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) + ch_versions = ch_versions.mix(COLLATE_FASTQ_MAP.out.versions) + ch_versions = ch_versions.mix(COLLATE_FASTQ_UNMAP.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_MERGE_UNMAP.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_MAP_MAP.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_MAP_UNMAP.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_UNMAP_MAP.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_UNMAP_UNMAP.out.versions) + + emit: + reads = CAT_FASTQ.out.reads + versions = ch_versions +} diff --git a/subworkflows/local/pre_conversion_qc.nf b/subworkflows/local/pre_conversion_qc.nf new file mode 100644 index 00000000..28b39984 --- /dev/null +++ b/subworkflows/local/pre_conversion_qc.nf @@ -0,0 +1,53 @@ +// +// Pre-conversion QC +// + +include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' +include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main' +include { FASTQC as FASTQC_PRE_CONVERSION } from '../../modules/nf-core/fastqc/main' + +workflow PRE_CONVERSION_QC { + take: + input // channel: [meta, alignment (BAM or CRAM), index (optional)] + fasta // optional: reference file if CRAM format and reference not in header + + main: + + ch_versions = Channel.empty() + + // SAMTOOLS IDXSTATS + SAMTOOLS_IDXSTATS(input) + + // SAMTOOLS FLAGSTAT + SAMTOOLS_FLAGSTAT(input) + + // SAMTOOLS STATS + SAMTOOLS_STATS(input, fasta) + + // FASTQC ONLY ON BAM + input.branch{ + bam: 
it[0].filetype == 'bam' + cram: it[0].filetype == 'cram' + }.set{fastqc_input} + + FASTQC_PRE_CONVERSION(fastqc_input.bam + .map{ it -> + [it[0], // meta + it[1]] // bam + }) + + // Gather versions of all tools used + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + ch_versions = ch_versions.mix(FASTQC_PRE_CONVERSION.out.versions) + + emit: + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat + idxstats = SAMTOOLS_IDXSTATS.out.idxstats + stats = SAMTOOLS_STATS.out.stats + zip = FASTQC_PRE_CONVERSION.out.zip + html = FASTQC_PRE_CONVERSION.out.html + versions = ch_versions +} diff --git a/subworkflows/local/prepare_indices.nf b/subworkflows/local/prepare_indices.nf new file mode 100644 index 00000000..0ba38b45 --- /dev/null +++ b/subworkflows/local/prepare_indices.nf @@ -0,0 +1,57 @@ +// +// Prepare indices +// + +// Initialize channels based on params or indices that were just built +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' + + +workflow PREPARE_INDICES { + take: + input // channel: [meta, alignment (BAM or CRAM), []] + fasta // optional: reference file if CRAM format and reference not in header + + main: + + ch_versions = Channel.empty() + + ch_out = Channel.empty() + + // Determine if INDEX provided + input.branch{ + is_indexed: it[0].index == true + to_index: it[0].index == false + }.set{samtools_input} + + // Remove empty INDEX [] from channel + input_to_index = samtools_input.to_index.map{ it -> [it[0], it[1]] } + + // INDEX BAM/CRAM only if not provided + SAMTOOLS_INDEX(input_to_index) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) + ch_index_files = 
Channel.empty().mix(SAMTOOLS_INDEX.out.bai, SAMTOOLS_INDEX.out.crai) + + // Combine channels + ch_new = input_to_index.join(ch_index_files) + ch_out = samtools_input.is_indexed.mix(ch_new) + + + // INDEX FASTA + fasta_fai = Channel.empty() + if(params.fasta && !params.fasta_fai){ + SAMTOOLS_FAIDX(fasta.map{ it -> [[id:it[0].baseName], it] }) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + fasta_fai = SAMTOOLS_FAIDX.out.fai.map{ meta, fai -> [fai] } + + } + + // Gather versions of all tools used + emit: + ch_input_indexed = ch_out + fasta_fai = fasta_fai + versions = ch_versions +} diff --git a/testdata/First_SmallTest_Paired.bai b/testdata/First_SmallTest_Paired.bai deleted file mode 100644 index 5f072d8b..00000000 Binary files a/testdata/First_SmallTest_Paired.bai and /dev/null differ diff --git a/testdata/First_SmallTest_Paired.bam b/testdata/First_SmallTest_Paired.bam deleted file mode 100644 index 4d4d3b84..00000000 Binary files a/testdata/First_SmallTest_Paired.bam and /dev/null differ diff --git a/testdata/First_SmallTest_Paired.bam.bai b/testdata/First_SmallTest_Paired.bam.bai deleted file mode 100644 index 5f072d8b..00000000 Binary files a/testdata/First_SmallTest_Paired.bam.bai and /dev/null differ diff --git a/testdata/First_SmallTest_Paired.cram b/testdata/First_SmallTest_Paired.cram deleted file mode 100644 index 07da5489..00000000 Binary files a/testdata/First_SmallTest_Paired.cram and /dev/null differ diff --git a/testdata/Second_SmallTest_Paired.bam b/testdata/Second_SmallTest_Paired.bam deleted file mode 100644 index 2b4412f6..00000000 Binary files a/testdata/Second_SmallTest_Paired.bam and /dev/null differ diff --git a/testdata/Second_SmallTest_Paired.bam.bai b/testdata/Second_SmallTest_Paired.bam.bai deleted file mode 100644 index 9f11d9ed..00000000 Binary files a/testdata/Second_SmallTest_Paired.bam.bai and /dev/null differ diff --git a/testdata/Second_SmallTest_Paired.cram b/testdata/Second_SmallTest_Paired.cram deleted file mode 
100644 index 21846f1b..00000000 Binary files a/testdata/Second_SmallTest_Paired.cram and /dev/null differ diff --git a/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam b/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam deleted file mode 100644 index 1329d402..00000000 Binary files a/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam and /dev/null differ diff --git a/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam.bai b/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam.bai deleted file mode 100644 index ab60f0a6..00000000 Binary files a/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam.bai and /dev/null differ diff --git a/tests/test.yml b/tests/test.yml new file mode 100644 index 00000000..c1b03f8f --- /dev/null +++ b/tests/test.yml @@ -0,0 +1,30 @@ +- name: Run test profile + command: nextflow run main.nf -profile test,docker + tags: + - test + - bam + - default + - paired-end + files: + - path: results/reads/test_1.merged.fastq.gz + md5sum: 15ab79e32e45138d29b6278f4c32a8ca + - path: results/reads/test2_1.merged.fastq.gz + md5sum: d0fd4034e5f07590b0944e3cf24cff60 + - path: results/reads/test2_2.merged.fastq.gz + md5sum: bcb8df2cda7006efb30bbf2c8830e761 + - path: results/reads/test_2.merged.fastq.gz + md5sum: 2c2dbdf1a7584efb6861ad1ac5efbb43 + - path: results/samtools/test2.flagstat + md5sum: acbd95030a362a7e6491faa87d1b9c2c + - path: results/samtools/test2.idxstats + md5sum: 90f01313bc89c7fd096aad051cd71b66 + - path: results/samtools/test2.stats + md5sum: f89e29166ff872b38ee72e15d44ae8c3 + - path: results/samtools/test.flagstat + md5sum: a53f3d26e2e9851f7d528442bbfe9781 + - path: results/samtools/test.idxstats + md5sum: e179601fa7b8ebce81ac3765206f6c15 + - path: results/samtools/test.stats + md5sum: f81d34302eec687c43539432e81022d4 + - path: results/fastqc + - path: results/multiqc diff --git a/tests/test_chr.yml b/tests/test_chr.yml new file mode 100644 index 00000000..a5c8ae78 --- /dev/null +++ b/tests/test_chr.yml @@ -0,0 +1,18 @@ +- name: Run test profile + command: nextflow run main.nf 
-profile test,test_chr,docker + tags: + - test + - chromosomes + files: + - path: results/reads/test3.chrX_chrY_X_Y_other.fq.gz + md5sum: fee527cf707a00e16065616a1fd1cb9e + - path: results/samtools/test3.chrX_chrY_X_Y.bam + md5sum: 751210b16706b6309f41a92a8c44952c + - path: results/samtools/test3.flagstat + md5sum: 7b43b975b0a395040bf1ee3c48f058e7 + - path: results/samtools/test3.idxstats + md5sum: 094096b6f303cc18892e1e027c87f273 + - path: results/samtools/test3.stats + md5sum: 5265894a506071133a29ef1ff5fb48b8 + - path: results/fastqc + - path: results/multiqc diff --git a/tests/test_collate_fast.yml b/tests/test_collate_fast.yml new file mode 100644 index 00000000..06753eac --- /dev/null +++ b/tests/test_collate_fast.yml @@ -0,0 +1,28 @@ +- name: Run test profile with samtools collate fast + command: nextflow run main.nf -profile test,docker --samtools_collate_fast + tags: + - test + - collate_fast + files: + - path: results/reads/test_1.merged.fastq.gz + md5sum: df8cf0d9fc7a7f6b23115592a6ff3261 + - path: results/reads/test2_1.merged.fastq.gz + md5sum: 6d6959d6955cd91f5c59f4b0fa4912bf + - path: results/reads/test2_2.merged.fastq.gz + md5sum: 6aeb5b48ffef5697a4d3c61c488735f8 + - path: results/reads/test_2.merged.fastq.gz + md5sum: 8a4e01c993334bf2f9b40eb8e0ed69fb + - path: results/samtools/test2.flagstat + md5sum: acbd95030a362a7e6491faa87d1b9c2c + - path: results/samtools/test2.idxstats + md5sum: 90f01313bc89c7fd096aad051cd71b66 + - path: results/samtools/test2.stats + md5sum: f89e29166ff872b38ee72e15d44ae8c3 + - path: results/samtools/test.flagstat + md5sum: a53f3d26e2e9851f7d528442bbfe9781 + - path: results/samtools/test.idxstats + md5sum: e179601fa7b8ebce81ac3765206f6c15 + - path: results/samtools/test.stats + md5sum: f81d34302eec687c43539432e81022d4 + - path: results/fastqc + - path: results/multiqc diff --git a/tests/test_cram.yml b/tests/test_cram.yml new file mode 100644 index 00000000..7394639a --- /dev/null +++ b/tests/test_cram.yml @@ -0,0 +1,29 @@ +- name: Run test profile + command: nextflow run main.nf
-profile test,test_cram,docker + tags: + - test + - cram + - paired-end + files: + - path: results/reads/test_1.merged.fastq.gz + md5sum: 15ab79e32e45138d29b6278f4c32a8ca + - path: results/reads/test2_1.merged.fastq.gz + md5sum: d0fd4034e5f07590b0944e3cf24cff60 + - path: results/reads/test2_2.merged.fastq.gz + md5sum: bcb8df2cda7006efb30bbf2c8830e761 + - path: results/reads/test_2.merged.fastq.gz + md5sum: 2c2dbdf1a7584efb6861ad1ac5efbb43 + - path: results/samtools/test2.flagstat + md5sum: acbd95030a362a7e6491faa87d1b9c2c + - path: results/samtools/test2.idxstats + md5sum: 90f01313bc89c7fd096aad051cd71b66 + - path: results/samtools/test2.stats + md5sum: 6e4ad43d69cf225eb4b2bdc11401dd6e + - path: results/samtools/test.flagstat + md5sum: a53f3d26e2e9851f7d528442bbfe9781 + - path: results/samtools/test.idxstats + md5sum: e179601fa7b8ebce81ac3765206f6c15 + - path: results/samtools/test.stats + md5sum: ca5c3f558faef2cb5b50e5b015dcc231 + - path: results/fastqc + - path: results/multiqc diff --git a/tests/test_no_bai.yml b/tests/test_no_bai.yml new file mode 100644 index 00000000..754e8881 --- /dev/null +++ b/tests/test_no_bai.yml @@ -0,0 +1,39 @@ +- name: Run test profile + command: nextflow run main.nf -profile test,test_no_bai,docker + tags: + - test + - bam + - no_bai + - paired-end + - single-end + files: + - path: results/reads/test_1.merged.fastq.gz + md5sum: 15ab79e32e45138d29b6278f4c32a8ca + - path: results/reads/test2_1.merged.fastq.gz + md5sum: d0fd4034e5f07590b0944e3cf24cff60 + - path: results/reads/test2_2.merged.fastq.gz + md5sum: bcb8df2cda7006efb30bbf2c8830e761 + - path: results/reads/test_2.merged.fastq.gz + md5sum: 2c2dbdf1a7584efb6861ad1ac5efbb43 + - path: results/reads/test3_other.fq.gz + md5sum: 274765d73ffe4448503b1bf18f7f4880 + - path: results/samtools/test2.flagstat + md5sum: acbd95030a362a7e6491faa87d1b9c2c + - path: results/samtools/test2.idxstats + md5sum: 90f01313bc89c7fd096aad051cd71b66 + - path: results/samtools/test2.stats + md5sum: 
f89e29166ff872b38ee72e15d44ae8c3 + - path: results/samtools/test3.flagstat + md5sum: 7b43b975b0a395040bf1ee3c48f058e7 + - path: results/samtools/test3.idxstats + md5sum: 094096b6f303cc18892e1e027c87f273 + - path: results/samtools/test3.stats + md5sum: 5265894a506071133a29ef1ff5fb48b8 + - path: results/samtools/test.flagstat + md5sum: a53f3d26e2e9851f7d528442bbfe9781 + - path: results/samtools/test.idxstats + md5sum: e179601fa7b8ebce81ac3765206f6c15 + - path: results/samtools/test.stats + md5sum: f81d34302eec687c43539432e81022d4 + - path: results/fastqc + - path: results/multiqc diff --git a/tests/test_no_crai.yml b/tests/test_no_crai.yml new file mode 100644 index 00000000..bf54a35f --- /dev/null +++ b/tests/test_no_crai.yml @@ -0,0 +1,30 @@ +- name: Run test profile + command: nextflow run main.nf -profile test,test_no_crai,docker + tags: + - test + - cram + - no_crai + - paired-end + files: + - path: results/reads/test_1.merged.fastq.gz + md5sum: 15ab79e32e45138d29b6278f4c32a8ca + - path: results/reads/test2_1.merged.fastq.gz + md5sum: d0fd4034e5f07590b0944e3cf24cff60 + - path: results/reads/test2_2.merged.fastq.gz + md5sum: bcb8df2cda7006efb30bbf2c8830e761 + - path: results/reads/test_2.merged.fastq.gz + md5sum: 2c2dbdf1a7584efb6861ad1ac5efbb43 + - path: results/samtools/test2.flagstat + md5sum: acbd95030a362a7e6491faa87d1b9c2c + - path: results/samtools/test2.idxstats + md5sum: 90f01313bc89c7fd096aad051cd71b66 + - path: results/samtools/test2.stats + md5sum: 6e4ad43d69cf225eb4b2bdc11401dd6e + - path: results/samtools/test.flagstat + md5sum: a53f3d26e2e9851f7d528442bbfe9781 + - path: results/samtools/test.idxstats + md5sum: e179601fa7b8ebce81ac3765206f6c15 + - path: results/samtools/test.stats + md5sum: ca5c3f558faef2cb5b50e5b015dcc231 + - path: results/fastqc + - path: results/multiqc diff --git a/tests/test_no_qc.yml b/tests/test_no_qc.yml new file mode 100644 index 00000000..556a2144 --- /dev/null +++ b/tests/test_no_qc.yml @@ -0,0 +1,27 @@ +- name: Run test 
profile + command: nextflow run main.nf -profile test,docker --no_read_QC + tags: + - test + - no_qc + files: + - path: results/reads/test_1.merged.fastq.gz + md5sum: 15ab79e32e45138d29b6278f4c32a8ca + - path: results/reads/test2_1.merged.fastq.gz + md5sum: d0fd4034e5f07590b0944e3cf24cff60 + - path: results/reads/test2_2.merged.fastq.gz + md5sum: bcb8df2cda7006efb30bbf2c8830e761 + - path: results/reads/test_2.merged.fastq.gz + md5sum: 2c2dbdf1a7584efb6861ad1ac5efbb43 + - path: results/samtools/test2.flagstat + md5sum: acbd95030a362a7e6491faa87d1b9c2c + - path: results/samtools/test2.idxstats + md5sum: 90f01313bc89c7fd096aad051cd71b66 + - path: results/samtools/test2.stats + md5sum: f89e29166ff872b38ee72e15d44ae8c3 + - path: results/samtools/test.flagstat + md5sum: a53f3d26e2e9851f7d528442bbfe9781 + - path: results/samtools/test.idxstats + md5sum: e179601fa7b8ebce81ac3765206f6c15 + - path: results/samtools/test.stats + md5sum: f81d34302eec687c43539432e81022d4 + - path: results/multiqc diff --git a/tests/test_no_stats.yml b/tests/test_no_stats.yml new file mode 100644 index 00000000..464a0ee6 --- /dev/null +++ b/tests/test_no_stats.yml @@ -0,0 +1,16 @@ +- name: Run test profile + command: nextflow run main.nf -profile test,docker --no_stats + tags: + - test + - no_stats + files: + - path: results/reads/test_1.merged.fastq.gz + md5sum: 15ab79e32e45138d29b6278f4c32a8ca + - path: results/reads/test2_1.merged.fastq.gz + md5sum: d0fd4034e5f07590b0944e3cf24cff60 + - path: results/reads/test2_2.merged.fastq.gz + md5sum: bcb8df2cda7006efb30bbf2c8830e761 + - path: results/reads/test_2.merged.fastq.gz + md5sum: 2c2dbdf1a7584efb6861ad1ac5efbb43 + - path: results/fastqc + - path: results/multiqc diff --git a/tower.yml b/tower.yml new file mode 100644 index 00000000..787aedfe --- /dev/null +++ b/tower.yml @@ -0,0 +1,5 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" 
diff --git a/workflows/bamtofastq.nf b/workflows/bamtofastq.nf new file mode 100644 index 00000000..5413234c --- /dev/null +++ b/workflows/bamtofastq.nf @@ -0,0 +1,316 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowBamtofastq.initialise(params, log) + +// Check input path parameters to see if they exist +def checkPathParamList = [ + params.fasta, + params.fasta_fai, + params.input, + params.multiqc_config + ] + +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +// Check mandatory parameters +if (params.input) { ch_input = extract_csv(file(params.input, checkIfExists: true)) } else { exit 1, 'Input samplesheet not specified!' } + + +// Initialize file channels based on params +fasta = params.fasta ? Channel.fromPath(params.fasta).collect() : Channel.value([]) +fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.value([]) + +// Initialize value channels based on params +chr = params.chr ?: Channel.empty() + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ERROR MESSAGES AND WARNINGS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? 
Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { CHECK_IF_PAIRED_END } from '../modules/local/check_paired_end' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { FASTQC as FASTQC_POST_CONVERSION } from '../modules/nf-core/fastqc/main' +include { SAMTOOLS_VIEW as SAMTOOLS_CHR } from '../modules/nf-core/samtools/view/main' +include { SAMTOOLS_VIEW as SAMTOOLS_PE } from '../modules/nf-core/samtools/view/main' +include { SAMTOOLS_INDEX as SAMTOOLS_CHR_INDEX } from '../modules/nf-core/samtools/index/main' +include { SAMTOOLS_COLLATEFASTQ as SAMTOOLS_COLLATEFASTQ_SINGLE_END } from '../modules/nf-core/samtools/collatefastq/main' + +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' + +// +// SUBWORKFLOWS: Installed directly from subworkflows/local +// + +include { PREPARE_INDICES } from '../subworkflows/local/prepare_indices' +include { PRE_CONVERSION_QC } from '../subworkflows/local/pre_conversion_qc' +include { ALIGNMENT_TO_FASTQ } from '../subworkflows/local/alignment_to_fastq' + + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Info required for completion email and summary +def multiqc_report = [] + +workflow BAMTOFASTQ { + + ch_versions = Channel.empty() + + // SUBWORKFLOW: Prepare indices bai/crai/fai if not provided + PREPARE_INDICES( + ch_input, + fasta + ) + + ch_versions = ch_versions.mix(PREPARE_INDICES.out.versions) + + fasta_fai = params.fasta ? params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : PREPARE_INDICES.out.fasta_fai : [] + + ch_input = PREPARE_INDICES.out.ch_input_indexed + + // SUBWORKFLOW: Pre conversion QC and stats + + PRE_CONVERSION_QC( + ch_input, + fasta + ) + + ch_versions = ch_versions.mix(PRE_CONVERSION_QC.out.versions) + + // MODULE: Check if SINGLE or PAIRED-END + + CHECK_IF_PAIRED_END(ch_input, fasta) + + ch_paired_end = ch_input.join(CHECK_IF_PAIRED_END.out.paired_end) + ch_single_end = ch_input.join(CHECK_IF_PAIRED_END.out.single_end) + + // Combine channels into new input channel for conversion + add info about single/paired to meta map + ch_input_new = ch_single_end.map{ meta, bam, bai, txt -> + [ [ id : meta.id, + filetype : meta.filetype, + single_end : true ], + bam, + bai + ] } + .mix(ch_paired_end.map{ meta, bam, bai, txt -> + [ [ id : meta.id, + filetype : meta.filetype, + single_end : false ], + bam, + bai + ] }) + + ch_versions = ch_versions.mix(CHECK_IF_PAIRED_END.out.versions) + + + // Extract only reads mapping to a chromosome + if (params.chr) { + + SAMTOOLS_CHR(ch_input_new, fasta, []) + + samtools_chr_out = Channel.empty().mix( SAMTOOLS_CHR.out.bam, + SAMTOOLS_CHR.out.cram) + SAMTOOLS_CHR_INDEX(samtools_chr_out) + ch_input_chr = samtools_chr_out.join(Channel.empty().mix( SAMTOOLS_CHR_INDEX.out.bai, + SAMTOOLS_CHR_INDEX.out.crai )) + + // Add chr names to id: the new id is the basename of the SAMTOOLS_CHR output file + ch_input_new = ch_input_chr.map{ it -> + def new_id = it[1].baseName // def keeps new_id local to this closure invocation + [[ + id : new_id, + filetype : it[0].filetype, + single_end: it[0].single_end + ], + it[1], + it[2]] }
+ + ch_versions = ch_versions.mix(SAMTOOLS_CHR.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_CHR_INDEX.out.versions) + + } + + // MODULE: SINGLE-END Alignment to FastQ (SortExtractSingleEnd) + def interleave = false + + ch_input_new.branch{ + ch_single: it[0].single_end == true + ch_paired: it[0].single_end == false + }.set{conversion_input} + + // Module needs info about single-endedness + SAMTOOLS_COLLATEFASTQ_SINGLE_END( + conversion_input.ch_single.map{ it -> [ it[0], it[1] ]}, // meta, bam/cram + fasta.map{ it -> // meta, fasta + def new_id = "" + if(it) { + new_id = it[0].baseName + } + [[id:new_id], it] }, + interleave) + + ch_versions = ch_versions.mix(SAMTOOLS_COLLATEFASTQ_SINGLE_END.out.versions) + + // + // SUBWORKFLOW: PAIRED-END Alignment to FastQ + // + + ALIGNMENT_TO_FASTQ ( + conversion_input.ch_paired, + fasta, + fasta_fai + ) + + ch_versions = ch_versions.mix(ALIGNMENT_TO_FASTQ.out.versions) + + + // MODULE: FastQC - Post conversion QC + ch_reads_post_qc = Channel.empty().mix(SAMTOOLS_COLLATEFASTQ_SINGLE_END.out.fastq_singleton, ALIGNMENT_TO_FASTQ.out.reads) + + FASTQC_POST_CONVERSION(ch_reads_post_qc) + + ch_versions = ch_versions.mix(FASTQC_POST_CONVERSION.out.versions) + + // MODULE: Software versions + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) + + // + // MODULE: MultiQC + // + workflow_summary = WorkflowBamtofastq.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) + + methods_description = WorkflowBamtofastq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) + ch_methods_description = Channel.value(methods_description) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = 
ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + ch_multiqc_files = ch_multiqc_files.mix(PRE_CONVERSION_QC.out.flagstat.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(PRE_CONVERSION_QC.out.idxstats.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(PRE_CONVERSION_QC.out.stats.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(PRE_CONVERSION_QC.out.zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC_POST_CONVERSION.out.zip.collect{it[1]}.ifEmpty([])) + + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + multiqc_report = MULTIQC.out.report.toList() +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COMPLETION EMAIL AND SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow.onComplete { + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) + } + NfcoreTemplate.summary(workflow, params, log) + if (params.hook_url) { + NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// Function to extract information (meta data + file(s)) from csv file(s) +def extract_csv(csv_file) { + + // check that the sample sheet is not 1 line or less, because it'll skip all subsequent checks if so. + file(csv_file).withReader('UTF-8') { reader -> + def line, numberOfLinesInSampleSheet = 0; + while ((line = reader.readLine()) != null) {numberOfLinesInSampleSheet++} + if (numberOfLinesInSampleSheet < 2) { + error("Samplesheet had less than two lines. 
The sample sheet must be a csv file with a header, so at least two lines.") + } + } + Channel.of(csv_file).splitCsv(header: true) + .map{ row -> + if ( !row.sample_id ) { // This also handles the case where sample_id is left as an empty string + error('The sample sheet should specify a sample_id for each row.\n' + row.toString()) + } + if ( !row.mapped ) { // This also handles the case where mapped is left as an empty string + error('The sample sheet should specify a mapped file for each row.\n' + row.toString()) + } + if (!row.file_type) { // This also handles the case where file_type is left as an empty string + error('The sample sheet should specify a file_type for each row, valid values are bam/cram.\n' + row.toString()) + } + if (!(row.file_type == "bam" || row.file_type == "cram")) { + error('The file_type for the row below is neither "bam" nor "cram". Please correct this.\n' + row.toString() ) + } + if (row.file_type != file(row.mapped).getExtension().toString()) { + error('The file extension does not fit the specified file_type.\n' + row.toString() ) + } + + + // init meta map (id, filetype, and whether an index was supplied in the sample sheet) + def meta = [:] + + meta.id = "${row.sample_id}".toString() + def mapped = file(row.mapped, checkIfExists: true) + def index = row.index ? file(row.index, checkIfExists: true) : [] + meta.filetype = "${row.file_type}".toString() + meta.index = row.index ? true : false + + return [meta, mapped, index] + + } + +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/