diff --git a/.editorconfig b/.editorconfig
index b6b31907..b78de6e6 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -8,7 +8,7 @@ trim_trailing_whitespace = true
 indent_size = 4
 indent_style = space

-[*.{md,yml,yaml,html,css,scss,js}]
+[*.{md,yml,yaml,html,css,scss,js,cff}]
 indent_size = 2

 # These files are edited and tested upstream in nf-core/modules
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index be0acf3c..d7271fd4 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -15,8 +15,8 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/cuta
 - [ ] This comment contains a description of changes (with reason).
 - [ ] If you've fixed a bug or added code that should be tested, add tests!
-  - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/cutandrun/tree/master/.github/CONTRIBUTING.md)
-  - [ ] If necessary, also make a PR on the nf-core/cutandrun _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
+- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/cutandrun/tree/master/.github/CONTRIBUTING.md)
+- [ ] If necessary, also make a PR on the nf-core/cutandrun _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
 - [ ] Make sure your code lints (`nf-core lint`).
 - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
index 0a5bf86b..ce560a55 100644
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -15,9 +15,6 @@ jobs:
     steps:
       - name: Launch workflow via tower
         uses: nf-core/tower-action@v3
-        # TODO nf-core: You can customise AWS full pipeline tests as required
-        # Add full size test data (but still relatively small datasets for few samples)
-        # on the `test_full.config` test runs with only one set of parameters
        with:
          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
@@ -28,3 +25,7 @@ jobs:
            "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/cutandrun/results-${{ github.sha }}"
            }
          profiles: test_full,aws_tower
+      - uses: actions/upload-artifact@v3
+        with:
+          name: Tower debug log file
+          path: tower_action_*.log
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
index 8e8011e1..7d897f56 100644
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -23,3 +23,7 @@ jobs:
            "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/cutandrun/results-test-${{ github.sha }}"
            }
          profiles: test,aws_tower
+      - uses: actions/upload-artifact@v3
+        with:
+          name: Tower debug log file
+          path: tower_action_*.log
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 30f05d3d..ff515085 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -8,6 +8,11 @@ on:
   release:
     types: [published]

+# Cancel if a newer run is started
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
 jobs:
   ##############################
   ### SMALL INTEGRATION TEST ###
   ##############################
   test:
     if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/cutandrun') }}"
     runs-on: ubuntu-latest
     env:
-      NXF_VER:
${{ matrix.nxf_ver }} + NXF_VER: ${{ matrix.NXF_VER }} NXF_ANSI_LOG: false CAPSULE_LOG: none strategy: matrix: - # Nextflow versions - include: - # Test pipeline minimum Nextflow version - - NXF_VER: "21.10.3" - # Test latest nextflow version - - NXF_VER: "" + NXF_VER: ["21.10.3", ""] steps: - name: Check out pipeline code uses: actions/checkout@v2 @@ -70,7 +70,7 @@ jobs: strategy: matrix: # Nextflow versions: check pipeline minimum and current latest - nxf_ver: ["21.10.3", ""] + NXF_VER: ["21.10.3", ""] steps: - name: Check out pipeline code uses: actions/checkout@v2 @@ -106,8 +106,6 @@ jobs: if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/cutandrun') }} runs-on: ubuntu-latest env: - NXF_VER: "" - NXF_EDGE: 1 NXF_ANSI_LOG: false CAPSULE_LOG: none steps: @@ -115,10 +113,9 @@ jobs: uses: actions/checkout@v2 - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - nextflow self-update + uses: nf-core/setup-nextflow@v1 + with: + version: "latest-everything" # Work around for the unexpected end of file error that github actions seems to get when downloading compressed # files during pipeline execution @@ -141,34 +138,23 @@ jobs: ### UNIT TESTS ### ############################## unit_tests: - name: ${{ matrix.nxf_version }} ${{ matrix.tags }} + name: ${{ matrix.NXF_VER }} ${{ matrix.tags }} if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/cutandrun') }} runs-on: ubuntu-20.04 env: + NXF_VER: ${{ matrix.NXF_VER }} NXF_ANSI_LOG: false CAPSULE_LOG: none strategy: fail-fast: false matrix: - nxf_version: ["21.10.3", ""] + NXF_VER: ["21.10.3", ""] tags: - - test_params + - test_genome_options - test_samplesheet - - verify_output_save_ref - - verify_output_only_input - - verify_output_save_merged - - verify_output_skip_fastqc - - verify_output_save_trimmed - - verify_output_skip_trimming - - verify_output_align_intermed - - verify_output_align_only_align - - verify_output_align_save_spikein_align - - verify_output_align_save_unaligned - - verify_output_only_filtering - - verify_output_align_duplicates_mark - - verify_output_align_duplicates_remove - - verify_output_align_duplicates_remove_target - - verify_output_peak_calling_only_peak_calling + - test_samplesheet_2 + - test_filtering_noqfilter + - test_filtering_withqfilter - test_bam_scale_none - test_bam_scale_spikein - test_bam_scale_cpm @@ -182,14 +168,37 @@ jobs: - test_peak_callers_macs2_seacr - test_peak_callers_seacr_macs2_noigg - test_peak_callers_ctrl_tests - - test_conseneus_peaks_group - - test_conseneus_peaks_all - - test_conseneus_peaks_invalid + - test_consensus_peaks_group + - test_consensus_peaks_all + - test_consensus_peaks_invalid + - verify_output_only_input + - verify_output_save_merged + - verify_output_save_trimmed + - verify_output_skip_trimming + - verify_output_skip_fastqc + - verify_output_save_ref + - verify_output_align_only_align + - verify_output_align_intermed + - verify_output_align_save_spikein_align + - verify_output_align_save_unaligned + - verify_output_align_duplicates_mark + - verify_output_align_duplicates_remove + - verify_output_align_duplicates_remove_target + - verify_output_only_filtering + - verify_output_peak_calling_only_peak_calling + - verify_output_reporting_skip_preseq_false + - verify_output_reporting_skip_preseq_true + - verify_output_reporting_skip_dtqc_false + - verify_output_reporting_skip_dtqc_true + - 
verify_output_reporting_skip_heatmaps_false + - verify_output_reporting_skip_heatmaps_true + - verify_output_reporting_skip_igv_false + - verify_output_reporting_skip_igv_true + - verify_output_reporting_skip_multiqc_false + - verify_output_reporting_skip_multiqc_true + - verify_output_reporting_skip_peak_qc_false + - verify_output_reporting_skip_peak_qc_true - verify_output_reporting_skip_reporting - - verify_output_reporting_skip_igv - - verify_output_reporting_skip_heatmaps - - verify_output_reporting_skip_multiqc - - verify_output_skip_frip steps: - name: Checkout Code uses: actions/checkout@v2 @@ -219,9 +228,6 @@ jobs: ${{ runner.os }}-nextflow- - name: Install Nextflow - env: - NXF_VER: ${{ matrix.nxf_version }} - CAPSULE_LOG: none run: | wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ @@ -229,16 +235,23 @@ jobs: - name: Run pytest-workflow uses: Wandalen/wretry.action@v1.0.11 with: - command: pytest --tag ${{ matrix.tags }} --kwdof + command: TMPDIR=~ PROFILE=docker pytest --tag ${{ matrix.tags }} --symlink --kwdof --color=yes attempt_limit: 3 + - name: Output log on failure + if: failure() + run: | + sudo apt install bat > /dev/null + batcat --decorations=always --color=always /home/runner/pytest_workflow_*/*/log.{out,err} - name: Upload logs on failure if: failure() uses: actions/upload-artifact@v2 with: - name: logs-${{ matrix.tags }}-${{ matrix.profile }}-${{ matrix.nxf_version }} + name: logs-unit-tests path: | - /tmp/pytest_workflow_*/*/.nextflow.log - /tmp/pytest_workflow_*/*/log.out - /tmp/pytest_workflow_*/*/log.err - /tmp/pytest_workflow_*/*/work + /home/runner/pytest_workflow_*/*/.nextflow.log + /home/runner/pytest_workflow_*/*/log.out + /home/runner/pytest_workflow_*/*/log.err + /home/runner/pytest_workflow_*/*/work + !/home/runner/pytest_workflow_*/*/work/conda + !/home/runner/pytest_workflow_*/*/work/singularity diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 77358dee..8a5ce69b 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -35,6 +35,36 @@ jobs: - name: Run Prettier --check run: prettier --check ${GITHUB_WORKSPACE} + PythonBlack: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Check code lints with Black + uses: psf/black@stable + + # If the above check failed, post a comment on the PR explaining the failure + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + ## Python linting (`black`) is failing + + To keep the code consistent with lots of contributors, we run automated code consistency checks. + To fix this CI test, please run: + + * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` + * Fix formatting errors in your pipeline: `black .` + + Once you push these changes the test should pass, and you can hide this comment :+1: + + We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + + Thanks again for your contribution! 
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+          allow-repeats: false
+
   nf-core:
     runs-on: ubuntu-latest
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2

       - name: Install Nextflow
-        env:
-          CAPSULE_LOG: none
-        run: |
-          wget -qO- get.nextflow.io | bash
-          sudo mv nextflow /usr/local/bin/
+        uses: nf-core/setup-nextflow@v1

       - uses: actions/setup-python@v3
         with:
-          python-version: "3.6"
+          python-version: "3.7"
           architecture: "x64"

       - name: Install dependencies
diff --git a/.prettierignore b/.prettierignore
index d0e7ae58..eb74a574 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -1,4 +1,5 @@
 email_template.html
+adaptivecard.json
 .nextflow*
 work/
 data/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8964bbbb..7c08fbb0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -103,3 +103,56 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi
 > **NB:** Dependency has been **updated** if both old and new version information is present.
 > **NB:** Dependency has been **added** if just the new version information is present.
 > **NB:** Dependency has been **removed** if version information isn't present.
+
+## [3.0] - 2022-09-26
+
+### Major Changes
+
+- Major rework of the pipeline internal flow structure. Metadata from processes (such as read counts) was previously annotated to a channel dictionary that was passed through the pipeline, where various reporting processes could use the data. This was interacting with quite a few bugs in the Nextflow pipeline resume feature, causing lots of processes to rerun unnecessarily on resume. Any metadata generated in the pipeline is now written to files and passed where necessary to consuming reporting processes. This has drastically reduced the number of processes that incorrectly rerun on resume.
+
+- Re-organized the pipeline into clearer sections, breaking related processes into sub-workflows where possible. This is for better readability, but also to prepare the pipeline for the major upcoming nf-core feature of re-usable sub-workflows. As part of this rework, the pipeline now has distinct sections for fragment-based QC and peak-based QC.
+
+- All reporting has been moved into MultiQC where possible. All PDF-based charting has been removed. Other PDF reports such as heatmaps and upset plots are still generated.
+
+- We have listened to user comments that there is no guide on how to interpret the results from the pipeline. In response, we have revamped the documentation in the `output.md` document to describe the reporting in much more depth, including good and bad examples of reporting output where possible.
+
+- [[#140](https://github.com/nf-core/cutandrun/issues/140)] - IGV browser output has been reworked. We first fixed the performance issues with long load times by including the genome index in the session folder. IGV output now includes peaks from all peak callers used in the pipeline, not just the primary one. Users can now select whether the gene track exported with the IGV session contains gene symbols or gene names. Several visual changes have been made to improve the default appearance and order of tracks.
+
+- Added PreSeq library complexity reporting.
+
+- Added a full suite of fragment-based deepTools QC using the `multiBAMSummary` module. We generate three reports from this fragment dataset: PCA, correlation and fingerprint plots. This has replaced our previous Python implementation of sample correlation calculation.
+
+- All coverage tracks generated from reads now extend reads to full fragment length by default. We feel this creates more realistic coverage tracks for CUT&RUN and improves the accuracy of other fragment-based reports.
+
+### Enhancements
+
+- Updated pipeline template to nf-core/tools `2.5.1`.
+- [[#149](https://github.com/nf-core/cutandrun/issues/149)] - The pipeline will now use a blacklist file, if provided, to create an include list for the genome.
+- The FRiP score is now calculated based on extended read fragments and not just mapped reads.
+- [[#138](https://github.com/nf-core/cutandrun/issues/138)] - Better sample sheet error reporting.
+- Gene bed files will now be automatically created from the GTF file if not supplied.
+- The default minimum q-score for read quality has been changed from 0 to 20.
+- [[#156](https://github.com/nf-core/cutandrun/issues/156)] - SEACR has been better parameterized with dedicated config values for stringency and normalization. Credit to `CloXD` for this.
+- deepTools heatmap generation has been better parameterized with dedicated config values for the gene and peak region settings.
+- Consensus peak count reporting has been added to MultiQC.
+- Reviewed and updated CI tests for better code coverage.
+- Updated all nf-core modules to latest versions.
+
+### Fixes
+
+- Fixed some bugs in the passing of MACS2 peak data through the pipeline in v2.0. MACS2 peaks will now be correctly used and reported on in the pipeline.
+- [[#135](https://github.com/nf-core/cutandrun/issues/135)] - Removed many of the yellow warnings that were appearing in the pipeline relating to resource config options for processes that were not run.
+- [[#137](https://github.com/nf-core/cutandrun/issues/137)] - Fixed the `workflow.OnComplete` error.
+
+### Software dependencies
+
+Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference.
+
+| Dependency | Old version | New version |
+| ---------- | ----------- | ----------- |
+| `multiqc`  | 1.12        | 1.13        |
+| `picard`   | 2.27.2      | 2.27.4      |
+
+> **NB:** Dependency has been **updated** if both old and new version information is present.
+> **NB:** Dependency has been **added** if just the new version information is present.
+> **NB:** Dependency has been **removed** if version information isn't present.
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 00000000..017666c0
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,56 @@
+cff-version: 1.2.0
+message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication"
+authors:
+  - family-names: Ewels
+    given-names: Philip
+  - family-names: Peltzer
+    given-names: Alexander
+  - family-names: Fillinger
+    given-names: Sven
+  - family-names: Patel
+    given-names: Harshil
+  - family-names: Alneberg
+    given-names: Johannes
+  - family-names: Wilm
+    given-names: Andreas
+  - family-names: Garcia
+    given-names: Maxime Ulysse
+  - family-names: Di Tommaso
+    given-names: Paolo
+  - family-names: Nahnsen
+    given-names: Sven
+title: "The nf-core framework for community-curated bioinformatics pipelines."
+version: 2.4.1
+doi: 10.1038/s41587-020-0439-x
+date-released: 2022-05-16
+url: https://github.com/nf-core/tools
+preferred-citation:
+  type: article
+  authors:
+    - family-names: Ewels
+      given-names: Philip
+    - family-names: Peltzer
+      given-names: Alexander
+    - family-names: Fillinger
+      given-names: Sven
+    - family-names: Patel
+      given-names: Harshil
+    - family-names: Alneberg
+      given-names: Johannes
+    - family-names: Wilm
+      given-names: Andreas
+    - family-names: Garcia
+      given-names: Maxime Ulysse
+    - family-names: Di Tommaso
+      given-names: Paolo
+    - family-names: Nahnsen
+      given-names: Sven
+  doi: 10.1038/s41587-020-0439-x
+  journal: Nature Biotechnology
+  start: 276
+  end: 278
+  title: "The nf-core framework for community-curated bioinformatics pipelines."
+  issue: 3
+  volume: 38
+  year: 2020
+  url: https://dx.doi.org/10.1038/s41587-020-0439-x
diff --git a/README.md b/README.md
index dead8265..31649adf 100644
--- a/README.md
+++ b/README.md
@@ -6,14 +6,12 @@
 [![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.5653535-1073c8)](https://doi.org/10.5281/zenodo.5653535)

 [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/)
-[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/)
-[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/)
-[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/)
+[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
+[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
+[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
 [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/cutandrun)

-[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23cutandrun-4A154B?logo=slack)](https://nfcore.slack.com/channels/cutandrun)
-[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core)
-[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core)
+[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23cutandrun-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/cutandrun)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)

 ## Introduction

@@ -31,7 +29,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool

 The pipeline has been developed with continuous integration (CI) and test driven development (TDD) at its core. nf-core code and module linting as well as a battery of over 100 unit and integration tests run on pull request to the main repository and on release of the pipeline. On official release, automated CI tests run the pipeline on a full-sized dataset on AWS cloud infrastructure.
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/cutandrun/results).

-![pipeline_diagram](docs/images/cutandrun-flow-diagram-v1-0_2.png)
+![pipeline_diagram](docs/images/cutandrun-flow-diagram-v3.0.png)

 ## Pipeline summary
@@ -46,11 +44,12 @@ The pipeline has been developed with continuous integration (CI) and test driven
 9. Create bigWig coverage files ([`bedGraphToBigWig`](http://hgdownload.soe.ucsc.edu/admin/exe/))
 10. Peak calling ([`SEACR`](https://github.com/FredHutch/SEACR), [`MACS2`](https://github.com/macs3-project/MACS))
 11. Consensus peak merging and reporting ([`bedtools`](https://github.com/arq5x/bedtools2/))
-12. Quality control and analysis:
-    1. Alignment, fragment length and peak analysis and replicate reproducibility ([`python`](https://www.python.org/))
-    2. Heatmap peak analysis ([`deepTools`](https://github.com/deeptools/deepTools/))
-13. Genome browser session ([`IGV`](https://software.broadinstitute.org/software/igv/))
-14. Present QC for raw read, alignment and duplicate reads ([`MultiQC`](http://multiqc.info/))
+12. Library complexity ([`preseq`](http://smithlabresearch.org/software/preseq))
+13. Fragment-based quality control ([`deepTools`](https://github.com/deeptools/deepTools/))
+14. Peak-based quality control ([`bedtools`](https://github.com/arq5x/bedtools2/), custom python)
+15. Heatmap peak analysis ([`deepTools`](https://github.com/deeptools/deepTools/))
+16. Genome browser session ([`IGV`](https://software.broadinstitute.org/software/igv/))
+17. Present all QC in web-based report ([`MultiQC`](http://multiqc.info/))

 ## Quick Start
@@ -60,7 +59,7 @@ The pipeline has been developed with continuous integration (CI) and test driven

 3. Download the pipeline and test it on a minimal dataset with a single command:

-   ```console
+   ```bash
    nextflow run nf-core/cutandrun -profile test,YOURPROFILE --outdir <OUTDIR>
    ```
@@ -80,7 +79,7 @@ The pipeline has been developed with continuous integration (CI) and test driven

   - Typical command for CUT&Run/CUT&Tag analysis:

-   ```console
+   ```bash
    nextflow run nf-core/cutandrun --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
    ```
diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json
new file mode 100644
index 00000000..a2539c55
--- /dev/null
+++ b/assets/adaptivecard.json
@@ -0,0 +1,67 @@
+{
+    "type": "message",
+    "attachments": [
+        {
+            "contentType": "application/vnd.microsoft.card.adaptive",
+            "contentUrl": null,
+            "content": {
+                "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json",
+                "msteams": {
+                    "width": "Full"
+                },
+                "type": "AdaptiveCard",
+                "version": "1.2",
+                "body": [
+                    {
+                        "type": "TextBlock",
+                        "size": "Large",
+                        "weight": "Bolder",
+                        "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>",
+                        "text": "nf-core/cutandrun v${version} - ${runName}",
+                        "wrap": true
+                    },
+                    {
+                        "type": "TextBlock",
+                        "spacing": "None",
+                        "text": "Completed at ${dateComplete} (duration: ${duration})",
+                        "isSubtle": true,
+                        "wrap": true
+                    },
+                    {
+                        "type": "TextBlock",
+                        "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors.
The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/blacklists/GRCh37-blacklist.bed b/assets/blacklists/GRCh37-blacklist.bed new file mode 100644 index 00000000..dcf5caff --- /dev/null +++ b/assets/blacklists/GRCh37-blacklist.bed @@ -0,0 +1,411 @@ +1 564449 570371 High_Mappability_island 1000 . +1 724136 727043 Satellite_repeat 1000 . +1 825006 825115 BSR/Beta 1000 . +1 2583334 2634374 Low_mappability_island 1000 . +1 4363064 4363242 (CATTC)n 1000 . +1 5725866 5736651 Low_mappability_island 1000 . +1 16839923 16841396 Low_mappability_island 1000 . +1 38077347 38077423 Low_mappability_island 1000 . +1 91852785 91853147 LSU-rRNA_Hsa 1000 . +1 104163724 104163860 Low_mappability_island 1000 . +1 108112972 108113707 LSU-rRNA_Hsa 1000 . +1 121351474 121487059 centromeric_repeat 1000 . +1 142535434 142543081 Satellite_repeat 1000 . +1 142723256 142723968 Low_mappability_island 1000 . +1 142792613 142793303 Low_mappability_island 1000 . +1 142835822 142837333 Low_mappability_island 1000 . +1 143274490 143284340 centromeric_repeat 1000 . +1 145277108 145277572 LSU-rRNA_Hsa 1000 . +1 149033183 149035829 Satellite_repeat 1000 . +1 156186169 156186712 High_Mappability_island 1000 . +1 224199390 224204260 Satellite_repeat 1000 . +1 233318467 233318516 (CATTC)n 1000 . +1 236260366 236260821 Low_mappability_island 1000 . +1 237766308 237766764 LSU-rRNA_Hsa 1000 . +1 238105345 238105511 Low_mappability_island 1000 . +1 238108025 238108378 Low_mappability_island 1000 . +1 238108645 238109697 Low_mappability_island 1000 . +10 18841533 18862467 (CATTC)n 1000 . +10 20035661 20037171 Low_mappability_island 1000 . +10 36722282 36723650 Low_mappability_island 1000 . +10 38772277 38819357 Satellite_repeat 1000 . +10 38868892 38889025 Satellite_repeat 1000 . +10 39076515 39155771 Satellite_repeat 1000 . +10 42354835 42548642 centromeric_repeat 1000 . +10 42596676 42602082 Satellite_repeat 1000 . +10 42596700 42602110 Satellite_repeat 1000 . +10 42661264 42667623 Satellite_repeat 1000 . +10 42790522 42818398 Satellite_repeat 1000 . +10 135498649 135502716 Satellite_repeat 1000 . +11 6831669 6831838 ALR/Alpha 1000 . +11 10529403 10531969 Low_mappability_island 1000 . +11 48671444 48902406 centromeric_repeat 1000 . +11 48931242 48964015 centromeric_repeat 1000 . +11 50318471 50784078 centromeric_repeat 1000 . +11 51090700 51374066 centromeric_repeat 1000 . +11 51567242 51594226 centromeric_repeat 1000 . +11 54694046 55027975 centromeric_repeat 1000 . +11 73221660 73221946 Low_mappability_island 1000 . +11 85194913 85195322 LSU-rRNA_Hsa 1000 . +11 87524468 87525005 Low_mappability_island 1000 . +11 103275584 103281729 Low_mappability_island 1000 . +11 122874287 122874443 Low_mappability_island 1000 . +12 20704285 20704583 SSU-rRNA_Hsa 1000 . +12 34372315 34372825 LSU-rRNA_Hsa 1000 . +12 34432130 34857010 centromeric_repeat 1000 . +12 37989447 38441828 centromeric_repeat 1000 . 
+12 38531376 38531930 LSU-rRNA_Hsa 1000 . +12 41757383 41757545 Low_mappability_island 1000 . +12 127650407 127651075 LSU-rRNA_Hsa 1000 . +12 132061320 132062046 Low_mappability_island 1000 . +13 56545728 56545925 Low_mappability_island 1000 . +13 110076444 110076782 Low_mappability_island 1000 . +14 18999935 19056900 centromeric_repeat 1000 . +14 32953263 32954381 Low_mappability_island 1000 . +14 84637832 84639038 Low_mappability_island 1000 . +14 90341302 90341516 SSU-rRNA_Hsa 1000 . +15 19999941 20044132 centromeric_repeat 1000 . +16 32493036 32570826 ALR/Alpha 1000 . +16 32590063 32598801 ALR/Alpha 1000 . +16 33237130 33241330 Low_mappability_island 1000 . +16 33864355 34023306 centromeric_repeat 1000 . +16 34180542 34197081 Satellite_repeat 1000 . +16 34530115 34542632 BSR/Beta 1000 . +16 35193580 35285885 centromeric_repeat 1000 . +16 46385718 46456668 Satellite_repeat 1000 . +16 46497639 46500515 Satellite_repeat 1000 . +16 47538629 47539297 LSU-rRNA_Hsa 1000 . +17 19355538 19356096 LSU-rRNA_Hsa 1000 . +17 19502495 19506773 Low_mappability_island 1000 . +17 21905167 21906712 centromeric_repeat 1000 . +17 22018524 22032049 Low_mappability_island 1000 . +17 22221073 22263006 centromeric_repeat 1000 . +17 25263010 25268059 Satellite_repeat 1000 . +17 25415551 25417559 telomeric_repeat 1000 . +17 31149365 31149981 High_Mappability_island 1000 . +17 33478114 33478372 LSU-rRNA_Hsa 1000 . +17 41381502 41382591 High_Mappability_island 1000 . +17 41463538 41464075 High_Mappability_island 1000 . +17 41464478 41465015 snRNA 1000 . +17 41465562 41467288 High_Mappability_island 1000 . +17 51183038 51183763 Low_mappability_island 1000 . +17 55868618 55868752 LSU-rRNA_Hsa 1000 . +17 75158031 75158430 LSU-rRNA_Hsa 1000 . +18 96416 97552 Satellite_repeat 1000 . +18 105658 112233 Satellite_repeat 1000 . +18 2842252 2842356 Low_mappability_island 1000 . +18 15393801 15393992 centromeric_repeat 1000 . +18 18510894 18520356 centromeric_repeat 1000 . +18 44126235 44126593 (CATTC)n 1000 . +18 45379603 45379864 Low_mappability_island 1000 . +18 50319086 50319301 Low_mappability_island 1000 . +18 77772846 77773065 LSU-rRNA_Hsa 1000 . +19 246006 247844 TAR1 1000 . +19 22877614 22877696 SSU-rRNA_Hsa 1000 . +19 23235030 23235504 BSR/Beta 1000 . +19 24182398 24186210 LSU-rRNA_Hsa 1000 . +19 24385474 24633168 centromeric_repeat 1000 . +19 27730611 28262682 centromeric_repeat 1000 . +19 36066445 36066810 LSU-rRNA_Hsa 1000 . +19 36756398 36800948 centromeric_repeat 1000 . +19 37759473 37797722 centromeric_repeat 1000 . +19 44914313 44916340 ACRO1 1000 . +19 44960681 44962681 ACRO1 1000 . +2 739925 740994 Low_mappability_island 1000 . +2 49456729 49457067 Low_mappability_island 1000 . +2 88124390 88124903 Low_mappability_island 1000 . +2 89830421 89880514 Satellite_repeat 1000 . +2 90371401 90394776 Satellite_repeat 1000 . +2 90443001 90545431 Low_mappability_island 1000 . +2 91595080 91616015 Satellite_repeat 1000 . +2 92267428 92326280 centromeric_repeat 1000 . +2 115695017 115695281 LSU-rRNA_Hsa 1000 . +2 117781085 117781300 Low_mappability_island 1000 . +2 132966248 132989300 centromeric_repeat 1000 . +2 132994855 133007983 ALR/Alpha 1000 . +2 133011824 133013298 SSU-rRNA_Hsa 1000 . +2 133036250 133040042 LSU-rRNA_Hsa 1000 . +2 133044095 133045945 ACRO1 1000 . +2 143848503 143848792 Low_mappability_island 1000 . +2 148022736 148022878 Low_mappability_island 1000 . +2 149639207 149639515 Low_mappability_island 1000 . +2 156120500 156120610 Low_mappability_island 1000 . 
+2 162135000 162139241 Low_mappability_island 1000 . +2 230045426 230045796 LSU-rRNA_Hsa 1000 . +20 26257032 26320267 centromeric_repeat 1000 . +20 29517710 29521147 centromeric_repeat 1000 . +20 29803876 29833334 centromeric_repeat 1000 . +20 55932703 55936114 chrM 1000 . +20 62916702 62918053 telomeric_repeat 1000 . +21 9647205 9648529 Low_mappability_island 1000 . +21 9694896 9704962 centromeric_repeat 1000 . +21 9825451 9827612 High_Mappability_island 1000 . +21 9827612 9845233 Low_mappability_island 1000 . +21 9881895 9882569 TAR1 1000 . +21 10084922 10088004 Satellite_repeat 1000 . +21 10492876 10493049 Low_mappability_island 1000 . +21 10599428 10599915 TAR1 1000 . +21 10697886 10860890 centromeric_repeat 1000 . +21 11186054 11188131 Satellite_repeat 1000 . +21 14338127 14369791 centromeric_repeat 1000 . +21 18800575 18800997 (GAGTG)n 1000 . +21 27228003 27228242 SSU-rRNA_Hsa 1000 . +21 46796081 46796336 Low_mappability_island 1000 . +22 16847814 16862659 Satellite_repeat 1000 . +22 18876789 18884510 Satellite_repeat 1000 . +3 25508897 25509131 Low_mappability_island 1000 . +3 73159606 73161131 snRNA 1000 . +3 75696297 75699304 BSR/Beta 1000 . +3 75717841 75720426 Satellite_repeat 1000 . +3 80995858 81014459 ALR/Alpha 1000 . +3 90311686 90507410 centromeric_repeat 1000 . +3 93504815 93519133 centromeric_repeat 1000 . +3 96335934 96337436 Low_mappability_island 1000 . +3 160665423 160665642 Low_mappability_island 1000 . +3 196625514 196625860 Satellite_repeat 1000 . +3 197825427 197834080 Low_mappability_island 1000 . +4 9987 12694 telomeric_repeat 1000 . +4 12276463 12292424 ALR/Alpha 1000 . +4 12641862 12642305 Low_mappability_island 1000 . +4 21583630 21583719 (GAATG)n 1000 . +4 27732004 27732240 Low_mappability_island 1000 . +4 47774268 47774416 Low_mappability_island 1000 . +4 49085372 49342114 centromeric_repeat 1000 . +4 49488472 49662085 centromeric_repeat 1000 . +4 52659961 52688986 centromeric_repeat 1000 . +4 56194229 56194584 Low_mappability_island 1000 . +4 65473858 65473941 Low_mappability_island 1000 . +4 68264186 68266830 centromeric_repeat 1000 . +4 70296565 70296841 LSU-rRNA_Hsa 1000 . +4 76807083 76807320 LSU-rRNA_Hsa 1000 . +4 78929660 78929920 Low_mappability_island 1000 . +4 156374749 156377226 chrM 1000 . +4 156384860 156387314 Low_mappability_island 1000 . +4 163342479 163342744 Low_mappability_island 1000 . +4 190190746 190203442 Low_mappability_island 1000 . +4 190801869 190802909 Low_mappability_island 1000 . +4 190943802 190943962 Satellite_repeat 1000 . +4 190987268 190990949 Satellite_repeat 1000 . +4 191026302 191044344 telomeric_repeat 1000 . +5 17517177 17600940 Low_mappability_island 1000 . +5 21477365 21497415 Low_mappability_island 1000 . +5 34177882 34197574 Low_mappability_island 1000 . +5 45908253 46411114 centromeric_repeat 1000 . +5 49405493 49554574 centromeric_repeat 1000 . +5 71146650 71146996 LSU-rRNA_Hsa 1000 . +5 79945807 79948223 Low_mappability_island 1000 . +5 93903068 93906726 Low_mappability_island 1000 . +5 97746525 97746679 Low_mappability_island 1000 . +5 99381556 99390873 Low_mappability_island 1000 . +5 105889063 105889263 chrM 1000 . +5 123095972 123097432 chrM 1000 . +5 134258949 134264271 Low_mappability_island 1000 . +5 174541634 174542177 SSU-rRNA_Hsa 1000 . +6 58735349 58739031 centromeric_repeat 1000 . +6 58745955 58780547 centromeric_repeat 1000 . +6 61880095 61944008 centromeric_repeat 1000 . +6 62189892 62206612 ALR/Alpha 1000 . +6 62207809 62230644 ALR/Alpha 1000 . +6 62283966 62284581 Low_mappability_island 1000 . 
+6 133593944 133594201 LSU-rRNA_Hsa 1000 . +6 137059142 137059326 SSU-rRNA_Hsa 1000 . +6 150665074 150665281 SSU-rRNA_Hsa 1000 . +6 157731310 157735525 Low_mappability_island 1000 . +7 43878355 43878530 TAR1 1000 . +7 45291517 45291740 Low_mappability_island 1000 . +7 56437808 56442977 Low_mappability_island 1000 . +7 57253980 57254183 Low_mappability_island 1000 . +7 57255310 57255444 Low_mappability_island 1000 . +7 57261829 57261998 Low_mappability_island 1000 . +7 57544726 57556913 Satellite_repeat 1000 . +7 57811488 57836990 centromeric_repeat 1000 . +7 57939184 58055539 centromeric_repeat 1000 . +7 61054285 62454680 centromeric_repeat 1000 . +7 64059157 64066183 BSR/Beta 1000 . +7 64951348 64956223 centromeric_repeat 1000 . +7 68201468 68201673 Low_mappability_island 1000 . +7 68527370 68527788 LSU-rRNA_Hsa 1000 . +7 80962907 80963147 SSU-rRNA_Hsa 1000 . +7 100550640 100551321 Low_mappability_island 1000 . +7 142372972 142375638 Low_mappability_island 1000 . +7 145694403 145694561 Low_mappability_island 1000 . +8 155512 157639 TAR1 1000 . +8 21455971 21456306 LSU-rRNA_Hsa 1000 . +8 32868966 32873279 Low_mappability_island 1000 . +8 43092737 43097573 Satellite_repeat 1000 . +8 43399486 43843604 centromeric_repeat 1000 . +8 46838215 47457541 centromeric_repeat 1000 . +8 47739043 47742797 Low_mappability_island 1000 . +8 47750844 47776101 BSR/Beta 1000 . +8 56754955 56755418 LSU-rRNA_Hsa 1000 . +8 69218401 69218922 LSU-rRNA_Hsa 1000 . +8 70602248 70602620 LSU-rRNA_Hsa 1000 . +8 77114154 77114389 Low_mappability_island 1000 . +8 100508010 100508287 Low_mappability_island 1000 . +9 10435 11574 TAR1 1000 . +9 4799734 4800000 SSU-rRNA_Hsa 1000 . +9 33656606 33659249 Low_mappability_island 1000 . +9 42819021 42832395 centromeric_repeat 1000 . +9 44070617 44070871 Low_mappability_island 1000 . +9 44873123 44902307 centromeric_repeat 1000 . +9 45355954 45357644 telomeric_repeat 1000 . +9 45435109 45443517 centromeric_repeat 1000 . +9 66494170 66494805 TAR1 1000 . +9 66767710 66864329 centromeric_repeat 1000 . +9 66970914 67005594 centromeric_repeat 1000 . +9 67315122 67321036 centromeric_repeat 1000 . +9 67789868 67792893 centromeric_repeat 1000 . +9 68410775 68435115 Low_mappability_island 1000 . +9 69677073 69687998 centromeric_repeat 1000 . +9 69689770 69711497 centromeric_repeat 1000 . +9 69947961 70011196 centromeric_repeat 1000 . +9 70076144 70076855 centromeric_repeat 1000 . +9 70318723 70327683 centromeric_repeat 1000 . +9 72653073 72653572 Satellite_repeat 1000 . +9 78790077 78790255 (GAATG)n 1000 . +9 79186574 79187026 LSU-rRNA_Hsa 1000 . +9 141019938 141021783 TAR1 1000 . +MT 1 16569 chrM 1000 . +X 55206111 55206740 Low_mappability_island 1000 . +X 55207753 55208152 Low_mappability_island 1000 . +X 55208300 55208643 Low_mappability_island 1000 . +X 55208980 55209208 Low_mappability_island 1000 . +X 55209655 55210006 Low_mappability_island 1000 . +X 58330488 58330843 centromeric_repeat 1000 . +X 58373806 58373962 centromeric_repeat 1000 . +X 58377680 58377864 centromeric_repeat 1000 . +X 58415350 58416387 centromeric_repeat 1000 . +X 58432411 58432680 centromeric_repeat 1000 . +X 58485887 58486241 centromeric_repeat 1000 . +X 58488898 58494528 centromeric_repeat 1000 . +X 58499466 58504235 centromeric_repeat 1000 . +X 58506076 58528214 centromeric_repeat 1000 . +X 58528184 58536883 centromeric_repeat 1000 . +X 58544061 58582415 centromeric_repeat 1000 . +X 61681834 61919683 centromeric_repeat 1000 . +X 62003205 62041580 centromeric_repeat 1000 . 
+X 83658929 83659019 Low_mappability_island 1000 . +X 108297348 108297886 LSU-rRNA_Hsa 1000 . +X 114959057 115006437 Low_mappability_island 1000 . +X 125605623 125607351 Low_mappability_island 1000 . +X 125714985 125715338 Low_mappability_island 1000 . +X 125864844 125864980 Low_mappability_island 1000 . +X 125865719 125865874 Low_mappability_island 1000 . +Y 313470 313613 ALR/Alpha 1000 . +Y 3004989 3005175 LSU-rRNA_Hsa 1000 . +Y 4212807 4212910 Low_mappability_island 1000 . +Y 7671817 7694928 BSR/Beta 1000 . +Y 7726064 7730229 BSR/Beta 1000 . +Y 7730734 7731598 BSR/Beta 1000 . +Y 7735811 7752887 BSR/Beta 1000 . +Y 7785067 7806311 BSR/Beta 1000 . +Y 7806856 7814704 BSR/Beta 1000 . +Y 7815230 7820478 BSR/Beta 1000 . +Y 7829937 7832032 BSR/Beta 1000 . +Y 7832744 7848695 BSR/Beta 1000 . +Y 7870343 7873582 BSR/Beta 1000 . +Y 7874115 7874584 BSR/Beta 1000 . +Y 7875409 7885257 BSR/Beta 1000 . +Y 7886545 7894591 BSR/Beta 1000 . +Y 7898927 7916812 BSR/Beta 1000 . +Y 7918790 7921352 BSR/Beta 1000 . +Y 7926344 7936705 BSR/Beta 1000 . +Y 7941130 7947438 BSR/Beta 1000 . +Y 7948790 7964448 BSR/Beta 1000 . +Y 8179010 8181143 BSR/Beta 1000 . +Y 8181757 8213330 BSR/Beta 1000 . +Y 8214629 8215637 BSR/Beta 1000 . +Y 8220421 8230061 BSR/Beta 1000 . +Y 8230686 8231546 BSR/Beta 1000 . +Y 8240772 8265916 BSR/Beta 1000 . +Y 8291535 8292942 BSR/Beta 1000 . +Y 8294002 8295175 BSR/Beta 1000 . +Y 8296944 8321375 BSR/Beta 1000 . +Y 8325813 8325929 BSR/Beta 1000 . +Y 8326678 8333466 BSR/Beta 1000 . +Y 8334027 8342387 BSR/Beta 1000 . +Y 8356544 8369346 BSR/Beta 1000 . +Y 8909560 8909925 TAR1 1000 . +Y 8979478 8979585 Low_mappability_island 1000 . +Y 9072781 9072993 TAR1 1000 . +Y 9908430 9925608 centromeric_repeat 1000 . +Y 9981952 9982126 BSR/Beta 1000 . +Y 10034864 10036712 SSU-rRNA_Hsa 1000 . +Y 10040627 10045657 ALR/Alpha 1000 . +Y 10047773 10052533 ALR/Alpha 1000 . +Y 10053695 10057722 ALR/Alpha 1000 . +Y 10059394 10073694 ALR/Alpha 1000 . +Y 10075082 10075781 ALR/Alpha 1000 . +Y 10080736 10104539 ALR/Alpha 1000 . +Y 13104530 13144368 centromeric_repeat 1000 . +Y 13193966 13196535 Low_mappability_island 1000 . +Y 13252193 13259484 centromeric_repeat 1000 . +Y 13290177 13290667 chrM 1000 . +Y 13445957 13490591 Satellite_repeat 1000 . +Y 13642186 13749784 Satellite_repeat 1000 . +Y 13798522 13870984 Satellite_repeat 1000 . +Y 19691913 19692524 LSU-rRNA_Hsa 1000 . +Y 19764063 19776198 ALR/Alpha 1000 . +Y 19780600 19781704 ALR/Alpha 1000 . +Y 19783669 19796396 ALR/Alpha 1000 . +Y 19800068 19801419 ALR/Alpha 1000 . +Y 19808085 19817100 ALR/Alpha 1000 . +Y 19944298 19944581 TAR1 1000 . +Y 20235195 20235478 TAR1 1000 . +Y 20362679 20371694 ALR/Alpha 1000 . +Y 20378360 20379711 ALR/Alpha 1000 . +Y 20383383 20396110 ALR/Alpha 1000 . +Y 20398075 20399179 ALR/Alpha 1000 . +Y 20403581 20415713 ALR/Alpha 1000 . +Y 20487248 20487859 LSU-rRNA_Hsa 1000 . +Y 23124788 23125577 BSR/Beta 1000 . +Y 23149027 23151205 BSR/Beta 1000 . +Y 23157969 23158245 BSR/Beta 1000 . +Y 23159001 23167737 BSR/Beta 1000 . +Y 23178886 23181770 BSR/Beta 1000 . +Y 23220740 23223625 BSR/Beta 1000 . +Y 23234125 23235822 BSR/Beta 1000 . +Y 23236898 23248080 BSR/Beta 1000 . +Y 23248729 23248851 BSR/Beta 1000 . +Y 23899295 23899388 TAR1 1000 . +Y 23956449 23956628 TAR1 1000 . +Y 24247659 24247700 TAR1 1000 . +Y 24630999 24631040 TAR1 1000 . +Y 24953159 24975657 BSR/Beta 1000 . +Y 24980997 24991235 BSR/Beta 1000 . +Y 25022753 25039185 BSR/Beta 1000 . +Y 25040153 25042421 BSR/Beta 1000 . +Y 25048332 25059258 BSR/Beta 1000 . +Y 25060235 25064798 BSR/Beta 1000 . 
+Y 25099139 25121882 BSR/Beta 1000 .
+Y 25122419 25160800 BSR/Beta 1000 .
+Y 25182404 25192372 BSR/Beta 1000 .
+Y 25217722 25219409 BSR/Beta 1000 .
+Y 25493588 25495275 BSR/Beta 1000 .
+Y 26148315 26148450 TAR1 1000 .
+Y 26586905 26609405 BSR/Beta 1000 .
+Y 26614745 26624983 BSR/Beta 1000 .
+Y 26656502 26672934 BSR/Beta 1000 .
+Y 26673902 26676170 BSR/Beta 1000 .
+Y 26682081 26693007 BSR/Beta 1000 .
+Y 26693984 26698547 BSR/Beta 1000 .
+Y 26732883 26755623 BSR/Beta 1000 .
+Y 26756160 26794538 BSR/Beta 1000 .
+Y 26816148 26826116 BSR/Beta 1000 .
+Y 26851466 26853153 BSR/Beta 1000 .
+Y 27109247 27110934 BSR/Beta 1000 .
+Y 27136281 27146249 BSR/Beta 1000 .
+Y 27167859 27206241 BSR/Beta 1000 .
+Y 27206778 27229502 BSR/Beta 1000 .
+Y 27263848 27268411 BSR/Beta 1000 .
+Y 27269388 27280315 BSR/Beta 1000 .
+Y 27286226 27288494 BSR/Beta 1000 .
+Y 27289462 27305895 BSR/Beta 1000 .
+Y 27337415 27347656 BSR/Beta 1000 .
+Y 27352996 27375497 BSR/Beta 1000 .
+Y 27813984 27814119 TAR1 1000 .
+Y 28555026 28555353 TAR1 1000 .
+Y 28784129 28819695 Satellite_repeat 1000 .
+Y 58819367 58917648 (CATTC)n 1000 .
+Y 58971913 58997782 (CATTC)n 1000 .
+Y 59361267 59362785 TAR1 1000 .
\ No newline at end of file
diff --git a/assets/email_template.html b/assets/email_template.html
index de00e08c..e5f5f518 100644
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -1,113 +1,53 @@
[hunk not recoverable: the HTML markup of both versions of this Groovy e-mail template was stripped during extraction. The change re-flows the HTML report from 113 lines to 53 without apparent content changes: the nf-core/cutandrun v${version} header, Run Name ($runName), the success/failure banner with $exitStatus and ${errorReport}, completion time ($dateComplete, duration $duration), the launch $commandLine, the pipeline configuration table built from summary.collect{ k,v -> ... }, and the nf-core links; the new file ends without a trailing newline.]
diff --git a/assets/email_template.txt b/assets/email_template.txt
index d34cf2af..3d4bd78f 100644
--- a/assets/email_template.txt
+++ b/assets/email_template.txt
@@ -6,7 +6,6 @@
                                         `._,._,'
   nf-core/cutandrun v${version}
 ----------------------------------------------------
-
 Run Name: $runName

 <% if (success){
diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml
new file mode 100644
index 00000000..47dbc791
--- /dev/null
+++ b/assets/methods_description_template.yml
@@ -0,0 +1,25 @@
+id: "nf-core-cutandrun-methods-description"
+description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication."
+section_name: "nf-core/cutandrun Methods Description"
+section_href: "https://github.com/nf-core/cutandrun"
+plot_type: "html"
+## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline
+## You inject any metadata in the Nextflow '${workflow}' object
+data: |
+  <h4>Methods</h4>
+  <p>Data was processed using nf-core/cutandrun v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (<a href="https://doi.org/10.1038/s41587-020-0439-x">Ewels et al., 2020</a>).</p>
+  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (<a href="https://doi.org/10.1038/nbt.3820">Di Tommaso et al., 2017</a>) with the following command:</p>
+  <pre><code>${workflow.commandLine}</code></pre>
+  <h4>References</h4>
+  <ul>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., &amp; Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. <a href="https://doi.org/10.1038/nbt.3820">https://doi.org/10.1038/nbt.3820</a></li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., &amp; Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. <a href="https://doi.org/10.1038/s41587-020-0439-x">https://doi.org/10.1038/s41587-020-0439-x</a></li>
+  </ul>
+  <div class="alert alert-info">
+    <h5>Notes:</h5>
+    <ul>
+      ${nodoi_text}
+      <li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li>
+      <li>You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.</li>
+    </ul>
+  </div>
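The methods-description file above is MultiQC custom content: a YAML stub whose `data` field carries HTML with Groovy-style `${...}` placeholders that Nextflow fills in at run time. A minimal Python sketch of that substitution, useful for previewing the rendered HTML outside Nextflow — the metadata values below are invented stand-ins for the real `${workflow}` object, and the regex replacement only mimics, not reproduces, Nextflow's templating:

```python
#!/usr/bin/env python
"""Preview assets/methods_description_template.yml outside Nextflow."""

import re
import yaml  # requires pyyaml

# Hypothetical run metadata (illustrative only, not pipeline output)
values = {
    "workflow.manifest.version": "3.0",
    "workflow.nextflow.version": "22.04.5",
    "workflow.commandLine": "nextflow run nf-core/cutandrun -profile test,docker --outdir results",
    "doi_text": "doi: 10.5281/zenodo.5653535",
    "nodoi_text": "",
}

with open("assets/methods_description_template.yml") as fh:
    section = yaml.safe_load(fh)

# Swap each ${dotted.name} for its value; leave unknown placeholders visible
html = re.sub(r"\$\{([\w.]+)\}", lambda m: values.get(m.group(1), m.group(0)), section["data"])
print(html)
```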
diff --git a/assets/multiqc/frip_score_header.txt b/assets/multiqc/frip_score_header.txt new file mode 100644 index 00000000..a0cab909 --- /dev/null +++ b/assets/multiqc/frip_score_header.txt @@ -0,0 +1,17 @@ +#id: 'primary_frip_score' +#parent_id: 'peak_qc' +#parent_name: 'Peak QC' +#parent_description: 'This section contains peak-based QC reports' +#section_name: 'Sample FRiP score' +#description: "is generated by calculating the fraction of all mapped fragments that fall +# into the peak regions called by either MACS2 or SEACR. +# See FRiP score." +#plot_type: 'bargraph' +#anchor: 'primary_fripscore' +#pconfig: +# id: 'primary_fripscore_plot' +# title: 'FRiP score' +# ylab: 'FRiP score' +# ymax: 1 +# ymin: 0 +# tt_decimals: 2 diff --git a/assets/multiqc/peak_counts_consensus_header.txt b/assets/multiqc/peak_counts_consensus_header.txt new file mode 100644 index 00000000..0423cafd --- /dev/null +++ b/assets/multiqc/peak_counts_consensus_header.txt @@ -0,0 +1,13 @@ +#id: 'consensus_peak_counts' +#parent_id: 'peak_qc' +#parent_name: 'Peak QC' +#parent_description: 'This section contains peak-based QC reports' +#section_name: 'Consensus Peak Count' +#description: 'Calculated from the total number of peaks called by MACS2 or SEACR' +#plot_type: 'bargraph' +#anchor: 'consensus_peakcounts' +#pconfig: +# id: 'consensus_peakcounts_plot' +# title: 'Total Peak Counts' +# ylab: "# Peaks" +# cpswitch_counts_label: 'Number of Peaks' diff --git a/assets/multiqc/peak_counts_header.txt b/assets/multiqc/peak_counts_header.txt new file mode 100644 index 00000000..c6145c8a --- /dev/null +++ b/assets/multiqc/peak_counts_header.txt @@ -0,0 +1,13 @@ +#id: 'primary_peak_counts' +#parent_id: 'peak_qc' +#parent_name: 'Peak QC' +#parent_description: 'This section contains peak-based QC reports' +#section_name: 'Sample Peak Count' +#description: 'Calculated from the total number of peaks called by MACS2 or SEACR' +#plot_type: 'bargraph' +#anchor: 'primary_peakcounts' +#pconfig: +# id: 'primary_peakcounts_plot' +# title: 'Total Peak Counts' +# ylab: "# Peaks" +# cpswitch_counts_label: 'Number of Peaks' diff --git a/assets/multiqc/peak_reprod_header.txt b/assets/multiqc/peak_reprod_header.txt new file mode 100644 index 00000000..daccd629 --- /dev/null +++ b/assets/multiqc/peak_reprod_header.txt @@ -0,0 +1,13 @@ +#id: 'peak_reprod_perc' +#parent_id: 'peak_qc' +#parent_name: 'Peak QC' +#parent_description: 'This section contains peak-based QC reports' +#section_name: 'Sample Peak reproducibility %' +#description: 'Calculated from the total number of overlapping peaks within group replicate sets' +#plot_type: 'bargraph' +#anchor: 'primary_peakrepro' +#pconfig: +# id: 'primary_peakrepro_plot' +# title: 'Peak reproducibility %' +# ylab: "Reprod Fraction" +# stacking: None diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 982e93ee..bfba9bd4 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -2,17 +2,9 @@ report_comment: > This report has been generated by the nf-core/cutandrun analysis pipeline. For information about how to interpret these results, please see the documentation. 
-report_section_order:
-  software_versions:
-    order: -1000
-  nf-core-cutandrun-summary:
-    order: -1001

 export_plots: true

-report_header_info:
-  - Application Type: "CUT&RUN"
-
 # Run only these modules
 run_modules:
   - custom_content
@@ -20,7 +12,9 @@ run_modules:
   - cutadapt
   - bowtie2
   - samtools
+  - preseq
   - picard
+  - deeptools

 module_order:
   - fastqc:
@@ -42,26 +36,52 @@ module_order:
       name: "Bowtie2 (spike-in)"
      path_filters:
        - "./bowtie2_spikein/*.bowtie2.log"
-  - samtools
-  - picard
+  - samtools:
+      name: "Samtools alignment scores"
+  - preseq:
+      name: "Library Complexity"
+  - picard:
+      name: "Picard duplication stats"
+  - deeptools:
+      name: "Fragment QC"
   - custom_content

+report_section_order:
+  fragment_lengths:
+    order: -1001
+  peak_qc:
+    order: -1002
+  software_versions:
+    order: -1003
+  nf-core-cutandrun-summary:
+    order: -1004
+
 custom_content:
   order:
-    - fragment_lengths
-    - software_versions
-    - software_versions_unique
+    - primary_peak_counts
+    - consensus_peak_counts
+    - primary_frip_score
+    - peak_reprod_perc
+    - software-versions-by-process
+    - software-versions-unique

-extra_fn_clean_exts:
-  - "_val_1"
-  - "_val_2"
+# Don't show % Dups in the General Stats table (we have this from Picard)
 table_columns_visible:
-  FastQC:
+  fastqc:
     percent_duplicates: False

+extra_fn_clean_trim:
+  - ".spikein"
+  - ".target"
+  - ".target.filtered"
+  - "_1"
+  - "_2"
+  - "_1_val_1"
+  - "_2_val_2"
+
 section_comments:
   fastqc_raw_fastqc_per_base_sequence_content: >
-    The discordant sequence content at the begining of the reads are common phenomenon for CUT&TAG reads. Failing to pass the Per base seuqnence content does not mean your data failed.
+    Discordant sequence content at the beginning of the reads is a common phenomenon for CUT&TAG/RUN reads. Failing to pass the Per-base sequence content does not mean your data failed.
-    It can be due to the Tn5 preference.
-    What you might be detecting is the 10-bp periodicity that shows up as a sawtooth pattern in the length distribution. If so, this is normal and will not affect alignment or peak calling. In any case we do not recommend trimming as the bowtie2 parameters that we list will give accurate mapping information without trimming.
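The new `extra_fn_clean_trim` list above is what lets MultiQC fold the pipeline's per-stage file-name variants (`.target`, `.target.filtered`, `.spikein`, Trim Galore's `_1_val_1`/`_2_val_2`, …) onto a single sample name per report row. A simplified Python sketch of the idea — MultiQC's real cleaning is more involved (it also trims prefixes and applies extension lists), and the file names here are invented examples:

```python
# Suffixes copied from the extra_fn_clean_trim list in multiqc_config.yml
TRIM = [".spikein", ".target", ".target.filtered", "_1", "_2", "_1_val_1", "_2_val_2"]

def clean_sample_name(filename: str) -> str:
    """Strip known suffixes repeatedly (longest first) until none match."""
    name = filename
    changed = True
    while changed:
        changed = False
        for suffix in sorted(TRIM, key=len, reverse=True):
            if name.endswith(suffix):
                name = name[: -len(suffix)]
                changed = True
    return name

# All three report files collapse onto the same MultiQC sample
for f in ["h3k27me3_R1.target.filtered", "h3k27me3_R1.spikein", "h3k27me3_R1_1_val_1"]:
    print(f, "->", clean_sample_name(f))  # each prints: h3k27me3_R1
```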
diff --git a/bin/calc_frag_hist.py b/bin/calc_frag_hist.py
new file mode 100755
index 00000000..24a3614a
--- /dev/null
+++ b/bin/calc_frag_hist.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+
+import os
+import glob
+import argparse
+
+import numpy as np
+import pandas as pd
+
+############################################
+############################################
+## PARSE ARGUMENTS
+############################################
+############################################
+Description = "Calculate fragment histogram"
+
+parser = argparse.ArgumentParser(description=Description)
+
+## REQUIRED PARAMETERS
+parser.add_argument("--frag_path")
+parser.add_argument("--output")
+args = parser.parse_args()
+
+############################################
+############################################
+## MAIN FUNCTION
+############################################
+############################################
+
+print("Calculate fragment histogram")
+
+# Init
+frag_path = os.path.abspath(args.frag_path)
+frag_hist = None
+
+# Create list of deeptools raw fragment files
+dt_frag_list = glob.glob(frag_path)
+dt_frag_list.sort()
+
+for i in list(range(len(dt_frag_list))):
+    # Create dataframe from csv file for each file and save to a list of data frames
+    dt_frag_i = pd.read_csv(dt_frag_list[i], sep="\t", header=None, names=["Size", "Occurrences"])
+    frag_base_i = os.path.basename(dt_frag_list[i])
+
+    # Split txt file paths on dots
+    sample_id_list = frag_base_i.split(".")
+
+    # Join list on the elements of the sample id
+    separator = ""
+    sample_id = separator.join(sample_id_list[0:-2])
+
+    # Split sample id on underscores
+    sample_id_split_list = sample_id.split("_")
+
+    # Take first element of this list for group id
+    group_i = separator.join(sample_id_split_list[0:-1])
+
+    # Take last element of this list for replicate number
+    rep_i = sample_id_split_list[-1]
+
+    # Round column and convert occurrences to int
+    dt_frag_i = dt_frag_i.round(1)
+    dt_frag_i = dt_frag_i.astype(int)
+
+    # Create long forms of fragment histograms
+    dt_frag_i_long = np.repeat(dt_frag_i["Size"].values, dt_frag_i["Occurrences"].values)
+    dt_group_i_long = np.repeat(group_i, len(dt_frag_i_long))
+    dt_rep_i_long = np.repeat(rep_i, len(dt_frag_i_long))
+
+    dt_group_i_short = np.repeat(group_i, dt_frag_i.shape[0])
+    dt_rep_i_short = np.repeat(rep_i, dt_frag_i.shape[0])
+
+    if i == 0:
+        frags_arr = dt_frag_i_long
+        group_arr = dt_group_i_long
+        rep_arr = dt_rep_i_long
+
+        group_short = dt_group_i_short
+        rep_short = dt_rep_i_short
+        frag_hist = dt_frag_i
+    else:
+        frags_arr = np.append(frags_arr, dt_frag_i_long)
+        group_arr = np.append(group_arr, dt_group_i_long)
+        rep_arr = np.append(rep_arr, dt_rep_i_long)
+
+        group_short = np.append(group_short, dt_group_i_short)
+        rep_short = np.append(rep_short, dt_rep_i_short)
+        frag_hist = frag_hist.append(dt_frag_i)
+
+frag_hist["group"] = group_short
+frag_hist["replicate"] = rep_short
+frag_hist = frag_hist.reset_index(drop=True)
+
+size_list = frag_hist["Size"].to_numpy().astype(str)
+occurrences_list = frag_hist["Occurrences"].to_numpy().astype(str)
+size_list_sep = np.core.defchararray.add(size_list, " : ")
+x_y_list = np.core.defchararray.add(size_list_sep, occurrences_list)
+
+group_rep = frag_hist[["group", "replicate"]].groupby(["group", "replicate"]).size().reset_index()
+first_line = "data:"
+
+for i in list(range(group_rep.shape[0])):
+    group_i = group_rep.at[i, "group"]
+    rep_i = group_rep.at[i, "replicate"]
+    str_list = x_y_list[(frag_hist["group"] == group_i) &
(frag_hist["replicate"] == rep_i)] + + x_y_str = ", ".join(str_list) + full_line_i = " '" + group_i + "_" + rep_i + "' : {" + x_y_str + "}" + if i == 0: + frag_len_hist_mqc_dict = "\n".join([first_line, full_line_i]) + + else: + frag_len_hist_mqc_dict = "\n".join([frag_len_hist_mqc_dict, full_line_i]) + +txt_mqc = open(args.output, "w") +txt_mqc.write(frag_len_hist_mqc_dict) +txt_mqc.close() diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index db042cde..7d38ce17 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -13,7 +13,10 @@ def parse_args(args=None): parser = argparse.ArgumentParser(description=Description, epilog=Epilog) parser.add_argument("FILE_IN", help="Input samplesheet file.") parser.add_argument("FILE_OUT", help="Output file.") - parser.add_argument("USE_CONTROL", help="Boolean for whether or not the user has specified the pipeline must normalise against a control") + parser.add_argument( + "USE_CONTROL", + help="Boolean for whether or not the user has specified the pipeline must normalise against a control", + ) return parser.parse_args(args) @@ -45,16 +48,16 @@ def check_samplesheet(file_in, file_out, use_control): WT,1,WT_LIB2_REP1_1.fastq.gz,WT_LIB2_REP1_2.fastq.gz,CONTROL_GROUP WT,2,WT_LIB1_REP2_1.fastq.gz,WT_LIB1_REP2_2.fastq.gz,CONTROL_GROUP KO,1,KO_LIB1_REP1_1.fastq.gz,KO_LIB1_REP1_2.fastq.gz,CONTROL_GROUP - CONTROL_GROUP,1,KO_LIB1_REP1_1.fastq.gz,IGG_LIB1_REP1_2.fastq.gz, - CONTROL_GROUP,2,KO_LIB1_REP1_1.fastq.gz,IGG_LIB1_REP1_2.fastq.gz, + CONTROL_GROUP,1,KO_LIB1_REP1_1.fastq.gz,IGG_LIB1_REP1_2.fastq.gz, + CONTROL_GROUP,2,KO_LIB1_REP1_1.fastq.gz,IGG_LIB1_REP1_2.fastq.gz, """ # Init control_present = False - num_fastq_list = [] - sample_names_list = [] + num_fastq_list = [] + sample_names_list = [] control_names_list = [] - sample_run_dict = {} + sample_run_dict = {} with open(file_in, "r") as fin: @@ -66,7 +69,11 @@ def check_samplesheet(file_in, file_out, use_control): header = [x.strip('"') for x in fin.readline().strip().split(",")] if len(header) >= len(LEGACY_HEADER) and header[: len(LEGACY_HEADER)] == LEGACY_HEADER: - print("ERROR: It looks like you are using a legacy header format with a newer version of the pipeline -> {} != {}".format(",".join(header), ",".join(HEADER))) + print( + "ERROR: It looks like you are using a legacy header format with a newer version of the pipeline -> {} != {}".format( + ",".join(header), ",".join(HEADER) + ) + ) sys.exit(1) if header[: len(HEADER)] != HEADER: @@ -74,21 +81,28 @@ def check_samplesheet(file_in, file_out, use_control): sys.exit(1) ## Check sample entries + line_no = 1 for line in fin: lspl = [x.strip().strip('"') for x in line.strip().split(",")] - ## Set control_present to true if the control column is not empty - if lspl[4] != "": - control_present = True + ## Check if its just a blank line so we dont error + if line.strip() == "": + continue ## Check valid number of columns per row if len(lspl) != HEADER_LEN: print_error( - "Invalid number of columns (found {} should be {})!".format(len(lspl), len(HEADER)), + "Invalid number of columns (found {} should be {})! - line no. 
{}".format( + len(lspl), len(HEADER), line_no + ), "Line", line, ) + ## Set control_present to true if the control column is not empty + if lspl[4] != "": + control_present = True + ## Check valid number of populated columns per row num_cols = len([x for x in lspl if x]) if num_cols < MIN_COLS: @@ -154,7 +168,7 @@ def check_samplesheet(file_in, file_out, use_control): print_error("Samplesheet contains duplicate rows!", "Line", line) else: sample_run_dict[sample][replicate].append(sample_info) - + ## Store unique sample names if sample not in sample_names_list: sample_names_list.append(sample) @@ -163,6 +177,8 @@ def check_samplesheet(file_in, file_out, use_control): if control not in control_names_list: control_names_list.append(control) + line_no = line_no + 1 + ## Check data is either paired-end/single-end and not both if min(num_fastq_list) != max(num_fastq_list): print_error("Mixture of paired-end and single-end reads!") @@ -170,7 +186,9 @@ def check_samplesheet(file_in, file_out, use_control): ## Check control group exists for ctrl in control_names_list: if ctrl != "" and ctrl not in sample_names_list: - print_error("Each control entry must match at least one group entry! Unmatched control entry: {}.".format(ctrl)) + print_error( + "Each control entry must match at least one group entry! Unmatched control entry: {}.".format(ctrl) + ) ## Create control identity variable for sample in sorted(sample_run_dict.keys()): @@ -183,15 +201,21 @@ def check_samplesheet(file_in, file_out, use_control): print_error("Control cannot have a control: {}.".format(sample_info[0])) else: sample_info.append("0") - else: + else: sample_info.append("0") ## Check use_control parameter is consistent with input groups - if (use_control == 'true' and not control_present): - print_error("ERROR: No 'control' group was found in " + str(file_in) + " If you are not supplying a control, please specify --use_control 'false' on command line.") - - if (use_control == 'false' and control_present): - print("WARNING: Parameter --use_control was set to false, but an control group was found in " + str(file_in) + ".") + if use_control == "true" and not control_present: + print_error( + "ERROR: No 'control' group was found in " + + str(file_in) + + " If you are not supplying a control, please specify --use_control 'false' on command line." + ) + + if use_control == "false" and control_present: + print( + "WARNING: Parameter --use_control was set to false, but an control group was found in " + str(file_in) + "." + ) ## Write validated samplesheet with appropriate columns if len(sample_run_dict) > 0: @@ -199,7 +223,10 @@ def check_samplesheet(file_in, file_out, use_control): make_dir(out_dir) with open(file_out, "w") as fout: - fout.write(",".join(["id", "group", "replicate", "control", "single_end", "fastq_1", "fastq_2", "is_control"]) + "\n") + fout.write( + ",".join(["id", "group", "replicate", "control", "single_end", "fastq_1", "fastq_2", "is_control"]) + + "\n" + ) for sample in sorted(sample_run_dict.keys()): ## Check that replicate ids are in format 1.. 
@@ -216,10 +243,7 @@ def check_samplesheet(file_in, file_out, use_control): check_group = sample_run_dict[sample][replicate][0][2] for tech_rep in sample_run_dict[sample][replicate]: if tech_rep[2] != check_group: - print_error( - "Control group must match within technical replicates", - tech_rep[2] - ) + print_error("Control group must match within technical replicates", tech_rep[2]) ## Write to file for idx, sample_info in enumerate(sample_run_dict[sample][replicate]): @@ -231,5 +255,6 @@ def main(args=None): args = parse_args(args) check_samplesheet(args.FILE_IN, args.FILE_OUT, args.USE_CONTROL) + if __name__ == "__main__": - sys.exit(main()) \ No newline at end of file + sys.exit(main()) diff --git a/bin/frip.py b/bin/frip.py deleted file mode 100755 index f174d82f..00000000 --- a/bin/frip.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python - -import os -import glob -import argparse - -import deeptools.countReadsPerBin as crpb -import pysam - -############################################ -############################################ -## PARSE ARGUMENTS -############################################ -############################################ -Description = 'Calclate FRIP scores (FRagment proportion in Peaks regions) using deeptools for each sample' - -parser = argparse.ArgumentParser(description=Description) - -## REQUIRED PARAMETERS -parser.add_argument('--bams', help="Bam file.") -parser.add_argument('--peaks', help="Peaks interval file.") -parser.add_argument('--threads', help="the number of threads for the task.") -parser.add_argument('--outpath', help="Full path to output directory.") -args = parser.parse_args() - -############################################ -############################################ -## MAIN FUNCTION -############################################ -############################################ - -# https://deeptools.readthedocs.io/en/develop/content/example_api_tutorial.html - -# Create file lists -bam_file_list = glob.glob(args.bams) -peak_file_list = glob.glob(args.peaks) - -frips = [] -for idx, bam_file in enumerate(bam_file_list): - # Init - frip = 0 - - # Read first line - first_line = None - with open(peak_file_list[idx], "r") as file: - for line in file: - first_line = line - break - - if first_line is not None: - print("Calculating " + bam_file + " using " + peak_file_list[idx]) - cr = crpb.CountReadsPerBin([bam_file], bedFile=[peak_file_list[idx]], numberOfProcessors=int(1)) - - # Calc the total number of reads in peaks per bam file - reads_at_peaks = cr.run() - total = reads_at_peaks.sum(axis=0) - - # Load up bam file and get the total number of mapped reads - bam = pysam.AlignmentFile(bam_file) - - # Calc frip - frip = float(total[0]) / bam.mapped - - frips.append(str(frip)) - - # Log - print("Frip = " + str(frip)) - -# Create string and write to file -frip_string = ",".join(frips) -writer = open(os.path.join(args.outpath, "frips.csv"), "w") -writer.write("frip\n") -writer.write(frip_string) -writer.close() diff --git a/bin/gtf2bed b/bin/gtf2bed new file mode 100755 index 00000000..0163049e --- /dev/null +++ b/bin/gtf2bed @@ -0,0 +1,131 @@ +#!/usr/bin/env perl + +# Copyright (c) 2011 Erik Aronesty (erik@q32.com) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the 
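For reference, the deleted bin/frip.py computed FRiP (fraction of reads in peaks) via the deeptools Python API; the same metric is now produced by the PEAK_QC:PEAK_FRIP module configured later in this diff. Its core calculation reduces to the sketch below, which mirrors the removed script; the BAM and peak paths are placeholders:

# Sketch of a FRiP calculation with the deeptools API and pysam,
# mirroring the deleted bin/frip.py ("sample.bam"/"sample.peaks.bed" are placeholders).
import deeptools.countReadsPerBin as crpb
import pysam

cr = crpb.CountReadsPerBin(["sample.bam"], bedFile=["sample.peaks.bed"], numberOfProcessors=1)
reads_at_peaks = cr.run()                      # per-peak read counts, one column per BAM
total_in_peaks = reads_at_peaks.sum(axis=0)[0]

bam = pysam.AlignmentFile("sample.bam")
frip = float(total_in_peaks) / bam.mapped      # mapped-read total from the BAM index
print("FRiP = {:.3f}".format(frip))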
Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ALSO, IT WOULD BE NICE IF YOU LET ME KNOW YOU USED IT. + +use Getopt::Long; + +my $extended; +my $displaynames = ''; +GetOptions("x"=>\$extended, "names"=>\$displaynames); + +$in = shift @ARGV; + +my $in_cmd =($in =~ /\.gz$/ ? "gunzip -c $in|" : $in =~ /\.zip$/ ? "unzip -p $in|" : "$in") || die "Can't open $in: $!\n"; +open IN, $in_cmd; + +while () { + $gff = 2 if /^##gff-version 2/; + $gff = 3 if /^##gff-version 3/; + next if /^#/ && $gff; + + s/\s+$//; + # 0-chr 1-src 2-feat 3-beg 4-end 5-scor 6-dir 7-fram 8-attr + my @f = split /\t/; + if ($gff) { + # most ver 2's stick gene names in the id field + ($id) = $f[8]=~ /\bID="([^"]+)"/; + # most ver 3's stick unquoted names in the name field + ($id) = $f[8]=~ /\bName=([^";]+)/ if !$id && $gff == 3; + } else { + ($id) = $f[8]=~ /transcript_id "([^"]+)"/; + } + + next unless $id && $f[0]; + + if ($f[2] eq 'exon') { + die "no position at exon on line $." if ! $f[3]; + # gff3 puts :\d in exons sometimes + $id =~ s/:\d+$// if $gff == 3; + push @{$exons{$id}}, \@f; + # save lowest start + $trans{$id} = \@f if !$trans{$id}; + } elsif ($f[2] eq 'start_codon') { + #optional, output codon start/stop as "thick" region in bed + $sc{$id}->[0] = $f[3]; + } elsif ($f[2] eq 'stop_codon') { + $sc{$id}->[1] = $f[4]; + } elsif ($f[2] eq 'miRNA' ) { + $trans{$id} = \@f if !$trans{$id}; + push @{$exons{$id}}, \@f; + } +} + +for $id ( + # sort by chr then pos + sort { + $trans{$a}->[0] eq $trans{$b}->[0] ? 
+ $trans{$a}->[3] <=> $trans{$b}->[3] : + $trans{$a}->[0] cmp $trans{$b}->[0] + } (keys(%trans)) ) { + my ($chr, undef, undef, undef, undef, undef, $dir, undef, $attr, undef, $cds, $cde) = @{$trans{$id}}; + my ($cds, $cde); + ($cds, $cde) = @{$sc{$id}} if $sc{$id}; + + # sort by pos + my @ex = sort { + $a->[3] <=> $b->[3] + } @{$exons{$id}}; + + my $beg = $ex[0][3]; + my $end = $ex[-1][4]; + + if ($dir eq '-') { + # swap + $tmp=$cds; + $cds=$cde; + $cde=$tmp; + $cds -= 2 if $cds; + $cde += 2 if $cde; + } + + # not specified, just use exons + $cds = $beg if !$cds; + $cde = $end if !$cde; + + # adjust start for bed + --$beg; --$cds; + + my $exn = @ex; # exon count + my $exst = join ",", map {$_->[3]-$beg-1} @ex; # exon start + my $exsz = join ",", map {$_->[4]-$_->[3]+1} @ex; # exon size + + my $gene_id; + my $extend = ""; + + ($gene_id) = $attr =~ /gene_name "([^"]+)"/; + ($gene_id) = $attr =~ /gene_id "([^"]+)"/ unless $gene_id; + if ($extended) { + $extend="\t$gene_id"; + } + + if ($displaynames) { + print "$chr\t$beg\t$end\t$gene_id\t0\t$dir\t$cds\t$cde\t0\t$exn\t$exsz,\t$exst,$extend\n"; + } else { + print "$chr\t$beg\t$end\t$id\t0\t$dir\t$cds\t$cde\t0\t$exn\t$exsz,\t$exst,$extend\n"; + } + + # added an extra comma to make it look exactly like ucsc's beds + # print "$chr\t$beg\t$end\t$id\t0\t$dir\t$cds\t$cde\t0\t$exn\t$exsz,\t$exst,$extend\n"; +} + +close IN; diff --git a/bin/igv_files_to_session.py b/bin/igv_files_to_session.py index cf7c3124..7ed47df0 100755 --- a/bin/igv_files_to_session.py +++ b/bin/igv_files_to_session.py @@ -22,12 +22,24 @@ argParser = argparse.ArgumentParser(description=Description, epilog=Epilog) ## REQUIRED PARAMETERS -argParser.add_argument('XML_OUT', help="XML output file.") -argParser.add_argument('LIST_FILE', help="Tab-delimited file containing two columns i.e. file_name\tcolour. Header isnt required.") -argParser.add_argument('GENOME', help="Full path to genome fasta file or shorthand for genome available in IGV e.g. hg19.") +argParser.add_argument("XML_OUT", help="XML output file.") +argParser.add_argument( + "LIST_FILE", help="Tab-delimited file containing two columns i.e. file_name\tcolour. Header isnt required." +) +argParser.add_argument( + "GENOME", help="Full path to genome fasta file or shorthand for genome available in IGV e.g. hg19." 
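bin/gtf2bed above is Erik Aronesty's Perl converter: it buckets exon records by transcript_id and emits one BED12-style line per transcript, with 0-based starts and comma-joined block sizes and offsets. A hedged Python sketch of the core transformation (GFF2/GFF3 handling and the codon-derived thick start/end are omitted, attribute parsing is simplified to a double-quoted transcript_id, and "genes.gtf" is a placeholder):

# Sketch: collapse GTF exon records into BED12-style lines, one per transcript.
import re
from collections import defaultdict

exons = defaultdict(list)                           # transcript_id -> [(chrom, start, end, strand)]
with open("genes.gtf") as gtf:                      # hypothetical input
    for line in gtf:
        if line.startswith("#"):
            continue
        f = line.rstrip("\n").split("\t")
        if len(f) < 9 or f[2] != "exon":
            continue
        m = re.search(r'transcript_id "([^"]+)"', f[8])
        if m:
            exons[m.group(1)].append((f[0], int(f[3]), int(f[4]), f[6]))

for tid, ex in exons.items():
    ex.sort(key=lambda e: e[1])                     # sort exons by start
    chrom, strand = ex[0][0], ex[0][3]
    beg, end = ex[0][1] - 1, ex[-1][2]              # BED is 0-based, half-open
    sizes = ",".join(str(e[2] - e[1] + 1) for e in ex)
    starts = ",".join(str(e[1] - 1 - beg) for e in ex)
    print(f"{chrom}\t{beg}\t{end}\t{tid}\t0\t{strand}\t{beg}\t{end}\t0\t{len(ex)}\t{sizes},\t{starts},")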
+) +argParser.add_argument("GTF_BED") ## OPTIONAL PARAMETERS -argParser.add_argument('-pp', '--path_prefix', type=str, dest="PATH_PREFIX", default='', help="Path prefix to be added at beginning of all files in input list file.") +argParser.add_argument( + "-pp", + "--path_prefix", + type=str, + dest="PATH_PREFIX", + default="", + help="Path prefix to be added at beginning of all files in input list file.", +) args = argParser.parse_args() ############################################ @@ -36,6 +48,7 @@ ############################################ ############################################ + def makedir(path): if not len(path) == 0: @@ -45,80 +58,125 @@ def makedir(path): if exception.errno != errno.EEXIST: raise + ############################################ ############################################ ## MAIN FUNCTION ############################################ ############################################ -def igv_files_to_session(XMLOut,ListFile,Genome,PathPrefix=''): + +def igv_files_to_session(XMLOut, ListFile, Genome, GtfBed, PathPrefix=""): makedir(os.path.dirname(XMLOut)) fileList = [] - fin = open(ListFile,'r') + fin = open(ListFile, "r") while True: line = fin.readline() if line: - ifile,colour = line.strip().split('\t') + ifile, colour = line.strip().split("\t") if len(colour.strip()) == 0: - colour = '0,0,178' - fileList.append((PathPrefix.strip()+ifile,colour)) + colour = "0,0,178" + fileList.append((PathPrefix.strip() + ifile, colour)) else: break fout.close() ## ADD RESOURCES SECTION - XMLStr = '\n' + XMLStr = '\n' XMLStr += '\n' % (Genome) - XMLStr += '\t\n' - for ifile,colour in fileList: + XMLStr += "\t\n" + for ifile, colour in fileList: XMLStr += '\t\t\n' % (ifile) - XMLStr += '\t\n' + XMLStr += '\t\t\n' % (GtfBed) + XMLStr += "\t\n" - ## ADD PANEL SECTION XMLStr += '\t\n' - for ifile,colour in fileList: + + # Render gene file first + XMLStr += ( + '\t\t\n' + % (GtfBed, os.path.basename(GtfBed)) + ) + + ## Do a GTF pass first + for ifile, colour in fileList: + extension = os.path.splitext(ifile)[1].lower() + if extension in [".gtf"]: + XMLStr += ( + '\t\t\n' + % (ifile, os.path.basename(ifile)) + ) + elif extension in [".gff"]: + XMLStr += ( + '\t\t\n' + % (ifile, os.path.basename(ifile)) + ) + + ## Then beds/narrowpeak + for ifile, colour in fileList: extension = os.path.splitext(ifile)[1].lower() - if extension in ['.bed','.broadpeak','.narrowpeak']: - XMLStr += '\t\t\n' + % (ifile, os.path.basename(ifile)) + ) + elif extension in [".bw", ".bigwig", ".tdf", ".bedGraph", ".bedgraph"]: + XMLStr += ( + '\t\t\n' + % (ifile, os.path.basename(ifile)) + ) XMLStr += '\t\t\t\n' - XMLStr += '\t\t\n' - elif extension in ['.gtf']: - XMLStr += '\t\t 0: # Filter for files which had an overlap and group by peak @@ -81,21 +106,20 @@ ddf_inter_grouped = ddf_inter_filt.groupby(by=["key"]).size() df_inter_grouped = ddf_inter_grouped.compute() df_inter_grouped = df_inter_grouped.reset_index() - df_inter_grouped = df_inter_grouped.rename({0: 'count'}, axis=1) + df_inter_grouped = df_inter_grouped.rename({0: "count"}, axis=1) # Filter for peaks which have full overlap df_inter_grouped_filter = df_inter_grouped[df_inter_grouped["count"] == numfiles] overlap_peaks = len(df_inter_grouped_filter.index) - print('Overlap peaks: ' + str(overlap_peaks)) + print("Overlap peaks: " + str(overlap_peaks)) - # Calc peak percentage + # Calc peak percentage peak_perc = (overlap_peaks / total_peaks) * 100 else: - print('Empty file detected') + print("Empty file detected") # Create string and write to file 
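The peak-reproducibility script above takes bedtools intersect -C output and, via dask, counts the peaks that overlap a peak in every other replicate, reporting the result as a percentage. The same reduction in plain pandas (column names follow the snippet above; dask only matters for very large peak sets):

# Sketch: percent of peaks recovered in all replicate comparisons, from
# "bedtools intersect -C" style rows keyed by peak, one row per replicate file.
import pandas as pd

df = pd.DataFrame({
    "key":   ["p1", "p1", "p2", "p2", "p3", "p3"],    # peak id per comparison row
    "count": [1, 2, 0, 1, 1, 1],                      # overlap count from intersect -C
})
numfiles = 2                                          # replicate files intersected against
total_peaks = df["key"].nunique()

hits = df[df["count"] > 0].groupby("key").size()      # replicates that hit each peak
overlap_peaks = int((hits == numfiles).sum())         # peaks with full overlap
peak_perc = overlap_peaks / total_peaks * 100
print("Overlap peaks: {} ({:.1f}%)".format(overlap_peaks, peak_perc))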
output_string = str(peak_perc) -writer = open(os.path.join(args.outpath, "peak_repro.csv"), "w") -writer.write("peak_repro\n") -writer.write(output_string) +writer = open(os.path.join(args.outpath, args.sample_id + "_peak_repro.tsv"), "w") +writer.write(args.sample_id + "\t" + output_string + "\n") writer.close() diff --git a/bin/consensus_peaks.py b/bin/plot_consensus_peaks.py similarity index 64% rename from bin/consensus_peaks.py rename to bin/plot_consensus_peaks.py index 8aa943e2..072a1f8b 100755 --- a/bin/consensus_peaks.py +++ b/bin/plot_consensus_peaks.py @@ -13,14 +13,14 @@ ## PARSE ARGUMENTS ############################################ ############################################ -Description = 'Upset ven diagram of consensus peaks.' +Description = "Upset venn diagram of consensus peaks." Epilog = """Example usage: python consensus_peaks.py """ parser = argparse.ArgumentParser(description=Description, epilog=Epilog) ## REQUIRED PARAMETERS -parser.add_argument('--peaks', help="Merged peaks interval file with replicate counts column.") -parser.add_argument('--outpath', help="Full path to output directory.") +parser.add_argument("--peaks", help="Merged peaks interval file with replicate counts column.") +parser.add_argument("--outpath", help="Full path to output directory.") args = parser.parse_args() ############################################ @@ -33,32 +33,38 @@ peak_file_list = glob.glob(args.peaks) if len(peak_file_list) > 10: - print('WARN: There are too many files to generate an upset plot, cancelling figure generation') + print("WARN: There are too many files to generate an upset plot, cancelling figure generation") exit(0) peak_df_list = list() for i in list(range(len(peak_file_list))): - peaks_i = pd.read_csv(peak_file_list[i], sep='\t', header=None, usecols=[0,1,2,8,9], names=['chrom','start','end','sample_reps','count']) - peaks_i['sample_reps'] = peaks_i['sample_reps'].replace(".peaks.bed.stringent.bed", "", regex=True) + peaks_i = pd.read_csv( + peak_file_list[i], + sep="\t", + header=None, + usecols=[0, 1, 2, 8, 9], + names=["chrom", "start", "end", "sample_reps", "count"], + ) + peaks_i["sample_reps"] = peaks_i["sample_reps"].replace(".peaks.bed.stringent.bed", "", regex=True) peak_df_list.append(peaks_i) - reps2 = peaks_i[peaks_i["count"]>1] + reps2 = peaks_i[peaks_i["count"] > 1] # add sorted column to each dataframe, and make new condensed dataframe list summary_peak_df_list = list() for i in list(range(len(peak_df_list))): peaks_i = peak_df_list[i] - peaks_i['sorted_samples'] = '' + peaks_i["sorted_samples"] = "" rows_now = peaks_i.shape[0] for j in list(range(rows_now)): - sample_list = peaks_i.at[j,'sample_reps'] - sample_array = np.unique(sample_list.split(',')) + sample_list = peaks_i.at[j, "sample_reps"] + sample_array = np.unique(sample_list.split(",")) sample_sorted = sorted(sample_array) sample_str = ",".join(sample_sorted) - peaks_i.at[j,'sorted_samples'] = sample_str - summary_peaks_i = peaks_i[['sorted_samples', 'count']].groupby(['sorted_samples'], as_index = False).sum() + peaks_i.at[j, "sorted_samples"] = sample_str + summary_peaks_i = peaks_i[["sorted_samples", "count"]].groupby(["sorted_samples"], as_index=False).sum() summary_peak_df_list.append(summary_peaks_i) -# construct data in appropriate format for upsetplot, and plot +# construct data in appropriate format for upsetplot, and plot for i in list(range(len(summary_peak_df_list))): df_i = summary_peak_df_list[i] # Get group name @@ -68,11 +74,11 @@ categories = df_i.shape[0] cat_list = [] for 
j in list(range(categories)): - summary_sample = df_i.at[j,'sorted_samples'].split(',') + summary_sample = df_i.at[j, "sorted_samples"].split(",") cat_list.append(summary_sample) # Plot - peak_counts = upsetplot.from_memberships(cat_list, data = df_i['count']) + peak_counts = upsetplot.from_memberships(cat_list, data=df_i["count"]) upsetplot.plot(peak_counts) plt.show() plt.savefig(os.path.join(args.outpath, file_name)) diff --git a/bin/reporting.py b/bin/reporting.py deleted file mode 100755 index afce480c..00000000 --- a/bin/reporting.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import logging - -from lib.reports import Reports - -def init_logger(app_name, log_file = None): - logger = logging.getLogger(app_name) - logger.setLevel(logging.DEBUG) - formatter = logging.Formatter('%(asctime)s:%(name)s:%(levelname)s - %(message)s') - - if log_file: - fh = logging.FileHandler(log_file) - fh.setLevel(logging.DEBUG) - fh.setFormatter(formatter) - logger.addHandler(fh) - - ch = logging.StreamHandler() - ch.setLevel(logging.INFO) - ch.setFormatter(formatter) - logger.addHandler(ch) - - return logger - -def gen_png(parsed_args): - meta_path = parsed_args.meta - meta_ctrl_path = parsed_args.meta_ctrl - frag_path = parsed_args.raw_frag - output_path = parsed_args.output - logger = init_logger('gen_img', parsed_args.log) - bin_frag_path = parsed_args.bin_frag - seacr_bed_path = parsed_args.seacr_bed - - logger.info('Generating plots to output folder') - report_gen = Reports(logger, meta_path, meta_ctrl_path, frag_path, bin_frag_path, seacr_bed_path) - report_gen.generate_cutandrun_reports(output_path) - - logger.info('Completed') - -if __name__ == '__main__': - # Create command args - parser = argparse.ArgumentParser() - subparsers = parser.add_subparsers(help='sub-command help') - - # Generate reporting function - parser_genimg = subparsers.add_parser('gen_reports') - parser_genimg.set_defaults(func=gen_png) - parser_genimg.add_argument('--log', required=False) - parser_genimg.add_argument('--meta', required=True) - parser_genimg.add_argument('--meta_ctrl', required=True) - parser_genimg.add_argument('--raw_frag', required=True) - parser_genimg.add_argument('--bin_frag', required=True) - parser_genimg.add_argument('--seacr_bed', required=True) - parser_genimg.add_argument('--output', required=True) - - # Parse - parsed_args = parser.parse_args() - - # Init logging - logger = init_logger('reporting', parsed_args.log) - logger.info("CUT&RUN Python Reporting") - - print(parsed_args) - - # Call functions - parsed_args.func(parsed_args) diff --git a/bin/reports.py b/bin/reports.py new file mode 100644 index 00000000..6a6e5877 --- /dev/null +++ b/bin/reports.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python +# coding: utf-8 + +import argparse +import logging +import os +import glob +import pandas as pd + +# * +# ======================================================================================== +# UTIL +# ======================================================================================== +# */ + + +def init_logger(app_name, log_file=None): + logger = logging.getLogger(app_name) + logger.setLevel(logging.INFO) + formatter = logging.Formatter("%(asctime)s:%(name)s:%(levelname)s - %(message)s") + + if log_file: + fh = logging.FileHandler(log_file) + fh.setLevel(logging.INFO) + fh.setFormatter(formatter) + logger.addHandler(fh) + + ch = 
logging.StreamHandler() + ch.setLevel(logging.INFO) + ch.setFormatter(formatter) + logger.addHandler(ch) + + return logger + + +# * +# ======================================================================================== +# MAIN +# ======================================================================================== +# */ + +# Input a collection of file tables from different samples and append the sample ids, then output one table +def merge_samples(args): + # Init + metadata_path = os.path.abspath(args.metadata) + + # Log + logger.info("merge_samples " + metadata_path) + + # Get file list + file_list = glob.glob(metadata_path) + file_list.sort() + + df_metadata = None + for idx, file in enumerate(file_list): + # Strip sample id and group name + sample_id = os.path.basename(file).replace(args.id_parse_string, "") + group_name = "_".join(sample_id.split("_")[:-1]) + + # Load table + df_newdata = pd.read_csv(file, sep=",") + df_newdata["id"] = sample_id + df_newdata["group"] = group_name + df_newdata = df_newdata.set_index("id") + + if idx == 0: + df_metadata = df_newdata + else: + df_metadata = df_metadata.append(df_newdata) + + df_metadata.to_csv(args.output, index=True, sep=",") + + +if __name__ == "__main__": + # Create command args + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(help="sub-command help") + subparsers.required = True + + # Generate reporting function + newparser = subparsers.add_parser("merge_samples") + newparser.set_defaults(func=merge_samples) + newparser.add_argument("--log", required=False) + newparser.add_argument("--metadata", required=True) + newparser.add_argument("--id_parse_string", required=True) + newparser.add_argument("--output", required=True) + + # Parse + parsed_args = parser.parse_args() + + # Init logging + logger = init_logger("reporting", parsed_args.log) + logger.info("CUT&RUN Python Reporting") + + # Call functions + parsed_args.func(parsed_args) diff --git a/conf/base.config b/conf/base.config index 6fb65804..8da75675 100644 --- a/conf/base.config +++ b/conf/base.config @@ -17,6 +17,17 @@ process { maxRetries = 1 maxErrors = '-1' + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. + // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } diff --git a/conf/flowswitch.config b/conf/flowswitch.config index 566ea296..f9226c29 100644 --- a/conf/flowswitch.config +++ b/conf/flowswitch.config @@ -13,7 +13,8 @@ params { run_trim_galore_fastqc = true run_alignment = true - run_q_filter = false + run_read_filter = true + run_preseq = true run_mark_dups = true run_remove_dups = true @@ -21,9 +22,10 @@ params { run_consensus_all = false run_reporting = true - run_deep_tools = true + run_deeptools_heatmaps = true + run_deeptools_qc = true + run_peak_qc = true run_multiqc = true - run_peak_plotting = true run_igv = true } @@ -31,16 +33,16 @@ params.callers = params.peakcaller ? 
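The new process_single label in conf/base.config follows the usual nf-core retry pattern: each request is scaled by task.attempt and clamped by check_max, so a retried task asks for more memory and time without exceeding the --max_* limits. An illustrative Python model of that clamping (check_max itself is defined in the pipeline's nextflow.config; the 128 GB ceiling here is a stand-in for --max_memory):

# Illustrative model only: how 6.GB * task.attempt escalates under a cap.
def check_max(requested_gb: float, max_gb: float = 128.0) -> float:
    """Return the requested value, capped at the configured maximum."""
    return min(requested_gb, max_gb)

for attempt in (1, 2, 3):
    print("attempt {}: {} GB".format(attempt, check_max(6.0 * attempt)))  # 6, 12, 18 GB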
params.peakcaller.split(',').collect{ it.tr if(params.consensus_peak_mode == 'all') { params.run_consensus_all = true } -if(params.minimum_alignment_q_score > 0) { params.run_q_filter = true } -if(params.skip_removeduplicates || !params.run_mark_dups) { params.run_remove_dups = false } -if(!params.gene_bed || params.skip_heatmaps) { params.run_deep_tools = false } -if(params.skip_multiqc) { params.run_multiqc = false } -if(params.skip_upset_plots) { params.run_peak_plotting = false } -if(params.skip_igv) { params.run_igv = false } +if(params.skip_removeduplicates || !params.run_mark_dups) { params.run_remove_dups = false } +if(params.skip_preseq) { params.run_preseq = false } +if(params.skip_heatmaps) { params.run_deeptools_heatmaps = false } +if(params.skip_dt_qc) { params.run_deeptools_qc = false } +if(params.skip_peak_qc) { params.run_peak_qc = false } +if(params.skip_igv) { params.run_igv = false } if(params.skip_reporting) { params.run_reporting = false + params.run_preseq = false params.run_multiqc = false - params.run_peak_plotting = false } if(params.only_input) { @@ -48,7 +50,8 @@ if(params.only_input) { params.run_cat_fastq = false params.run_trim_galore_fastqc = false params.run_alignment = false - params.run_q_filter = false + params.run_read_filter = false + params.run_preseq = false params.run_mark_dups = false params.run_remove_dups = false params.run_peak_calling = false @@ -61,7 +64,8 @@ if(params.only_genome) { params.run_cat_fastq = false params.run_trim_galore_fastqc = false params.run_alignment = false - params.run_q_filter = false + params.run_read_filter = false + params.run_preseq = false params.run_mark_dups = false params.run_remove_dups = false params.run_peak_calling = false @@ -72,7 +76,8 @@ if(params.only_genome) { if(params.only_preqc) { params.run_genome_prep = false params.run_alignment = false - params.run_q_filter = false + params.run_read_filter = false + params.run_preseq = false params.run_mark_dups = false params.run_remove_dups = false params.run_peak_calling = false @@ -81,7 +86,8 @@ if(params.only_preqc) { } if(params.only_alignment) { - params.run_q_filter = false + params.run_read_filter = false + params.run_preseq = false params.run_mark_dups = false params.run_remove_dups = false params.run_peak_calling = false @@ -99,3 +105,5 @@ if(params.only_peak_calling) { params.run_reporting = false params.run_multiqc = true } + +if(params.skip_multiqc) { params.run_multiqc = false } diff --git a/conf/modules.config b/conf/modules.config index b5ca710c..e50e98f2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -56,19 +56,24 @@ if(params.run_input_check) { ======================================================================================== */ -if (params.run_genome_prep) { +if(params.run_genome_prep) { process { + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_GENOME:UNTAR_.*' { + ext.args2 = '--no-same-owner' + } + withName: '.*PREPARE_GENOME:GUNZIP_.*' { publishDir = [ - path: { "${params.outdir}/00_genome" }, - mode: "${params.publish_dir_mode}", - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.save_reference + enabled: false ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_GENOME:UNTAR_.*' { - ext.args2 = '--no-same-owner' + withName: '.*PREPARE_GENOME:TABIX_BGZIPTABIX' { + publishDir = [ + path: { "${params.outdir}/00_genome/annotation" }, + mode: "${params.publish_dir_mode}", + enabled: params.save_reference + ] } withName: '.*CHROMSIZES' { @@ -97,6 +102,71 @@ if (params.run_genome_prep) { enabled: params.save_reference ] } + + withName: '.*PREPARE_GENOME:SAMTOOLS_FAIDX' { + publishDir = [ + path: { "${params.outdir}/00_genome" }, + mode: "${params.publish_dir_mode}", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_GENOME:ANNOTATION_BEDTOOLS_SORT' { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/00_genome/annotation" }, + mode: "${params.publish_dir_mode}", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + } +} + +if (params.run_genome_prep && params.blacklist) { + process { + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_GENOME:BLACKLIST_AWK' { + ext.command = "'{OFS = \"\\t\"} {print \$1, '0', \$2}'" + ext.ext = "bed" + publishDir = [ + enabled: false + ] + } + + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_GENOME:BLACKLIST_BEDTOOLS_SORT' { + publishDir = [ + enabled: false + ] + } + + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_GENOME:BLACKLIST_BEDTOOLS_INTERSECT' { + publishDir = [ + enabled: false + ] + } + + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_GENOME:BLACKLIST_BEDTOOLS_COMPLEMENT' { + publishDir = [ + path: { "${params.outdir}/00_genome" }, + mode: "${params.publish_dir_mode}", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + } +} + +if (params.run_genome_prep && !params.gene_bed) { + process { + withName: '.*PREPARE_GENOME:GTF2BED' { + publishDir = [ + path: { "${params.outdir}/00_genome/annotation" }, + mode: "${params.publish_dir_mode}", + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] + } } } @@ -181,7 +251,7 @@ if(params.run_alignment) { path: { "${params.outdir}/02_alignment/${params.aligner}/target" }, mode: "${params.publish_dir_mode}", pattern: '*.bam', - enabled: ( params.save_align_intermed || (!params.run_q_filter && !params.run_mark_dups && !params.run_remove_dups) ) + enabled: ( params.save_align_intermed || (!params.run_read_filter && !params.run_mark_dups && !params.run_remove_dups) ) ], [ path: { "${params.outdir}/02_alignment/${params.aligner}/target/unmapped" }, @@ -221,7 +291,7 @@ if(params.run_alignment) { path: { "${params.outdir}/02_alignment/${params.aligner}/target" }, mode: "${params.publish_dir_mode}", pattern: "*.{stats,flagstat,idxstats}", - enabled: ( params.save_align_intermed || (!params.run_q_filter && !params.run_mark_dups && !params.run_remove_dups) ) + enabled: ( params.save_align_intermed || (!params.run_read_filter && !params.run_mark_dups && !params.run_remove_dups) ) ] } @@ -240,7 +310,7 @@ if(params.run_alignment) { path: { "${params.outdir}/02_alignment/${params.aligner}/target" }, mode: "${params.publish_dir_mode}", pattern: "*.bam", - enabled: ( params.save_align_intermed || (!params.run_q_filter && !params.run_mark_dups && !params.run_remove_dups) ) + enabled: ( params.save_align_intermed || (!params.run_read_filter && !params.run_mark_dups && !params.run_remove_dups) ) ] } @@ -260,7 +330,7 @@ if(params.run_alignment) { path: { "${params.outdir}/02_alignment/${params.aligner}/target" }, mode: "${params.publish_dir_mode}", pattern: "*.bai", - enabled: ( params.save_align_intermed || (!params.run_q_filter && !params.run_mark_dups && !params.run_remove_dups) ) + enabled: ( params.save_align_intermed || (!params.run_read_filter && !params.run_mark_dups && !params.run_remove_dups) ) ] } @@ -278,14 +348,14 @@ if(params.run_alignment) { /* ======================================================================================== - Q-FILTERING + READ-FILTERING ======================================================================================== */ -if(params.run_q_filter) { +if(params.run_read_filter) { process { - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:SAMTOOLS_VIEW_SORT_STATS:SAMTOOLS_VIEW' { - ext.args = "-b -q ${params.minimum_alignment_q_score}" + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:FILTER_READS:SAMTOOLS_VIEW' { + ext.args = "-b -q ${params.minimum_alignment_q_score} -F 0x004 -F 0x0008 -f 0x001" ext.prefix = { "${meta.id}.target.filtered" } publishDir = [ path: { "${params.outdir}/02_alignment/${params.aligner}/target" }, @@ -295,7 +365,7 @@ if(params.run_q_filter) { ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:SAMTOOLS_VIEW_SORT_STATS:SAMTOOLS_SORT' { + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:FILTER_READS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.target.filtered.sorted" } publishDir = [ path: { "${params.outdir}/02_alignment/${params.aligner}/target" }, @@ -305,7 +375,7 @@ if(params.run_q_filter) { ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:SAMTOOLS_VIEW_SORT_STATS:SAMTOOLS_INDEX' { + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:FILTER_READS:SAMTOOLS_INDEX' { ext.prefix = { "${meta.id}.target.filtered" } publishDir = [ path: { "${params.outdir}/02_alignment/${params.aligner}/target" }, @@ -315,7 +385,7 @@ if(params.run_q_filter) { ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:SAMTOOLS_VIEW_SORT_STATS:BAM_STATS_SAMTOOLS:.*' { + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:FILTER_READS:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.target.filtered" } publishDir = [ path: { 
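The FILTER_READS arguments above tighten the old MAPQ-only filter: -q keeps alignments at or above params.minimum_alignment_q_score, -F 0x004 and -F 0x0008 drop reads that are unmapped or whose mate is unmapped, and -f 0x001 keeps only paired reads. A pysam sketch of the equivalent per-read test (paths and the cutoff are placeholders):

# Sketch: the per-read test behind "samtools view -b -q Q -F 0x004 -F 0x0008 -f 0x001".
import pysam

MIN_MAPQ = 20                                        # placeholder for params.minimum_alignment_q_score
with pysam.AlignmentFile("sample.target.bam", "rb") as bam, \
     pysam.AlignmentFile("sample.target.filtered.bam", "wb", template=bam) as out:
    for read in bam:
        if (read.is_paired                           # -f 0x001
                and not read.is_unmapped             # -F 0x004
                and not read.mate_is_unmapped        # -F 0x0008
                and read.mapping_quality >= MIN_MAPQ):  # -q
            out.write(read)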
"${params.outdir}/02_alignment/${params.aligner}/target" }, @@ -327,6 +397,25 @@ if(params.run_q_filter) { } } +/* +======================================================================================== + PRESEQ +======================================================================================== +*/ + +if (params.run_preseq) { + process { + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PRESEQ_LCEXTRAP' { + ext.args = "-v -B" + publishDir = [ + path: { "${params.outdir}/04_reporting/preseq" }, + mode: "${params.publish_dir_mode}", + pattern: "*.{log,txt}", + ] + } + } +} + /* ======================================================================================== DUPLICATES @@ -346,22 +435,17 @@ if(params.run_mark_dups) { ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:MARK_DUPLICATES_PICARD:SAMTOOLS_INDEX' { + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:MARK_DUPLICATES_PICARD:BAM_SORT_SAMTOOLS:.*' { publishDir = [ path: { "${params.outdir}/02_alignment/${params.aligner}/target" }, mode: "${params.publish_dir_mode}", - pattern: "*.bai", + pattern: "*.{stats,flagstat,idxstats,bam,bai}", enabled: ( params.save_align_intermed || (!params.run_remove_dups) ) ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:MARK_DUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' { - publishDir = [ - path: { "${params.outdir}/02_alignment/${params.aligner}/target" }, - mode: "${params.publish_dir_mode}", - pattern: "*.{stats,flagstat,idxstats}", - enabled: ( params.save_align_intermed || (!params.run_remove_dups) ) - ] + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:MARK_DUPLICATES_PICARD:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.target.markdup.sorted" } } } } @@ -379,22 +463,17 @@ if(params.run_remove_dups) { ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEDUPLICATE_PICARD:SAMTOOLS_INDEX' { + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEDUPLICATE_PICARD:BAM_SORT_SAMTOOLS:.*' { publishDir = [ path: { "${params.outdir}/02_alignment/${params.aligner}/target" }, mode: "${params.publish_dir_mode}", - pattern: "*.bai", + pattern: "*.{stats,flagstat,idxstats,bam,bai}", enabled: true ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEDUPLICATE_PICARD:BAM_STATS_SAMTOOLS:.*' { - publishDir = [ - path: { "${params.outdir}/02_alignment/${params.aligner}/target" }, - mode: "${params.publish_dir_mode}", - pattern: "*.{stats,flagstat,idxstats}", - enabled: true - ] + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEDUPLICATE_PICARD:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.target.dedup.sorted" } } } } @@ -407,15 +486,15 @@ if(params.run_remove_dups) { if (params.run_alignment) { process { - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:ANNOTATE_BT2_META:AWK_SCRIPT' { - ext.suffix = ".target" + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:EXTRACT_BT2_TARGET_META:AWK_SCRIPT' { + ext.suffix = "_meta_bt2_target" publishDir = [ enabled: false ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:ANNOTATE_BT2_SPIKEIN_META:AWK_SCRIPT' { - ext.suffix = ".dedup" + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:EXTRACT_BT2_SPIKEIN_META:AWK_SCRIPT' { + ext.suffix = "_meta_bt2_spikein" publishDir = [ enabled: false ] @@ -425,8 +504,8 @@ if (params.run_alignment) { if (params.run_mark_dups) { process { - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:ANNOTATE_DEDUP_META:AWK' { - ext.suffix = ".awk" + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:EXTRACT_PICARD_DUP_META:AWK' { + ext.suffix = "_meta_picard_dups" ext.command = "'/^[^#]/{print}'" ext.command2 = " > int1.txt && head -2 int1.txt > int2.txt && sed 's/\\t/,/g' int2.txt > int3.txt && sed 's/.*/\\L&/g' int3.txt " publishDir = [ @@ -445,7 +524,7 
@@ if (params.run_mark_dups) { if(params.run_peak_calling && (params.normalisation_mode == "Spikein" || params.normalisation_mode == "None")) { process { withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:BEDTOOLS_GENOMECOV' { - ext.args = "-bg" + ext.args = params.extend_fragments ? '-bg -pc' : '-bg' publishDir = [ enabled: false ] @@ -460,7 +539,8 @@ if(params.run_peak_calling && (params.normalisation_mode != "Spikein" && params. '--outFileFormat bedgraph', '--skipNAs', "--binSize ${params.normalisation_binsize}", - "--normalizeUsing ${params.normalisation_mode}" + "--normalizeUsing ${params.normalisation_mode}", + params.extend_fragments ? '--extendReads' : '', ].join(' ').trim() ext.prefix = { "${meta.id}.bedgraph" } publishDir = [ @@ -506,10 +586,10 @@ if(params.run_peak_calling) { if(params.run_peak_calling && 'seacr' in params.callers) { process { withName: '.*:CUTANDRUN:SEACR_.*' { - ext.args = "non stringent" - ext.prefix = { "${meta.id}.seacr.peaks.bed" } + ext.args = "${params.seacr_norm} ${params.seacr_stringent}" + ext.prefix = { "${meta.id}.seacr.peaks" } publishDir = [ - path: { "${params.outdir}/03_peak_calling/04_called_peaks" }, + path: { "${params.outdir}/03_peak_calling/04_called_peaks/seacr" }, mode: "${params.publish_dir_mode}", pattern: "*.bed", enabled: true @@ -521,72 +601,29 @@ if(params.run_peak_calling && 'seacr' in params.callers) { if(params.run_peak_calling && 'macs2' in params.callers) { process { withName: '.*:CUTANDRUN:MACS2_.*' { - ext.args = "-p ${params.macs2_pvalue}" - ext.prefix = { "${meta.id}.macs2.peaks.bed" } - publishDir = [ - path: { "${params.outdir}/03_peak_calling/04_called_peaks" }, - mode: "${params.publish_dir_mode}", - pattern: "*.bed", - enabled: true - ] - } - } -} - -/* -======================================================================================== - CONSENSUS PEAKS -======================================================================================== -*/ - -if(params.run_peak_calling) { - process { - withName: '.*:AWK_NAME_PEAK_BED' { - ext.command = "'{OFS = \"\\t\"} {print \$0, FILENAME}'" - ext.ext = "bed" - publishDir = [ - enabled: false - ] - } - - withName: '.*:CONSENSUS_PEAKS:SORT|.*:CONSENSUS_PEAKS_ALL:SORT' { - ext.args = "-k1,1 -k2,2n" - ext.ext = "bed" - publishDir = [ - enabled: false - ] - } - - withName: '.*:CONSENSUS_PEAKS:BEDTOOLS_MERGE|.*:CONSENSUS_PEAKS_ALL:BEDTOOLS_MERGE' { - ext.args = " -c 2,3,4,5,6,7,7 -o collapse,collapse,collapse,collapse,collapse,collapse,count_distinct" - ext.prefix = { "${meta.id}.consensus.peaks" } + ext.args = [ + "-p ${params.macs2_pvalue}", + params.macs2_narrow_peak ? '' : "--broad --broad-cutoff ${params.macs2_broad_cutoff}" + ].join(' ').trim() + ext.prefix = { "${meta.id}.macs2" } publishDir = [ - path: { "${params.outdir}/03_peak_calling/05_consensus_peaks" }, + path: { "${params.outdir}/03_peak_calling/04_called_peaks/macs2" }, mode: "${params.publish_dir_mode}", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + pattern: "*.{bed,narrowPeak,broadPeak,xls}", enabled: true ] } - - withName: '.*:CONSENSUS_PEAKS:PLOT_CONSENSUS_PEAKS|.*:CONSENSUS_PEAKS_ALL:PLOT_CONSENSUS_PEAKS' { - publishDir = [ - path: { "${params.outdir}/04_reporting" }, - mode: "${params.publish_dir_mode}", - saveAs: { filename -> filename.equals('versions.yml') ? 
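Two coverage routes are configured above: Spikein/None normalisation goes through bedtools genomecov, now with -pc added when --extend_fragments is set so pairs are counted as full fragments, while the other modes go through deeptools bamCoverage with --binSize, --normalizeUsing and an optional --extendReads. As a reminder of what CPM scaling does to a binned track, a toy sketch (numbers invented; per-bin scaling only, real bamCoverage also accounts for bin size and read filtering):

# Toy CPM normalisation of binned fragment counts.
bin_counts = [120, 300, 80, 0, 45]                 # fragments per genomic bin
total = sum(bin_counts)                            # stand-in for total mapped fragments

cpm = [c * 1_000_000 / total for c in bin_counts]  # counts-per-million scaling
for i, value in enumerate(cpm):
    print("bin{}\t{:.1f}".format(i, value))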
null : filename }, - enabled: params.skip_upset_plots - ] - } } } -if(params.run_peak_calling && !params.run_consensus_all) { +if(params.run_peak_calling && 'macs2' in params.callers) { process { - withName: '.*:CONSENSUS_PEAKS:AWK' { - ext.command = "' \$10 >= " + params.replicate_threshold.toString() + " {print \$0}'" + withName: '.*:CUTANDRUN:PEAK_TO_BED' { + ext.args = "-f 1-6" ext.ext = "bed" - ext.suffix = ".consensus.peaks.filtered" + ext.suffix = ".macs2.peaks" publishDir = [ - path: { "${params.outdir}/03_peak_calling/05_consensus_peaks" }, + path: { "${params.outdir}/03_peak_calling/04_called_peaks/macs2" }, mode: "${params.publish_dir_mode}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true @@ -595,16 +632,13 @@ if(params.run_peak_calling && !params.run_consensus_all) { } } -if(params.run_peak_calling && params.run_consensus_all) { +if(params.run_peak_calling && params.callers[0] == 'seacr') { process { - withName: '.*:CONSENSUS_PEAKS_ALL:AWK' { - ext.command = "' \$10 >= " + params.replicate_threshold.toString() + " {print \$0}'" - ext.ext = "bed" - publishDir = [ - path: { "${params.outdir}/03_peak_calling/05_consensus_peaks" }, - mode: "${params.publish_dir_mode}", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: true + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:AWK_EXTRACT_SUMMITS' { + ext.command = "'{split(\$6, summit, \":\"); split(summit[2], region, \"-\"); print summit[1]\"\\t\"region[1]\"\\t\"region[2]}'" + ext.suffix = ".max_signal" + publishDir = [ + enabled: false ] } } @@ -612,77 +646,45 @@ if(params.run_peak_calling && params.run_consensus_all) { /* ======================================================================================== - CALCULATE FRAGMENTS + CONSENSUS PEAKS ======================================================================================== */ if(params.run_peak_calling) { process { - withName: '.*:CALCULATE_FRAGMENTS:SAMTOOLS_VIEW' { - ext.args = "-F 0x04 -b" - ext.prefix = { "${meta.id}.mapped" } - publishDir = [ - enabled: false - ] - } - - withName: '.*:CALCULATE_FRAGMENTS:SAMTOOLS_SORT' { - ext.args = "-n" - ext.prefix = { "${meta.id}.mapped.sorted" } - publishDir = [ - enabled: false - ] - } - - withName: '.*:CALCULATE_FRAGMENTS:BEDTOOLS_BAMTOBED' { - ext.args = "-bedpe" + withName: '.*:AWK_NAME_PEAK_BED' { + ext.command = "'{OFS = \"\\t\"} {print \$0, FILENAME}'" + ext.ext = "bed" publishDir = [ enabled: false ] } - withName: '.*:CALCULATE_FRAGMENTS:AWK' { - ext.command = "'\$1==\$4 && \$6-\$2 < 1000 {print \$0}'" - ext.suffix = ".filt" + withName: '.*:CONSENSUS_PEAKS:SORT|.*:CONSENSUS_PEAKS_ALL:SORT' { + ext.args = "-k1,1 -k2,2n" ext.ext = "bed" publishDir = [ enabled: false ] } - withName: '.*:CALCULATE_FRAGMENTS:CUT' { - ext.args = "-f 1,2,6" - ext.command = "| sort -T '.' -k1,1 -k2,2n -k3,3n" - ext.suffix = ".frags" - ext.ext = "bed" - publishDir = [ - path: { "${params.outdir}/03_peak_calling/06_fragments" }, - mode: "${params.publish_dir_mode}", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: true - ] - } - - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:AWK_FRAG_BIN' { - ext.args = "-v w=500" - ext.command = "'{print \$1, int((\$2 + \$3)/(2*w))*w + w/2, FILENAME}'" - ext.command2 = "| sort -T '.' -k1,1V -k2,2n | uniq -c | awk -v OFS=\"\\t\" '{print \$2, \$3, \$1, \$4}' | sort -T '.' 
-k1,1V -k2,2n" - ext.suffix = ".frags.bin500" - ext.ext = "bed" - publishDir = [ - path: { "${params.outdir}/03_peak_calling/06_fragments" }, + withName: '.*:CONSENSUS_PEAKS:BEDTOOLS_MERGE|.*:CONSENSUS_PEAKS_ALL:BEDTOOLS_MERGE' { + ext.args = " -c 2,3,4,5,6,7,7 -o collapse,collapse,collapse,collapse,collapse,collapse,count_distinct" + ext.prefix = { "${meta.id}.consensus.peak_counts" } + publishDir = [ + path: { "${params.outdir}/03_peak_calling/05_consensus_peaks" }, mode: "${params.publish_dir_mode}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:SAMTOOLS_CUSTOMVIEW' { - ext.args = "-F 0x04" - ext.args2 = "awk -F'\\t' 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs(\$9)}' | sort -T '.' | uniq -c | awk -v OFS=\"\\t\" '{print \$2, \$1/2}'" - ext.suffix = ".frags.len" - publishDir = [ - path: { "${params.outdir}/03_peak_calling/06_fragments" }, + withName: '.*:CONSENSUS_PEAKS:AWK|.*:CONSENSUS_PEAKS_ALL:AWK' { + ext.command = "' \$10 >= " + params.replicate_threshold.toString() + " {print \$0}'" + ext.ext = "bed" + ext.suffix = ".consensus.peaks" + publishDir = [ + path: { "${params.outdir}/03_peak_calling/05_consensus_peaks" }, mode: "${params.publish_dir_mode}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true @@ -710,20 +712,12 @@ if(params.run_reporting && params.run_igv) { } } -if(params.run_reporting && params.run_deep_tools) { +if(params.run_reporting && params.run_deeptools_heatmaps) { process { - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:AWK_EDIT_PEAK_BED' { - ext.command = "'{split(\$6, summit, \":\"); split(summit[2], region, \"-\"); print summit[1]\"\\t\"region[1]\"\\t\"region[2]}'" - ext.suffix = ".max_signal" - publishDir = [ - enabled: false - ] - } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_COMPUTEMATRIX_GENE' { - ext.args = "scale-regions --beforeRegionStartLength 3000 --regionBodyLength 5000 --afterRegionStartLength 3000 --skipZeros --missingDataAsZero" + ext.args = "scale-regions --beforeRegionStartLength ${params.dt_heatmap_gene_beforelen} --regionBodyLength ${params.dt_heatmap_gene_bodylen} --afterRegionStartLength ${params.dt_heatmap_gene_afterlen} --skipZeros --missingDataAsZero" publishDir = [ - path: { "${params.outdir}/04_reporting/heatmaps/gene" }, + path: { "${params.outdir}/04_reporting/deeptools_heatmaps/gene" }, mode: "${params.publish_dir_mode}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true @@ -733,7 +727,7 @@ if(params.run_reporting && params.run_deep_tools) { withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_PLOTHEATMAP_GENE' { ext.args = "--sortUsing sum" publishDir = [ - path: { "${params.outdir}/04_reporting/heatmaps/gene" }, + path: { "${params.outdir}/04_reporting/deeptools_heatmaps/gene" }, mode: "${params.publish_dir_mode}", saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, enabled: true @@ -741,9 +735,9 @@ if(params.run_reporting && params.run_deep_tools) { } withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_COMPUTEMATRIX_PEAKS' { - ext.args = "reference-point -a 3000 -b 3000 --referencePoint center --skipZeros --missingDataAsZero" + ext.args = "reference-point -a ${params.dt_heatmap_peak_beforelen} -b ${params.dt_heatmap_peak_afterlen} --referencePoint center --skipZeros --missingDataAsZero" publishDir = [ - path: { "${params.outdir}/04_reporting/heatmaps/peaks" }, + path: { "${params.outdir}/04_reporting/deeptools_heatmaps/peaks" }, mode: "${params.publish_dir_mode}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true @@ -753,7 +747,7 @@ if(params.run_reporting && params.run_deep_tools) { withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_PLOTHEATMAP_PEAKS' { ext.args = "--sortUsing sum --startLabel \"Peak Start\" --endLabel \"Peak End\" --xAxisLabel \"\" --regionsLabel \"Peaks\"" publishDir = [ - path: { "${params.outdir}/04_reporting/heatmaps/peaks" }, + path: { "${params.outdir}/04_reporting/deeptools_heatmaps/peaks" }, mode: "${params.publish_dir_mode}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true @@ -762,15 +756,108 @@ if(params.run_reporting && params.run_deep_tools) { } } -if(params.run_reporting) { +if(params.run_reporting && params.run_deeptools_qc) { process { - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:CALCULATE_FRIP' { + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_QC:DEEPTOOLS_MULTIBAMSUMMARY' { + ext.args = "--smartLabels --binSize ${params.dt_qc_bam_binsize}" + publishDir = [ + path: { "${params.outdir}/04_reporting/deeptools_qc" }, + mode: "${params.publish_dir_mode}", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: true + ] + } + + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_QC:DEEPTOOLS_PLOTCORRELATION' { + ext.args = "--corMethod spearman --whatToPlot heatmap --skipZeros" + publishDir = [ + path: { "${params.outdir}/04_reporting/deeptools_qc" }, + mode: "${params.publish_dir_mode}", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: true + ] + } + + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_QC:DEEPTOOLS_PLOTPCA' { + ext.args = "" + publishDir = [ + path: { "${params.outdir}/04_reporting/deeptools_qc" }, + mode: "${params.publish_dir_mode}", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: true + ] + } + + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_QC:DEEPTOOLS_PLOTFINGERPRINT' { + ext.args = "--skipZeros" + publishDir = [ + path: { "${params.outdir}/04_reporting/deeptools_qc" }, + mode: "${params.publish_dir_mode}", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: true + ] + } + } +} + +if (params.run_reporting && params.run_peak_qc) { + process { + withName: '.*:EXTRACT_FRAGMENTS:SAMTOOLS_SORT' { + ext.args = "-n" + ext.prefix = { "${meta.id}.sortedbyname" } + publishDir = [ + enabled: false + ] + } + + withName: '.*:EXTRACT_FRAGMENTS:BEDTOOLS_BAMTOBED' { + ext.args = "-bedpe" + publishDir = [ + enabled: false + ] + } + + withName: '.*:EXTRACT_FRAGMENTS:AWK' { + ext.command = "'\$1==\$4 && \$6-\$2 < 1000 {print \$0}'" + ext.suffix = ".filt" + ext.ext = "bed" + publishDir = [ + enabled: false + ] + } + + withName: '.*:EXTRACT_FRAGMENTS:CUT' { + ext.args = "-f 1,2,6" + ext.command = "| sort -T '.' 
-k1,1 -k2,2n -k3,3n" + ext.suffix = ".frags" + ext.ext = "bed" publishDir = [ + path: { "${params.outdir}/03_peak_calling/06_fragments_from_bams" }, + mode: "${params.publish_dir_mode}", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: true + ] + } + + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:PEAK_FRIP' { + publishDir = [ + enabled: false + ] + } + + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:PRIMARY_PEAK_COUNTS' { + publishDir = [ enabled: false ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:CUT_CALC_REPROD' { + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:CONSENSUS_PEAK_COUNTS' { + publishDir = [ + enabled: false + ] + } + + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:CUT_CALC_REPROD' { ext.args = "-f 1,2,3,6" ext.suffix = ".repro" ext.ext = "bed" @@ -779,48 +866,49 @@ if(params.run_reporting) { ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:BEDTOOLS_INTERSECT' { - ext.args = "-C -sorted" + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:BEDTOOLS_INTERSECT' { + ext.args = "-C -sorted -f ${params.min_peak_overlap}" publishDir = [ enabled: false ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:CALCULATE_PEAK_REPROD' { + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:CALCULATE_PEAK_REPROD' { publishDir = [ enabled: false ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:EXPORT_META' { - publishDir = [ - path: { "${params.outdir}/04_reporting" }, + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:PLOT_CONSENSUS_PEAKS' { + publishDir = [ + path: { "${params.outdir}/04_reporting/consensus_upset_plots" }, mode: "${params.publish_dir_mode}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } + } +} - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:EXPORT_META_CTRL' { +if(params.run_reporting) { + process { + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:SAMTOOLS_CUSTOMVIEW' { + ext.args = "-F 0x04" + ext.args2 = "awk -F'\\t' 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs(\$9)}' | sort -T '.' | uniq -c | awk -v OFS=\"\\t\" '{print \$2, \$1/2}'" + ext.suffix = ".frags.len" publishDir = [ - path: { "${params.outdir}/04_reporting" }, + path: { "${params.outdir}/03_peak_calling/06_fragments_from_bams" }, mode: "${params.publish_dir_mode}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: true ] } - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:GENERATE_REPORTS' { - publishDir = [ - path: { "${params.outdir}/04_reporting/qc" }, - mode: "${params.publish_dir_mode}", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: true + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:FRAG_LEN_HIST' { + publishDir = [ + enabled: false ] } - - - } } @@ -836,3 +924,4 @@ if (params.run_multiqc) { } } } + diff --git a/conf/resources.config b/conf/resources.config index da18803b..d6cb5942 100644 --- a/conf/resources.config +++ b/conf/resources.config @@ -2,37 +2,27 @@ ======================================================================================== nf-core/cutandrun Nextflow resource config file ======================================================================================== - Minimal and process-specific resource allocation. + Process-specific resource allocation. 
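Back in the reporting block, SAMTOOLS_CUSTOMVIEW produces the fragment-length table consumed by FRAG_LEN_HIST: it excludes unmapped reads (-F 0x04), histograms the absolute TLEN, and halves each count because both mates of a pair report the template length. An equivalent pysam sketch (BAM path hypothetical):

# Sketch of the fragment-length summary behind SAMTOOLS_CUSTOMVIEW's args2 pipeline.
from collections import Counter

import pysam

counts = Counter()
with pysam.AlignmentFile("sample.target.dedup.bam", "rb") as bam:   # placeholder path
    for read in bam:
        if read.is_unmapped:                  # mirrors -F 0x04
            continue
        counts[abs(read.template_length)] += 1

for size in sorted(counts):
    print("{}\t{}".format(size, counts[size] / 2))    # matches awk '{print $2, $1/2}'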
---------------------------------------------------------------------------------------- */ /* ======================================================================================== - MINIMAL AND ULTRA LOW RESOURCE ALLOCATION + MODULE-SPECIFIC RESOURCE ALLOCATION ======================================================================================== */ -process { - withLabel:process_min { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 2.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } - } - - withLabel:process_ultralow { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } +if(params.run_genome_prep) { + process { + withName: '.*:PREPARE_GENOME:BLACKLIST_BEDTOOLS_COMPLEMENT' { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 2.GB * task.attempt, 'memory' ) } + time = { check_max( 1.h * task.attempt, 'time' ) } + } } } -/* -======================================================================================== - MODULE-SPECIFIC RESOURCE ALLOCATION -======================================================================================== -*/ - -process { +if(params.run_alignment) { withName: '.*:ALIGN_BOWTIE2:BOWTIE2_ALIGN' { cpus = { check_max( 32 * task.attempt, 'cpus' ) } memory = { check_max( 64.GB * task.attempt, 'memory' ) } @@ -44,127 +34,42 @@ process { memory = { check_max( 64.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } } +} - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_COMPUTEMATRIX_GENE' { - cpus = { check_max( 16 * task.attempt, 'cpus' ) } - memory = { check_max( 32.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_COMPUTEMATRIX_PEAKS' { - cpus = { check_max( 16 * task.attempt, 'cpus' ) } - memory = { check_max( 32.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:CUSTOM_DUMPSOFTWAREVERSIONS' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 2.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } - } - - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:.*:SAMTOOLS_INDEX' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 2.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } - } - - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:.*:SAMTOOLS_STATS' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 2.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } - } - - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:.*:SAMTOOLS_STATS' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 2.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } - } - - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:.*:SAMTOOLS_IDXSTATS' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 2.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } - } - - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:.*:SAMTOOLS_FLAGSTAT' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 2.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } - } - - withName: '.*CHROMSIZES' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - 
memory = { check_max( 2.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } - } - - // 1 CPU 8 GB - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:BEDTOOLS_GENOMECOV' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:BEDTOOLS_SORT' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:UCSC_BEDGRAPHTOBIGWIG' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:BEDTOOLS_INTERSECT' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - - withName: '.*:CONSENSUS_PEAKS:BEDTOOLS_MERGE|.*:CONSENSUS_PEAKS_ALL:BEDTOOLS_MERGE' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:UCSC_BEDCLIP' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - - // ultralow - withName: '.*:CALCULATE_FRAGMENTS:BEDTOOLS_BAMTOBED' { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:SEACR_CALLPEAK' { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:FASTQC_TRIMGALORE:FASTQC' { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - - // memory - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_PLOTHEATMAP_PEAKS' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 32.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } +if(params.run_reporting) { + process { + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:FRAG_LEN_HIST' { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } } +} - withName: 'NFCORE_CUTANDRUN:CUTANDRUN:.*:SAMTOOLS_VIEW' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 32.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } +if(params.run_reporting && params.run_deeptools_heatmaps) { + process { + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_COMPUTEMATRIX_GENE' { + cpus = { check_max( 16 * task.attempt, 'cpus' ) } + memory = { check_max( 64.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_COMPUTEMATRIX_PEAKS' { + cpus = { check_max( 16 * task.attempt, 'cpus' ) } + memory = { check_max( 64.GB * task.attempt, 'memory' ) } + time 
= { check_max( 4.h * task.attempt, 'time' ) } + } + + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_PLOTHEATMAP_GENE' { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 64.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:DEEPTOOLS_PLOTHEATMAP_PEAKS' { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 64.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } } } diff --git a/conf/test.config b/conf/test.config index afecb63f..1cc9016b 100644 --- a/conf/test.config +++ b/conf/test.config @@ -30,6 +30,4 @@ params { blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" spikein_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/e_coli_U00096_3.fa.gz' spikein_bowtie2 = 'https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/e_coli_U00096_3.tar.gz' - - minimum_alignment_q_score = 10 } diff --git a/conf/test_fasta_only.config b/conf/test_fasta_only.config index e420f97d..8916f4cf 100644 --- a/conf/test_fasta_only.config +++ b/conf/test_fasta_only.config @@ -15,6 +15,4 @@ params { bowtie2 = 'https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/hg38-chr20-bowtie2.tar.gz' spikein_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/e_coli_U00096_3.fa.gz' spikein_bowtie2 = 'https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/e_coli_U00096_3.tar.gz' - - minimum_alignment_q_score = 10 } diff --git a/conf/test_full.config b/conf/test_full.config index d52dec47..e47972b4 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,7 +15,6 @@ params { config_profile_description = 'Full test dataset to check pipeline function' genome = 'GRCh38' + gene_bed = null input = 'https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all.csv' - - minimum_alignment_q_score = 10 } diff --git a/conf/test_full_multi.config b/conf/test_full_multi.config index b7da5111..76e866d7 100644 --- a/conf/test_full_multi.config +++ b/conf/test_full_multi.config @@ -16,6 +16,4 @@ params { genome = 'GRCh38' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep.csv' - - minimum_alignment_q_score = 10 } diff --git a/conf/test_full_small.config b/conf/test_full_small.config index e05bbe85..ea86419f 100644 --- a/conf/test_full_small.config +++ b/conf/test_full_small.config @@ -18,6 +18,4 @@ params { blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" spikein_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/e_coli_U00096_3.fa.gz' spikein_bowtie2 = 'https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/e_coli_U00096_3.tar.gz' - - minimum_alignment_q_score = 10 } diff --git a/conf/test_full_small_local_zip.config b/conf/test_full_small_local_zip.config index 89812236..747087ad 100644 --- a/conf/test_full_small_local_zip.config +++ b/conf/test_full_small_local_zip.config @@ -18,6 +18,4 @@ params { blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" spikein_fasta = '/home/runner/work/cutandrun/cutandrun/e_coli_U00096_3.fa.gz' spikein_bowtie2 = '/home/runner/work/cutandrun/cutandrun/e_coli_U00096_3.tar.gz' - - minimum_alignment_q_score = 10 } diff --git a/conf/test_local_zip.config 
b/conf/test_local_zip.config index 84b09fe2..71a71c82 100644 --- a/conf/test_local_zip.config +++ b/conf/test_local_zip.config @@ -30,6 +30,4 @@ params { blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" spikein_fasta = '/home/runner/work/cutandrun/cutandrun/e_coli_U00096_3.fa.gz' spikein_bowtie2 = '/home/runner/work/cutandrun/cutandrun/e_coli_U00096_3.tar.gz' - - minimum_alignment_q_score = 10 } diff --git a/conf/test_no_control.config b/conf/test_no_control.config index fa4a6c98..df051a7b 100644 --- a/conf/test_no_control.config +++ b/conf/test_no_control.config @@ -31,6 +31,5 @@ params { spikein_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/e_coli_U00096_3.fa.gz' spikein_bowtie2 = 'https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/e_coli_U00096_3.tar.gz' - minimum_alignment_q_score = 10 use_control = false } diff --git a/conf/test_tech_reps.config b/conf/test_tech_reps.config index ead5004c..00c7526a 100644 --- a/conf/test_tech_reps.config +++ b/conf/test_tech_reps.config @@ -30,6 +30,4 @@ params { blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" spikein_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/e_coli_U00096_3.fa.gz' spikein_bowtie2 = 'https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/e_coli_U00096_3.tar.gz' - - minimum_alignment_q_score = 10 } diff --git a/docs/images/consensus_peaks.png b/docs/images/consensus_peaks.png deleted file mode 100644 index 9e5c0a49..00000000 Binary files a/docs/images/consensus_peaks.png and /dev/null differ diff --git a/docs/images/cutandrun-flow-diagram-v1-0_2.png b/docs/images/cutandrun-flow-diagram-v1-0_2.png deleted file mode 100755 index a0a27425..00000000 Binary files a/docs/images/cutandrun-flow-diagram-v1-0_2.png and /dev/null differ diff --git a/docs/images/cutandrun-flow-diagram-v3.0.png b/docs/images/cutandrun-flow-diagram-v3.0.png new file mode 100755 index 00000000..fb08a711 Binary files /dev/null and b/docs/images/cutandrun-flow-diagram-v3.0.png differ diff --git a/docs/images/mqc_bowtie2_pe.png b/docs/images/mqc_bowtie2_pe.png deleted file mode 100644 index e41c54f8..00000000 Binary files a/docs/images/mqc_bowtie2_pe.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png deleted file mode 100644 index ca6db52f..00000000 Binary files a/docs/images/mqc_fastqc_counts.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png deleted file mode 100644 index bcba22cb..00000000 Binary files a/docs/images/mqc_fastqc_quality.png and /dev/null differ diff --git a/docs/images/all_consensus_peaks.png b/docs/images/output/all_consensus_peaks.png similarity index 100% rename from docs/images/all_consensus_peaks.png rename to docs/images/output/all_consensus_peaks.png diff --git a/docs/images/output/deeptools_correlation_plot.png b/docs/images/output/deeptools_correlation_plot.png new file mode 100644 index 00000000..3caee89b Binary files /dev/null and b/docs/images/output/deeptools_correlation_plot.png differ diff --git a/docs/images/output/deeptools_fingerprint_explan.png b/docs/images/output/deeptools_fingerprint_explan.png new file mode 100644 index 00000000..275b91d8 Binary files /dev/null and b/docs/images/output/deeptools_fingerprint_explan.png differ diff --git a/docs/images/output/deeptools_fingerprint_plot.png 
b/docs/images/output/deeptools_fingerprint_plot.png new file mode 100644 index 00000000..e71d8457 Binary files /dev/null and b/docs/images/output/deeptools_fingerprint_plot.png differ diff --git a/docs/images/output/deeptools_heatmap.png b/docs/images/output/deeptools_heatmap.png new file mode 100644 index 00000000..4463f3f1 Binary files /dev/null and b/docs/images/output/deeptools_heatmap.png differ diff --git a/docs/images/output/deeptools_pca_plot.png b/docs/images/output/deeptools_pca_plot.png new file mode 100644 index 00000000..da2cd6fa Binary files /dev/null and b/docs/images/output/deeptools_pca_plot.png differ diff --git a/docs/images/output/mqc_01_fastqc_sequence_counts.png b/docs/images/output/mqc_01_fastqc_sequence_counts.png new file mode 100644 index 00000000..716ca7ba Binary files /dev/null and b/docs/images/output/mqc_01_fastqc_sequence_counts.png differ diff --git a/docs/images/output/mqc_02_fastqc_per_base_sequence_quality.png b/docs/images/output/mqc_02_fastqc_per_base_sequence_quality.png new file mode 100644 index 00000000..31cf4d99 Binary files /dev/null and b/docs/images/output/mqc_02_fastqc_per_base_sequence_quality.png differ diff --git a/docs/images/output/mqc_03_fastqc_per_sequence_quality_scores.png b/docs/images/output/mqc_03_fastqc_per_sequence_quality_scores.png new file mode 100644 index 00000000..42e72276 Binary files /dev/null and b/docs/images/output/mqc_03_fastqc_per_sequence_quality_scores.png differ diff --git a/docs/images/output/mqc_04_fastqc_per_sequence_gc_content.png b/docs/images/output/mqc_04_fastqc_per_sequence_gc_content.png new file mode 100644 index 00000000..4cc1682a Binary files /dev/null and b/docs/images/output/mqc_04_fastqc_per_sequence_gc_content.png differ diff --git a/docs/images/output/mqc_05_fastqc_per_sequence_gc_content.png b/docs/images/output/mqc_05_fastqc_per_sequence_gc_content.png new file mode 100644 index 00000000..1ceea5f2 Binary files /dev/null and b/docs/images/output/mqc_05_fastqc_per_sequence_gc_content.png differ diff --git a/docs/images/output/mqc_06_fastqc_sequence_duplication_levels.png b/docs/images/output/mqc_06_fastqc_sequence_duplication_levels.png new file mode 100644 index 00000000..bb44f172 Binary files /dev/null and b/docs/images/output/mqc_06_fastqc_sequence_duplication_levels.png differ diff --git a/docs/images/output/mqc_07_fastqc_clear_overrep.png b/docs/images/output/mqc_07_fastqc_clear_overrep.png new file mode 100644 index 00000000..02b34ef8 Binary files /dev/null and b/docs/images/output/mqc_07_fastqc_clear_overrep.png differ diff --git a/docs/images/output/mqc_08_fastqc_adapter_content.png b/docs/images/output/mqc_08_fastqc_adapter_content.png new file mode 100644 index 00000000..131ef6d8 Binary files /dev/null and b/docs/images/output/mqc_08_fastqc_adapter_content.png differ diff --git a/docs/images/output/mqc_09_fastqc_clear_adapter.png b/docs/images/output/mqc_09_fastqc_clear_adapter.png new file mode 100644 index 00000000..6da5e569 Binary files /dev/null and b/docs/images/output/mqc_09_fastqc_clear_adapter.png differ diff --git a/docs/images/output/mqc_10_fastqc_overrepresented_sequences.png b/docs/images/output/mqc_10_fastqc_overrepresented_sequences.png new file mode 100644 index 00000000..1436d643 Binary files /dev/null and b/docs/images/output/mqc_10_fastqc_overrepresented_sequences.png differ diff --git a/docs/images/output/mqc_11_fastqc_overrepresented_sequences.png b/docs/images/output/mqc_11_fastqc_overrepresented_sequences.png new file mode 100644 index 00000000..6fc2c579 Binary files 
/dev/null and b/docs/images/output/mqc_11_fastqc_overrepresented_sequences.png differ diff --git a/docs/images/output/mqc_12_fastqc_adapter_content.png b/docs/images/output/mqc_12_fastqc_adapter_content.png new file mode 100644 index 00000000..45b80f77 Binary files /dev/null and b/docs/images/output/mqc_12_fastqc_adapter_content.png differ diff --git a/docs/images/output/mqc_13_fastqc_overrepresented_sequences.png b/docs/images/output/mqc_13_fastqc_overrepresented_sequences.png new file mode 100644 index 00000000..c8de2446 Binary files /dev/null and b/docs/images/output/mqc_13_fastqc_overrepresented_sequences.png differ diff --git a/docs/images/mqc_cutadapt_trimmed.png b/docs/images/output/mqc_14_fastqc_cutadapt_trimmed.png similarity index 100% rename from docs/images/mqc_cutadapt_trimmed.png rename to docs/images/output/mqc_14_fastqc_cutadapt_trimmed.png diff --git a/docs/images/output/mqc_15_bowtie2_pe.png b/docs/images/output/mqc_15_bowtie2_pe.png new file mode 100644 index 00000000..b97dff41 Binary files /dev/null and b/docs/images/output/mqc_15_bowtie2_pe.png differ diff --git a/docs/images/output/mqc_16_spikein_bowtie2_pe_plot.png b/docs/images/output/mqc_16_spikein_bowtie2_pe_plot.png new file mode 100644 index 00000000..0e94e6c6 Binary files /dev/null and b/docs/images/output/mqc_16_spikein_bowtie2_pe_plot.png differ diff --git a/docs/images/output/mqc_17_bowtie2_pe_plot.png b/docs/images/output/mqc_17_bowtie2_pe_plot.png new file mode 100644 index 00000000..45ce34d1 Binary files /dev/null and b/docs/images/output/mqc_17_bowtie2_pe_plot.png differ diff --git a/docs/images/output/mqc_18_preseq_plot.png b/docs/images/output/mqc_18_preseq_plot.png new file mode 100644 index 00000000..d4f39f46 Binary files /dev/null and b/docs/images/output/mqc_18_preseq_plot.png differ diff --git a/docs/images/mqc_picard_markduplicates.png b/docs/images/output/mqc_19_picard_markduplicates.png similarity index 100% rename from docs/images/mqc_picard_markduplicates.png rename to docs/images/output/mqc_19_picard_markduplicates.png diff --git a/docs/images/output/mqc_20_primary_peakcounts.png b/docs/images/output/mqc_20_primary_peakcounts.png new file mode 100644 index 00000000..0e8ffc27 Binary files /dev/null and b/docs/images/output/mqc_20_primary_peakcounts.png differ diff --git a/docs/images/output/mqc_21_fragment_lengths.png b/docs/images/output/mqc_21_fragment_lengths.png new file mode 100644 index 00000000..bc276899 Binary files /dev/null and b/docs/images/output/mqc_21_fragment_lengths.png differ diff --git a/docs/images/output/mqc_22_primary_peakrepro.png b/docs/images/output/mqc_22_primary_peakrepro.png new file mode 100644 index 00000000..914a2d61 Binary files /dev/null and b/docs/images/output/mqc_22_primary_peakrepro.png differ diff --git a/docs/images/py_frag_hist.png b/docs/images/py_frag_hist.png deleted file mode 100755 index a72571b5..00000000 Binary files a/docs/images/py_frag_hist.png and /dev/null differ diff --git a/docs/images/py_frags_in_peaks.png b/docs/images/py_frags_in_peaks.png deleted file mode 100755 index 3d2f6338..00000000 Binary files a/docs/images/py_frags_in_peaks.png and /dev/null differ diff --git a/docs/images/py_reproduced_peaks.png b/docs/images/py_reproduced_peaks.png deleted file mode 100755 index c9517b31..00000000 Binary files a/docs/images/py_reproduced_peaks.png and /dev/null differ diff --git a/docs/output.md b/docs/output.md index f358d36c..c37a4fbe 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,50 +1,64 @@ -# nf-core/cutandrun: Output + + +- 1. 
[Introduction](#Introduction)
+- 2. [Preprocessing](#Preprocessing)
+  - 2.1. [Sample Sheet Check](#SampleSheetCheck)
+  - 2.2. [FASTQ Merging](#FASTQMerging)
+  - 2.3. [FastQC](#FastQC)
+  - 2.4. [Sequence Counts](#SequenceCounts)
+    - 2.4.1. [Sequence Quality](#SequenceQuality)
+    - 2.4.2. [Overrepresented Sequences](#OverrepresentedSequences)
+  - 2.5. [TrimGalore](#TrimGalore)
+- 3. [Alignment](#Alignment)
+  - 3.1. [Bowtie 2](#Bowtie2)
+  - 3.2. [Library Complexity](#LibraryComplexity)
+- 4. [Alignment post-processing](#Alignmentpost-processing)
+  - 4.1. [Quality Filtering](#QualityFiltering)
+  - 4.2. [PICARD MarkDuplicates/RemoveDuplicates](#PICARDMarkDuplicatesRemoveDuplicates)
+- 5. [Fragment-based QC](#Fragment-basedQC)
+  - 5.1. [PCA](#PCA)
+  - 5.2. [Fingerprint](#Fingerprint)
+  - 5.3. [Correlation](#Correlation)
+- 6. [Peak Calling](#PeakCalling)
+  - 6.1. [Bam to bedgraph](#Bamtobedgraph)
+  - 6.2. [Bed to bigwig](#Bedtobigwig)
+  - 6.3. [SEACR peak calling](#SEACRpeakcalling)
+  - 6.4. [MACS2 peak calling](#MACS2peakcalling)
+  - 6.5. [Consensus Peaks](#ConsensusPeaks)
+- 7. [Peak-based QC](#Peak-basedQC)
+  - 7.1. [Peak Counts](#PeakCounts)
+  - 7.2. [Peak Reproducibility](#PeakReproducibility)
+  - 7.3. [FRiP Score](#FRiPScore)
+- 8. [Fragment Length Distribution](#FragmentLengthDistribution)
+  - 8.1. [Heatmaps](#Heatmaps)
+  - 8.2. [Upset Plots](#UpsetPlots)
+  - 8.3. [IGV](#IGV)
+- 9. [Workflow reporting and genomes](#Workflowreportingandgenomes)
+  - 9.1. [Reference genome files](#Referencegenomefiles)
+  - 9.2. [Pipeline information](#Pipelineinformation)
+
+## 1. Introduction
+
+This document describes the output produced by the pipeline and can also be used as a general guide for CUT&RUN analysis. Unless otherwise specified, all outputs shown are taken from the MultiQC report generated at the end of the pipeline run.

-## Introduction
+The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory.

-This document describes the output produced by the pipeline. Example plots are taken from the pdf report which details summary details and analyses specific to CUT&Run/CUT&Tag data, and the MultiQC report, which summarises results from some tools used, at the end of the pipeline.
+## 2. Preprocessing

-The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory.
+### 2.1. Sample Sheet Check
+
+The first step of the pipeline is to verify the sample sheet structure and experimental design to ensure that it is valid.
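To make the sample sheet check concrete, here is a minimal sketch of creating a sheet and launching a run. The column layout shown is an assumption based on typical nf-core sample sheets, not the authoritative format; consult this pipeline's usage documentation before use.

```bash
# Minimal sketch: the column set below is illustrative only -- see docs/usage.md
# for the authoritative sample sheet specification.
cat > samplesheet.csv <<'EOF'
group,replicate,fastq_1,fastq_2,control
h3k27me3,1,h3k27me3_rep1_R1.fastq.gz,h3k27me3_rep1_R2.fastq.gz,igg
igg,1,igg_rep1_R1.fastq.gz,igg_rep1_R2.fastq.gz,
EOF

# The sample sheet check runs automatically as the first step of the pipeline;
# a malformed sheet fails fast before any alignment work is scheduled.
nextflow run nf-core/cutandrun --input samplesheet.csv --genome GRCh38 --outdir results -profile docker
```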
-## Pipeline overview - -The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - -- [nf-core/cutandrun: Output](#nf-corecutandrun-output) - - [Introduction](#introduction) - - [Pipeline overview](#pipeline-overview) - - [Preprocessing](#preprocessing) - - [Samplesheet check](#samplesheet-check) - - [Fastq merging](#fastq-merging) - - [FastQC](#fastqc) - - [TrimGalore](#trimgalore) - - [Alignment](#alignment) - - [Bowtie 2](#bowtie-2) - - [Alignment post-processing](#alignment-post-processing) - - [samtools](#samtools) - - [picard MarkDuplicates/RemoveDuplicates](#picard-markduplicatesremoveduplicates) - - [Peak Calling](#peak-calling) - - [Bam to bedgraph](#bam-to-bedgraph) - - [Clip bedfiles](#clip-bedfiles) - - [Bed to bigwig](#bed-to-bigwig) - - [SEACR peak calling](#seacr-peak-calling) - - [BEDtools](#bedtools) - - [Reporting](#reporting) - - [Python reporting](#python-reporting) - - [MultiQC](#multiqc) - - [IGV](#igv) - - [Deeptools](#deeptools) - - [Workflow reporting and genomes](#workflow-reporting-and-genomes) - - [Reference genome files](#reference-genome-files) - - [Pipeline information](#pipeline-information) - -## Preprocessing - -### Samplesheet check - -The first step of the pipeline is to verify the samplesheet structure and experimental design to ensure that it is valid. - -### Fastq merging +### 2.2. FASTQ Merging + +If multiple libraries/runs have been provided for the same sample in the input sample sheet (e.g. to increase sequencing depth), then these will be merged at the very beginning of the pipeline. Please refer to the [usage documentation](https://nf-co.re/rnaseq/usage#samplesheet-input) to see how to specify this type of sample in the input sample sheet.
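As a hedged illustration of how merging is triggered, rows describing the same sample are repeated with different FASTQ files (again, the exact column names are an assumption; check the usage documentation):

```bash
# Hypothetical layout: two sequencing runs of the same replicate share the same
# group/replicate identifiers, so their FASTQ files are merged before trimming.
cat > samplesheet.csv <<'EOF'
group,replicate,fastq_1,fastq_2,control
h3k4me3,1,run1_L001_R1.fastq.gz,run1_L001_R2.fastq.gz,
h3k4me3,1,run2_L001_R1.fastq.gz,run2_L001_R2.fastq.gz,
EOF
```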
Output files @@ -54,9 +68,7 @@ The first step of the pipeline is to verify the samplesheet structure and experi
-If multiple libraries/runs have been provided for the same sample in the input samplesheet (e.g. to increase sequencing depth) then these will be merged at the very beginning of the pipeline in order to have consistent sample naming throughout the pipeline. Please refer to the [usage documentation](https://nf-co.re/rnaseq/usage#samplesheet-input) to see how to specify these samples in the input samplesheet. - -### FastQC +### 2.3. FastQC
Output files @@ -71,11 +83,77 @@ If multiple libraries/runs have been provided for the same sample in the input s

 [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/).

-![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png)
+We perform FastQC on reads both before and after trimming. The descriptions below apply to both reports unless explicitly stated.
+
+### 2.4. Sequence Counts
+
+This first FastQC report provides a first look at how many reads your FASTQ files contain. Predicted duplicates are also shown in black; however, we recommend using the PICARD duplicate reports, as they are more detailed and accurate.
+
+As a general rule of thumb for an abundant epitope like H3K27me3, we recommend no fewer than 5M aligned reads; the sequence counts must reflect this number plus any unaligned error reads. Assuming a maximum alignment error rate of 20%, we recommend a minimum read count of 6M raw reads. Less abundant epitopes may require deeper sequencing for a good signal.
+
+Another thing to look out for in this plot is the consistency of read counts across your various groups and replicates. Large differences in the number of reads between biological replicates may be indicative of technical variation arising from human error, or of problems with the wet-lab protocol or sequencing process.
+
+![MultiQC - FastQC sequence counts plot](images/output/mqc_01_fastqc_sequence_counts.png)
+
+#### 2.4.1. Sequence Quality
+
+FastQC provides several reports that look at sequence quality from different views. The mean quality scores plot shows the sequence quality along the length of the read. It is normal to see some drop-off in quality towards the end of the read, especially with longer reads (150 b.p.). The plot should be green and, with modern sequencing on good-quality samples, should be > 30 throughout the majority of the read. Many factors affect read quality, but users can expect drops in average score when working with primary tissue or other tissues that are difficult to sequence.
+
+![MultiQC - FastQC per-base sequence quality plot](images/output/mqc_02_fastqc_per_base_sequence_quality.png)
+
+The Per-sequence quality score report shows a different view of sequencing quality: the distribution of scores for each sample. This chart will peak where the majority of the reads are scored. In modern Illumina sequencing, this curve should peak towards the end of the chart, in an area > 30.
+
+![MultiQC - FastQC per-sequence quality scores plot](images/output/mqc_03_fastqc_per_sequence_quality_scores.png)
+
+The Per-base sequence content report is generally not applicable to CUT&RUN data. Discordant sequence content at the beginning of the reads is a common phenomenon for CUT&RUN reads, and failing the Per-base sequence content check does not mean your data has failed: it can be due to Tn5 preferential binding, or you might be detecting the 10-bp periodicity that shows up as a sawtooth pattern in the length distribution. If so, this is normal and will not affect alignment or peak calling.
+
+The Per-sequence GC content report shows the distribution of the GC content of all reads in a sample.
This should be centred around the average GC content % for your target organism.
+
+![MultiQC - FastQC per-sequence GC content plot](images/output/mqc_04_fastqc_per_sequence_gc_content.png)
+
+An unusually shaped distribution, such as one with dual peaks, could indicate a contaminated library or some other kind of biased subset. In the image below, we see a significant batch effect between two groups of samples run on different days. The samples in red are most likely contaminated with DNA that has a different GC content to that of the target organism.
+
+![plot](images/output/mqc_05_fastqc_per_sequence_gc_content.png)
+
+#### 2.4.2. Overrepresented Sequences
+
+FastQC provides three reports that focus on overrepresented sequences at the read level. All three must be looked at both before and after trimming to gain a detailed view of the types of sequence duplication that your samples contain.
+
+A normal high-throughput library will contain a diverse set of sequences, with no individual sequence making up a large fraction of the whole library. Finding that a single sequence is overrepresented can be biologically significant; however, it often also indicates that the library is contaminated or not as diverse as expected.
+
+The sequence duplication level plot shows the percentage of the library that falls into each range of duplication counts. For example, the plot below shows that for some samples ~10% of the library has > 100 duplicated sequences. While not particularly informative on its own, it can help to identify the scale of the duplication problem if issues are identified in subsequent reports.
-![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png)
+![plot](images/output/mqc_06_fastqc_sequence_duplication_levels.png)
-### TrimGalore
+The remaining two reports must be analysed together, both before and after trimming, for maximum insight. The first, overrepresented sequences, shows the percentage of identified sequences in each library; the second, adapter content, shows a cumulative plot of adapter content at each base position. Due to the short insert length of CUT&RUN, short read lengths (25 b.p.) are possible; consequently, as the sequencing length increases, more of the sequencing adapters will be sequenced. Adapter sequences should always be trimmed off; therefore, it is important to look at both of these plots after trimming as well, to check that the adapter content has been removed and that only small levels of overrepresented sequences remain. A clear adapter content plot after trimming, but with significant levels of overrepresented sequences remaining, could indicate something biologically significant or an experimental error such as contamination.
+
+**A 25 b.p. CUT&Tag experiment with clear reports even before trimming**
+
+![plot](images/output/mqc_07_fastqc_clear_overrep.png)
+
+**A 150 b.p. CUT&RUN experiment with significant adapter content before trimming but that is clear after trimming**
+
+![plot](images/output/mqc_08_fastqc_adapter_content.png)
+
+![plot](images/output/mqc_10_fastqc_overrepresented_sequences.png)
+
+**After trimming**
+
+![plot](images/output/mqc_07_fastqc_clear_overrep.png)
+
+**A CUT&RUN experiment that shows overrepresented sequences even after trimming all the adapter content away**
+
+![plot](images/output/mqc_11_fastqc_overrepresented_sequences.png)
+
+![plot](images/output/mqc_12_fastqc_adapter_content.png)
+
+**After trimming**
+
+![plot](images/output/mqc_13_fastqc_overrepresented_sequences.png)
+
+![plot](images/output/mqc_09_fastqc_clear_adapter.png)
+
+### 2.5. TrimGalore
Output files @@ -93,11 +171,13 @@ If multiple libraries/runs have been provided for the same sample in the input s > **NB:** TrimGalore! will only run using multiple cores if you are able to use more than > 5 and > 6 CPUs for single- and paired-end data, respectively. The total cores available to TrimGalore! will also be capped at 4 (7 and 8 CPUs in total for single- and paired-end data, respectively) because there is no longer a run-time benefit. See [release notes](https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019) and [discussion whilst adding this logic to the nf-core/atacseq pipeline](https://github.com/nf-core/atacseq/pull/65). -![MultiQC - cutadapt trimmed sequence length plot](images/mqc_cutadapt_trimmed.png) +**Typical plot showing many small sequences being trimmed from reads** + +![MultiQC - cutadapt trimmed sequence length plot](images/output/mqc_14_fastqc_cutadapt_trimmed.png) -## Alignment +## 3. Alignment -### Bowtie 2 +### 3.1. Bowtie 2
Output files @@ -106,34 +186,51 @@ If multiple libraries/runs have been provided for the same sample in the input s
-Adapter-trimmed reads are mapped to the target and spike-in genomes using [Bowtie 2](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml). A genome index is required to run Bowtie2 which is created automatically from the genome fasta input.
-
-The pipeline will output the `.bam` files with index and samtools stats for only the final set by default. For example, the full pipeline will only output picard duplicates processed files as this is the final step before peak calling. If the pipeline is run with `--only_align`, then the `bam` files from the initial sorting and indexing will be copied to the output directory as the other steps are not run.
+Adapter-trimmed reads are mapped to the target and spike-in genomes using [Bowtie 2](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml). By default, the pipeline will output the `.bam` files with index and samtools stats for the final set only. For example, the full pipeline will only output picard duplicates-processed files, as this is the final step before peak calling. If the pipeline is run with `--only_align`, then the `bam` files from the initial sorting and indexing will be copied to the output directory, as the other steps are not run. If `--save_align_intermed` is specified, then all the `bam` files from all stages will be copied over to the output directory. If `--save_spikein_aligned` is specified, then the spike-in alignment files will also be published.

-![MultiQC - Bowtie2 paired-end mapping stats](images/mqc_bowtie2_pe.png)
+MultiQC shows several alignment-based reports; however, the most important is the alignment score plot. A typical plot for a well-defined genome will look like the image below, with high alignment scores and low levels of multi-mapped and unaligned sequences. Low levels of alignment can be due to a multitude of different factors but are generally a strong sign that the input library was of poor quality. That said, if the total number of aligned reads is still above the level required for the target epitope's abundance, the sample has not necessarily failed, as there may still be enough information to answer the biological question asked.

-## Alignment post-processing
+![MultiQC - Bowtie2 paired-end mapping stats](images/output/mqc_15_bowtie2_pe.png)

-### samtools
+The MultiQC report also includes a spike-in alignment report. This plot is important for deciding whether to normalise your samples using the spike-in genome or by another method. The default mode in the pipeline is to normalise stacked reads before peak calling for epitope abundance using spike-in normalisation.
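For reference, a sketch of a run that keeps the intermediate and spike-in alignments, using the flags named above (assuming they behave as standard Nextflow boolean parameters):

```bash
# --save_align_intermed and --save_spikein_aligned are the publishing switches
# described above; --only_align would instead stop the pipeline after alignment.
nextflow run nf-core/cutandrun \
    --input samplesheet.csv \
    --genome GRCh38 \
    --outdir results \
    --save_align_intermed \
    --save_spikein_aligned \
    -profile docker
```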
-Output files
+Traditionally, E. coli DNA is carried along with the bacterially-produced enzymes that are used in CUT&RUN and CUT&Tag experiments and gets tagmented non-specifically during the reaction. The fraction of total reads that map to the E. coli genome depends on the yield of the epitope targeted, and so depends on the number of cells used and the abundance of that epitope in chromatin. Since a constant amount of protein is added to the reactions and brings along a fixed amount of E. coli DNA, E. coli reads can be used to normalise against epitope abundance in a set of experiments. This assumes that the amount of E. coli DNA and the number of cells are consistent between samples.
+
+Since the introduction of these techniques, several factors have reduced the usefulness of this type of normalisation in certain experimental conditions. Firstly, many commercially available kits now have very low levels of E. coli DNA in them, which requires users to spike in their own DNA for normalisation, and this is not always done. Secondly, the normalisation approach depends on the cell count being constant between samples, which in our experience is quite difficult to achieve, especially in primary tissue samples.
+
+The image below shows a typical plot for samples that have the correct amount of spike-in DNA for normalisation. The target samples usually have < 1% spike-in alignment (but still with > 1000 reads, to reach above noise thresholds), while IgG should have the most, typically 2-5%. In this example, the IgG will be brought in line with the other epitopes, enabling proper peak calling using the IgG as a background.
+
+![plot](images/output/mqc_16_spikein_bowtie2_pe_plot.png)
+
+If you see very low spike-in levels for all samples, it is likely that your Tn5 had no residual E. coli DNA and that no additional spike-in DNA was added. In this case, spike-in normalisation cannot be used, and normalisation must be switched to read count or to no normalisation at all.
+
+If you see a strange distribution of spike-in DNA alignment that does not fit with your knowledge of the relative abundance of your IgG and target epitopes, this is indicative of a problem with the spike-in process and, again, another normalisation option should be chosen.
-- `aligner/bowtie2/intermediate/`
-  - `.filtered.bam`: If `--publish_align_intermeds` is specified the original BAM file containing read alignments to the target genome will be placed in this directory.
-  - `.filtered.bam.bai`: BAI file for BAM.
-- `aligner/bowtie2/intermediate/samtools_stats`
-  - `.filtered.bam.*stats`: various statistics regarding the BAM files.
+In the plot below, it may initially look as though the spike-in distribution is too varied to be useful; however, the larger IgG spike-in alignment counts correspond to the target samples with more sequencing depth and more spike-in alignments; therefore, these samples are actually good candidates for spike-in normalisation.
+
+![plot](images/output/mqc_17_bowtie2_pe_plot.png)
+
+### 3.2. Library Complexity
+
+To estimate library complexity and identify potentially over-sequenced libraries (or libraries with a low information content) we run [preseq](http://smithlabresearch.org/software/preseq/), which estimates the complexity of a library, showing how many additional unique reads are sequenced for an increasing total read count. A shallow curve indicates complexity saturation. The dashed line shows a perfectly complex library, where total reads = unique reads.
+
+The plot below shows a group of samples where the majority of unique molecules are accounted for by 50M reads. The total molecules detected stretch beyond 250M, indicating that the library is over-sequenced and that an identical future experiment could be sequenced to a lower depth without losing information.
+
+![plot](images/output/mqc_18_preseq_plot.png)
+
+## 4. Alignment post-processing
+
+### 4.1. Quality Filtering
-BAM files are filtered for a minimum quality score of 0 using [SAMtools](http://samtools.sourceforge.net/). +BAM files are filtered for a minimum quality score and for fully mapped reads using [SAMtools](http://samtools.sourceforge.net/). These results are then passed on to Picard for duplicate removal. -### picard MarkDuplicates/RemoveDuplicates +### 4.2. PICARD MarkDuplicates/RemoveDuplicates
Output files @@ -150,11 +247,45 @@ By default, the pipeline uses [picard MarkDuplicates](https://broadinstitute.git If your data includes IgG controls, these will additionally be de-duplicated. It is not the normal protocol to de-duplicate the target reads, however, if this is required, use the `--dedup_target_reads true` switch.

-![MultiQC - Picard MarkDuplicates metrics plot](images/mqc_picard_markduplicates.png)
+The plot below shows a typical CUT&Tag experiment that has had its PCR cycles optimised. We see a low level of non-optical duplication (from library amplification) in the target samples, but more in the IgG samples, as the reads in these samples derive from non-specific tagmentation in the CUT&Tag reactions.
+
+![MultiQC - Picard MarkDuplicates metrics plot](images/output/mqc_19_picard_markduplicates.png)
+
+High levels of duplication are not necessarily a problem as long as they are consistent across biological replicates or other comparable groups. Given that the target samples are not de-duplicated by default, if the balance of duplicate reads is off when comparing two samples, it may lead to inaccurate peak calling and subsequent spurious signals. High levels of non-optical duplication are indicative of over-amplified samples.
+
+## 5. Fragment-based QC
+
+This section of the pipeline deals with quality control at the aligned-fragment level. The read fragments are counted by binning them into regions genome-wide. The default bin size is 500 b.p., but this can be changed using `dt_qc_bam_binsize`. All of the plots shown below are calculated from this initial binned dataset.
+
+### 5.1. PCA
+
+Descriptions taken from the deepTools [manual](https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html)
+
+Principal component analysis (PCA) can be used, for example, to determine whether samples display greater variability between experimental conditions than between replicates of the same treatment. PCA is also useful to identify unexpected patterns, such as those caused by batch effects or outliers. Principal components represent the directions along which the variation in the data is maximal, so that the information from thousands of regions can be represented by just a few dimensions.
+
+![plot](images/output/deeptools_pca_plot.png)
+
+### 5.2. Fingerprint
+
+Descriptions taken from the deepTools [manual](https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html)
+
+This tool is based on a method developed by [Diaz et al.](http://www.ncbi.nlm.nih.gov/pubmed/22499706). It determines how well the signal in the CUT&RUN/Tag sample can be differentiated from the background distribution of reads in the control sample. For factors that exhibit enrichment of well-defined and relatively narrow regions (e.g. transcription factors such as p300), the resulting plot can be used to assess the strength of a CUT&RUN experiment. However, the broader the expected regions of enrichment, the less clear the plot will be. Conversely, if you do not know what kind of signal to expect, the fingerprint plot will give you a straightforward indication of how careful you will have to be during your downstream analyses to separate noise from meaningful biological signal.
+
+![plot](images/output/deeptools_fingerprint_plot.png)
+
+![plot](images/output/deeptools_fingerprint_explan.png)
+
+### 5.3. Correlation
+
+Descriptions taken from the deepTools [manual](https://deeptools.readthedocs.io/en/develop/content/tools/plotCorrelation.html)
+
+This tool computes the overall similarity between two or more samples based on read coverage within genomic regions. The result of the correlation computation is a table of correlation coefficients that indicates how “strong” the relationship between two samples is (values are between -1 and 1: -1 indicates perfect anti-correlation, 1 perfect correlation).
+
+![plot](images/output/deeptools_correlation_plot.png)

-## Peak Calling
+## 6. Peak Calling

-### Bam to bedgraph
+### 6.1. Bam to bedgraph
Output files @@ -166,9 +297,7 @@ If your data includes IgG controls, these will additionally be de-duplicated. It Converts bam files to the bedgraph format. -### Clip bedfiles - -### Bed to bigwig +### 6.2. Bed to bigwig
Output files @@ -180,107 +309,96 @@ Converts bam files to the bedgraph format. The [bigWig](https://genome.ucsc.edu/goldenpath/help/bigWig.html) format is an indexed binary format useful for displaying dense, continuous data in Genome Browsers such as the [UCSC](https://genome.ucsc.edu/cgi-bin/hgTracks) and [IGV](http://software.broadinstitute.org/software/igv/). This mitigates the need to load the much larger BAM files for data visualisation purposes which will be slower and result in memory issues. The bigWig format is also supported by various bioinformatics software for downstream processing such as meta-profile plotting. -### SEACR peak calling +### 6.3. SEACR peak calling
Output files - `03_peak_calling/04_called_peaks/` - - `.peaks*.bed`: BED file containing peak coordinates and peak signal. + - BED file containing peak coordinates and peak signal.
-[SEACR](https://github.com/FredHutch/SEACR) is a peak caller for data with low background-noise, so is well suited to CUT&Run/CUT&Tag data. SEACR can take in IgG control bedGraph files in order to avoid calling peaks in regions of the experimental data for which the IgG control is enriched. If `--igg_control false` is specified, SEACR calls enriched regions in target data by selecting the top 5% of regions by AUC by default. This threshold can be overwritten using `--peak_threshold`.
+[SEACR](https://github.com/FredHutch/SEACR) is a peak caller for data with low background noise, so it is well suited to CUT&Run/CUT&Tag data. SEACR can take in IgG control bedGraph files in order to avoid calling peaks in regions of the experimental data for which the IgG control is enriched. If `--use_control false` is specified, SEACR calls enriched regions in target data by selecting the top 5% of regions by AUC by default. This threshold can be overwritten using `--seacr_peak_threshold`.

-![Python reporting - peaks reproduced](images/py_reproduced_peaks.png)
+### 6.4. MACS2 peak calling

-![Python reporting - aligned fragments within peaks](images/py_frags_in_peaks.png)
+- `03_peak_calling/04_called_peaks/`
+  - BED file containing peak coordinates and peak signal.

-### Bedtools
+MACS2 is a peak caller used in many other assays, such as ATAC-seq and ChIP-seq. It can deal with high levels of background noise but is generally less sensitive than SEACR. If you are having trouble calling peaks with SEACR, we recommend switching to this peak caller, especially if your QC indicates a high level of background noise.
+
+MACS2 has its main parameters exposed through the pipeline configuration. The default p-value and genome size can be changed using the `--macs2_pvalue` and `--macs2_gsize` parameters. MACS2 has two calling modes: narrow and broad peak. We recommend using broad peak for epitopes with a wide peak range, such as histone marks, and narrow peak for small binding proteins, such as transcription factors. This mode can be changed using `--macs2_narrow_peak`.
+
+### 6.5. Consensus Peaks
-Output files +MACS2 has its main parameters exposed through the pipeline configuration. The default p-values and genome size can be changed using the `--macs2_pvalue` and `--macs2_gsize` parameters. MACS2 has two calling modes: narrow and broad peak. We recommend using broad peak for epitopes with a wide peak range such as histone marks, and narrow peak for small binding proteins such as transcription factors. This mode can be changed using `--macs2_narrow_peak`. -- `seacr/` - - `{group}.consensus_peaks.pdf`: schematic showing which consensus peaks are shared across replicates within groups - - `all_peaks.consensus_peaks.pdf`: schematic showing which consensus peaks are shared across all samples -- `seacr/consensus_peaks` - - `{group}.consensus.peaks.bed`: BED containing consensus peaks for each group - - `all_peaks.consensus.peaks.bed`: BED containing consensus peaks across all samples +### 6.5. Consensus Peaks
-The merge function from [BEDtools](https://github.com/arq5x/bedtools2) is used to merge replicate peaks of the same experimental group to create a consensus peak set. This can then optionally be filtered for consensus peaks contributed to be a threshold number of replicates using `--replicate_threshold`. Additionally, the same workflow is run merging across all samples.
+The merge function from [BEDtools](https://github.com/arq5x/bedtools2) is used to merge replicate peaks of the same experimental group to create a consensus peak set. This can then optionally be filtered for consensus peaks supported by a threshold number of replicates using `--replicate_threshold`.

-![Peak calling - group consensus peak plot](images/consensus_peaks.png)
-![Peak calling - group consensus peak plot](images/all_consensus_peaks.png)
+## 7. Peak-based QC

-## Reporting
+Once the peak calling process is complete, we run a separate set of reports that analyse the quality of the results at the peak level.

-### Python reporting
+### 7.1. Peak Counts
-
-Output files
+For both the sample peaks and the consensus peaks, a simple count is taken. At the sample level, it is important to see consistency between the peak counts of biological replicates; this is the first indicator of whether your replicate samples agree with each other after all of the processing has completed. If you use the consensus peaks with a replicate threshold of more than 1, it is also important to see how many of your peaks across replicates have translated into consensus peaks.

-- `04_reporting/qc/`
-  - `report.pdf`: PDF report of all plots.
-  - `*.png`: individual plots featured in the PDF report.
-  - `*.csv`: corresponding data used to produce the plot.
+In the image below, we see comparable peak counts for the H3K27me3 dataset, but a large disparity for H3K4me3.
-
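A sketch of tightening the consensus set using the threshold parameter named above; requiring support from at least two replicates discards peaks seen in only one sample (the value of 2 is purely an example):

```bash
# --replicate_threshold is described in the Consensus Peaks section above.
nextflow run nf-core/cutandrun \
    --input samplesheet.csv \
    --genome GRCh38 \
    --outdir results \
    --replicate_threshold 2 \
    -profile docker
```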
+![plot](images/output/mqc_20_primary_peakcounts.png)

-Additional QC and analysis pertaining particularly to CUT&Run and CUT&Tag data are reported in this module. This report was adapted in python from the original CUT&Tag analysis [protocol](https://yezhengstat.github.io/CUTTag_tutorial/) from the [Henikoff Lab](https://research.fredhutch.org/henikoff/en.html).
+### 7.2. Peak Reproducibility

-![Python reporting - fragment length distribution](images/py_frag_hist.png)
+The peak reproducibility report intersects all samples within a group using `bedtools intersect`, with a minimum overlap controlled by `min_peak_overlap`. Along with the peak count report, this report is useful for estimating how reliably peaks are called between your biological replicates.

-### MultiQC
+For example, in the image below, when combined with the peak count information, we see that although the H3K27me3 replicates both have similar peak counts, < 30% of the peaks are replicated across the replicate set. For H3K4me3, we see that replicate 1 has a small number of peaks called, but that almost 100% of those peaks are replicated in the second replicate. Replicate 2 has < 20% of its peaks reproduced in replicate 1, but by looking at the peak counts we can see this is due to the low number of peaks called in replicate 1.

-[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarizing all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory.
+![plot](images/output/mqc_22_primary_peakrepro.png)

-> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality.
+### 7.3. FRiP Score

-Output files
+The fraction of fragments in peaks (FRiP) is defined as the fraction of all mapped paired-end reads, extended into fragments, that fall into the called peak regions, i.e. usable fragments in significantly enriched peaks divided by all usable fragments. In general, FRiP scores correlate positively with the number of regions (Landt et al, Genome Research Sept. 2012, 22(9): 1813–1831). A minimum overlap is controlled by `min_frip_overlap`. The FRiP score can be used to assess the overall quality of a sample: poor samples with a high level of background noise, small numbers of called peaks or other issues will have a large number of fragments falling outside the called peaks. Generally, FRiP scores > 0.3 are considered reasonable, with the highest-quality data having FRiP scores > 0.7.

-- `04_reporting/multiqc/`
-  - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser.
-  - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline.
-  - `multiqc_plots/`: directory containing static images from the report in various formats.
+It is worth noting that the peak caller settings are also crucial to this score, as even the highest-quality data will have a low FRiP score if the pipeline is parameterised in a way that calls few peaks, such as setting the peak calling threshold very high.
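To make the FRiP definition concrete, here is a back-of-the-envelope sketch of the same calculation done by hand with `bedtools`, outside the pipeline. File names are placeholders, and the 0.2 overlap fraction stands in for a `min_frip_overlap`-style cutoff:

```bash
# FRiP = fragments overlapping called peaks / all usable fragments.
total=$(wc -l < sample.frags.bed)                      # all usable fragments (BED)
in_peaks=$(bedtools intersect -u -f 0.2 \
    -a sample.frags.bed -b sample.peaks.bed | wc -l)   # fragments in peaks
echo "FRiP: $(echo "scale=3; ${in_peaks} / ${total}" | bc)"
```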
## 8. Fragment Length Distribution

-### IGV
+CUT&Tag inserts adapters on either side of chromatin particles in the vicinity of the tethered enzyme, although tagmentation within chromatin particles can also occur. As a result, CUT&Tag reactions targeting a histone modification predominantly result in fragments of nucleosomal length (~180 bp), or multiples of that length. CUT&Tag targeting transcription factors predominantly produces nucleosome-sized fragments and variable amounts of shorter fragments, from neighbouring nucleosomes and the factor-bound site, respectively. Tagmentation of DNA on the surface of nucleosomes also occurs, and plotting fragment lengths at single-basepair resolution reveals a 10-bp sawtooth periodicity, which is typical of successful CUT&Tag experiments.
-
-Output files +**NB:** Experiments targeting transcription factors may produce different fragment distributions depending on factors beyond the scope of this article. -- `04_reporting/igv/` - - `igv_session.xml`: IGV session. - - `*.txt`: IGV input file configurations. +![plot](images/output/mqc_21_fragment_lengths.png) -
+### 8.1. Heatmaps -An IGV session file will be created at the end of the pipeline containing the normalised bigWig tracks, per-sample peaks, target genome fasta and annotation GTF. Once installed, open IGV, go to File > Open Session and select the igv_session.xml file for loading. +Heatmaps for both genomic features and peaks are generated using deepTools. The parameters for the gene heatmap generation including kilobases to map before and after the gene body can be found with the prefix `dt_heatmap_gene_*`. Similarly, the peak-based heatmap parameters can be found using `dt_heatmap_peak_*`. -> **NB:** If you are not using an in-built genome provided by IGV you will need to load the annotation yourself e.g. in .gtf and/or .bed format. +**NB:** These reports are generated outside of MultiQC -### Deeptools +![plot](images/output/deeptools_heatmap.png) -
-Output files +### 8.2. Upset Plots + +Upset plots provide a different view on which sets of peaks are overlapping across different samples. Use in conjunction with the other peak-based QC metrics. + +**NB:** These reports are generated outside of MultiQC -- `04_reporting/heatmaps//` - - `.plotHeatmap.pdf`: heatmap PDF. - - `.computeMatrix.mat.gz`: heatmap matrix. - - `*.mat.tab`: matrix and heatmap configs. +![plot](images/output/all_consensus_peaks.png)
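As a hedged illustration of tuning these reports: the `dt_heatmap_gene_*`/`dt_heatmap_peak_*` prefixes are named above, but the exact parameter suffixes below are hypothetical; check the pipeline's parameters page (or `nextflow run nf-core/cutandrun --help`) for the real names before use.

```bash
# Parameter names after the dt_heatmap_gene_ prefix are assumptions for
# illustration only -- verify against the parameters page before use.
nextflow run nf-core/cutandrun \
    --input samplesheet.csv \
    --genome GRCh38 \
    --outdir results \
    --dt_heatmap_gene_beforelen 3000 \
    --dt_heatmap_gene_afterlen 3000 \
    -profile docker
```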
-[deeptools](https://github.com/deeptools/deepTools/) sub-tools computeMatrix and plotHeatmap are used to assess the distribution of fragments around genes and peaks. +### 8.3. IGV + +
+
+An IGV session file will be created at the end of the pipeline containing the normalised bigWig tracks, per-sample peaks, target genome fasta and annotation GTF. Once IGV is installed, open it, go to File > Open Session and select the `igv_session.xml` file for loading.
+
+> **NB:** If you are not using an in-built genome provided by IGV you will need to load the annotation yourself, e.g. in .gtf and/or .bed format.

-## Workflow reporting and genomes
+## 9. Workflow reporting and genomes

-### Reference genome files
+### 9.1. Reference genome files
Output files @@ -296,7 +414,7 @@ An IGV session file will be created at the end of the pipeline containing the no

 A number of genome-specific files are generated by the pipeline because they are required for the downstream processing of the results. If the `--save_reference` parameter is provided then these will be saved in the `00_genome/` directory. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build new indices, so that you can save them somewhere locally. The index-building step can be quite time-consuming, and saving the indices permits their reuse in future runs of the pipeline.

-### Pipeline information
+### 9.2. Pipeline information
Output files
diff --git a/docs/usage.md b/docs/usage.md
index 7f9b91cb..f9699e06 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -46,9 +46,9 @@ This will launch the pipeline with the `docker` configuration profile. See below

 Note that the pipeline will create the following files in your working directory:

-```console
+```bash
 work                # Directory containing the nextflow working files
-                    # Finished results in specified location (defined with --outdir)
+                    # Finished results in specified location (defined with --outdir)
 .nextflow_log       # Log file from Nextflow
 # Other nextflow hidden files, eg. history of pipeline runs and old logs.
 ```

@@ -57,7 +57,7 @@

 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:

-```console
+```bash
 nextflow pull nf-core/cutandrun
 ```

@@ -67,7 +67,29 @@

 There are some options detailed on the parameters page that are prefixed with `save`, `skip` or `only`. These are flow control options that allow for saving additional output to the results directory, skipping unwanted portions of the pipeline or running the pipeline up to a certain point, which can be useful for testing.

-### De-duplication
+### Genome Configuration
+
+The easiest way to run the pipeline is by using one of the pre-configured genomes that reflect the available genomes at [iGenomes](https://ewels.github.io/AWS-iGenomes/). Assign `genome` to one of the iGenomes keywords and all the available reference data will be fetched automatically. The pipeline uses the following reference data:
+
+- Target genome FASTA
+- Target genome Bowtie2 Index
+- Target genome GTF
+- Target genome BED (will be generated from the GTF if not supplied)
+- Target genome Blacklist (blacklist files for major genomes are included in the pipeline)
+- Spike-in genome FASTA
+- Spike-in genome Bowtie2 Index
+
+If the `genome` parameter is not supplied, the user must provide all the target genome data themselves (except the gene BED file). The default spike-in genome is E. coli, given that residual E. coli DNA is a natural by-product of the bacterial production of the enzymes used in these protocols. However, it is possible to spike in different DNA during the experimental protocol and then set `spikein_genome` to the target organism.
+
+### Read Filtering and Duplication
+
+After alignment using Bowtie2, mapped reads are filtered to remove those which do not pass a minimum quality threshold. This threshold can be changed using the `minimum_alignment_q_score` parameter. CUT&RUN and CUT&Tag both integrate adapters into the vicinity of antibody-tethered enzymes, and the exact sites of integration are affected by the accessibility of surrounding DNA. Given these experimental parameters, it is expected that there are many fragments which share common start and end positions; thus, such duplicates are generally valid but would be filtered out by de-duplication tools. However, there will be a fraction of fragments that are present due to PCR duplication that cannot be separated.
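Pulling the two subsections above together, here is a sketch of a run that supplies references manually and raises the alignment quality filter. `--fasta` appears in the pipeline's own error message and `minimum_alignment_q_score` is named above; the `--gtf` and `--spikein_genome` spellings are assumptions to be checked against the parameters page:

```bash
# Manual genome configuration plus a stricter MAPQ filter; parameter spellings
# other than --fasta and --minimum_alignment_q_score are assumptions.
nextflow run nf-core/cutandrun \
    --input samplesheet.csv \
    --fasta genome.fa \
    --gtf genes.gtf \
    --spikein_genome K12-MG1655 \
    --minimum_alignment_q_score 20 \
    --outdir results \
    -profile docker
```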
@@ -75,9 +97,9 @@ Control samples such as those from IgG datasets have relatively high duplication The pipeline default is therefore to run de-duplication only on control samples. If it is suspected that a heavy fraction of PCR duplicates is present in the primary samples, the parameter `dedup_target_reads` can be set using -`--dedup_target_reads true` +`--dedup_target_reads` -### Peak Normalisation +### Read Normalisation The default mode in the pipeline is to normalise stacked reads before peak calling for epitope abundance using spike-in normalisation. @@ -264,6 +286,14 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). +## Azure Resource Requests + +Use the `azurebatch` profile by specifying `-profile azurebatch` on the command line. +We recommend setting `params.vm_type` to `Standard_D16_v3` by default, but this can be changed if required. + +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer to [Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). + ## Running in the background Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. @@ -278,6 +308,6 @@ Some HPC setups also allow you to run nextflow within a cluster job submitted yo In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`): -```console +```bash NXF_OPTS='-Xms1g -Xmx4g' ``` diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 2fc0a9b9..27feb009 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -145,6 +145,61 @@ class NfcoreTemplate { output_tf.withWriter { w -> w << email_txt } } + // + // Construct and send adaptive card + // https://adaptivecards.io + // + public static void adaptivecard(workflow, params, summary_params, projectDir, log) { + def hook_url = params.hook_url + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = workflow.manifest.version + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine + msg_fields['projectDir'] =
workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + def hf = new File("$projectDir/assets/adaptivecard.json") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } + } + // // Print pipeline summary on completion // diff --git a/lib/Utils.groovy b/lib/Utils.groovy old mode 100755 new mode 100644 index 28567bd7..8d030f4e --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -21,19 +21,26 @@ class Utils { } // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } - if (conda_check_failed) { + if (channels_missing | channel_priority_violation) { log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " There is a problem with your Conda configuration!\n\n" + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" } } diff --git a/lib/WorkflowCutandrun.groovy b/lib/WorkflowCutandrun.groovy index fc19f68a..35ce645e 100755 --- a/lib/WorkflowCutandrun.groovy +++ b/lib/WorkflowCutandrun.groovy @@ -2,6 +2,8 @@ // This file holds several functions specific to the workflow/cutandrun.nf in the nf-core/cutandrun pipeline // +import groovy.text.SimpleTemplateEngine + class WorkflowCutandrun { // @@ -10,6 +12,7 @@ class WorkflowCutandrun { public static void initialise(params, log) { genomeExistsError(params, log) + if (!params.fasta) { log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." 
System.exit(1) @@ -62,7 +65,22 @@ class WorkflowCutandrun { return yaml_file_text } - // + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = run_workflow.toMap() + meta["manifest_map"] = run_workflow.manifest.toMap() + + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>" + + def methods_text = mqc_methods_yaml.text + + def engine = new SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html + }// // Exit pipeline if incorrect --genome key provided // private static void genomeExistsError(params, log) { diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 87df6453..4891ba4d 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -57,6 +57,7 @@ class WorkflowMain { } // Print parameter summary log to screen + log.info paramsSummaryLog(workflow, params, log) // Check that a -profile or Nextflow config has been provided to run the pipeline @@ -76,18 +77,16 @@ class WorkflowMain { System.exit(1) } } - // // Get attribute from genome config file e.g. fasta // - public static String getGenomeAttribute(params, attribute) { - def val = '' + public static Object getGenomeAttribute(params, attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { if (params.genomes[ params.genome ].containsKey(attribute)) { - val = params.genomes[ params.genome ][ attribute ] + return params.genomes[ params.genome ][ attribute ] } } - return val + return null } // diff --git a/main.nf b/main.nf index bb4380e2..4f2732c4 100644 --- a/main.nf +++ b/main.nf @@ -4,6 +4,7 @@ nf-core/cutandrun ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/cutandrun + Website: https://nf-co.re/cutandrun Slack : https://nfcore.slack.com/channels/cutandrun ---------------------------------------------------------------------------------------- @@ -17,13 +18,8 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -if (!params.fasta) { - params.bowtie2 = WorkflowMain.getGenomeAttribute(params, 'bowtie2') -} else { - params.bowtie2 = null -} - params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.bowtie2 = WorkflowMain.getGenomeAttribute(params, 'bowtie2') params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') params.gene_bed = WorkflowMain.getGenomeAttribute(params, 'bed12') params.blacklist = WorkflowMain.getGenomeAttribute(params, 'blacklist') @@ -34,12 +30,8 @@ params.blacklist = WorkflowMain.getGenomeAttribute(params, 'blacklist') ======================================================================================== */ -if (!params.spikein_fasta) { - params.spikein_bowtie2 = WorkflowMain.getGenomeAttributeSpikeIn(params, 'bowtie2') -} else { - params.spikein_bowtie2 = null -} params.spikein_fasta = WorkflowMain.getGenomeAttributeSpikeIn(params, 'fasta') +params.spikein_bowtie2 = WorkflowMain.getGenomeAttributeSpikeIn(params, 'bowtie2') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/modules.json b/modules.json index 4f20b9e8..6ad04247 100644 --- a/modules.json +++ b/modules.json @@ -2,81 +2,118 @@ "name": "nf-core/cutandrun", "homePage": "https://github.com/nf-core/cutandrun", "repos": { - "nf-core/modules": { - "bedtools/bamtobed": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "bedtools/genomecov": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "bedtools/intersect": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "bedtools/merge": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "bedtools/sort": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "bowtie2/align": {
- "git_sha": "cbc47767f7bb4751a9d178395bd66f051401a0c1" - }, - "bowtie2/build": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "cat/fastq": { - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" - }, - "custom/getchromsizes": { - "git_sha": "213403187932dbbdd936a04474cc8cd8abae7a08" - }, - "deeptools/computematrix": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "deeptools/plotheatmap": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "fastqc": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "gunzip": { - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" - }, - "macs2/callpeak": { - "git_sha": "f0800157544a82ae222931764483331a81812012" - }, - "picard/markduplicates": { - "git_sha": "63db63757cab03cfa7a02c0d0f134b66fbfadea6" - }, - "samtools/flagstat": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "samtools/idxstats": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "samtools/index": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "samtools/sort": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "samtools/stats": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "samtools/view": { - "git_sha": "6b64f9cb6c3dd3577931cc3cd032d6fb730000ce" - }, - "seacr/callpeak": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "ucsc/bedclip": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "ucsc/bedgraphtobigwig": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "untar": { - "git_sha": "51be617b1ca9bff973655eb899d591ed6ab253b5" + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "bedtools/bamtobed": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "bedtools/complement": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "bedtools/genomecov": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "bedtools/intersect": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "bedtools/merge": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "bowtie2/align": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "bowtie2/build": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "cat/fastq": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "custom/getchromsizes": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "deeptools/computematrix": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "deeptools/plotfingerprint": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "deeptools/plotheatmap": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "fastqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "gunzip": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "macs2/callpeak": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "picard/markduplicates": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + 
"samtools/flagstat": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "samtools/index": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "samtools/sort": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "samtools/stats": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "seacr/callpeak": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "tabix/bgziptabix": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "ucsc/bedclip": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "ucsc/bedgraphtobigwig": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "untar": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + } + } } } } diff --git a/modules/local/modules/custom/dumpsoftwareversions/main.nf b/modules/local/custom_dumpsoftwareversions.nf similarity index 89% rename from modules/local/modules/custom/dumpsoftwareversions/main.nf rename to modules/local/custom_dumpsoftwareversions.nf index ddc7306d..ad1626e2 100644 --- a/modules/local/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/local/custom_dumpsoftwareversions.nf @@ -1,8 +1,8 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_low' + label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.12" : null) + conda (params.enable_conda ? "bioconda::multiqc=1.13" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" @@ -127,8 +127,11 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { } versions_mqc = { - 'id': 'software_versions', - 'section_name': '${workflow.manifest.name} Software Versions by Process', + 'parent_id': 'software_versions', + 'parent_name': 'Software Versions', + 'parent_description': 'Details software versions used in the pipeline run', + 'id': 'software-versions-by-process', + 'section_name': '${workflow.manifest.name} software versions by process', 'section_href': 'https://github.com/${workflow.manifest.name}', 'plot_type': 'html', 'description': 'are collected at run time from the software output.', @@ -136,7 +139,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { } versions_mqc_unique = { - 'id': 'software_versions_unique', + 'parent_id': 'software_versions', + 'parent_name': 'Software Versions', + 'parent_description': 'Details software versions used in the pipeline run', + 'id': 'software-versions-unique', 'section_name': '${workflow.manifest.name} Software Versions', 'section_href': 'https://github.com/${workflow.manifest.name}', 'plot_type': 'html', diff --git a/modules/local/modules/deeptools/bamcoverage/main.nf b/modules/local/deeptools/bamcoverage/main.nf similarity index 100% rename from modules/local/modules/deeptools/bamcoverage/main.nf rename to modules/local/deeptools/bamcoverage/main.nf diff --git a/modules/local/deeptools/multibamsummary/main.nf b/modules/local/deeptools/multibamsummary/main.nf new file mode 100644 index 00000000..d6fe03ec --- /dev/null +++ b/modules/local/deeptools/multibamsummary/main.nf @@ -0,0 +1,35 @@ +process DEEPTOOLS_MULTIBAMSUMMARY { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : + 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + + input: + tuple val(meta), path(bams), path(bais), val(labels) + + output: + tuple val(meta), path("*.npz") , emit: matrix + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + multiBamSummary bins \\ + $args \\ + --bamfiles ${bams.join(' ')} \\ + --labels ${labels.join(' ')} \\ + --numberOfProcessors $task.cpus \\ + --outFileName all_bam.bamSummary.npz \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(multiBamSummary --version | sed -e "s/multiBamSummary //g") + END_VERSIONS + """ +} diff --git a/modules/local/deeptools/plotcorrelation/main.nf b/modules/local/deeptools/plotcorrelation/main.nf new file mode 100644 index 00000000..546a0ad7 --- /dev/null +++ b/modules/local/deeptools/plotcorrelation/main.nf @@ -0,0 +1,36 @@ +process DEEPTOOLS_PLOTCORRELATION { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : + 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + + input: + tuple val(meta), path(matrix) + + output: + tuple val(meta), path("*.pdf"), emit: pdf + tuple val(meta), path("*.tab"), emit: matrix + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + plotCorrelation \\ + $args \\ + --corData $matrix \\ + --plotFile ${prefix}.plotCorrelation.pdf \\ + --outFileCorMatrix ${prefix}.plotCorrelation.mat.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(plotCorrelation --version | sed -e "s/plotCorrelation //g") + END_VERSIONS + """ +} diff --git a/modules/local/deeptools/plotpca/main.nf b/modules/local/deeptools/plotpca/main.nf new file mode 100644 index 00000000..a2193183 --- /dev/null +++ b/modules/local/deeptools/plotpca/main.nf @@ -0,0 +1,36 @@ +process DEEPTOOLS_PLOTPCA { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : + 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + + input: + tuple val(meta), path(matrix) + + output: + tuple val(meta), path("*.pdf"), emit: pdf + tuple val(meta), path("*.tab"), emit: tab + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + plotPCA \\ + $args \\ + --corData $matrix \\ + --plotFile ${prefix}.plotPCA.pdf \\ + --outFileNameData ${prefix}.plotPCA.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(plotPCA --version | sed -e "s/plotPCA //g") + END_VERSIONS + """ +} diff --git a/modules/local/export_meta.nf b/modules/local/export_meta.nf deleted file mode 100644 index 6a18e362..00000000 --- a/modules/local/export_meta.nf +++ /dev/null @@ -1,53 +0,0 @@ -process EXPORT_META { - label 'process_min' - - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" - - input: - val meta - val table_name - - output: - path "*.csv", emit: csv - - when: - task.ext.when == null || task.ext.when - - script: - def header = [:] - - // Find the header key set - for (int i = 0; i < meta.size(); i++) { - meta[i].each { - entry -> - if(!header.containsKey(entry.key)) { - header.put(entry.key, null) - } - } - } - - // Init output string - arr_str = header.keySet().join(",") - - // Map the values and write row - for (int i = 0; i < meta.size(); i++) { - header.each { - entry -> - entry.value = null - } - - meta[i].each { - entry -> - header[entry.key] = entry.value - } - sample_str = header.values().join(",") - arr_str = arr_str + "\n" + sample_str - } - - """ - echo "$arr_str" > ${table_name}.csv - """ -} diff --git a/modules/nf-core/modules/bedtools/sort/main.nf b/modules/local/for_patch/bedtools/sort/main.nf similarity index 89% rename from modules/nf-core/modules/bedtools/sort/main.nf rename to modules/local/for_patch/bedtools/sort/main.nf index 43dc9bda..e4d47183 100644 --- a/modules/nf-core/modules/bedtools/sort/main.nf +++ b/modules/local/for_patch/bedtools/sort/main.nf @@ -10,6 +10,7 @@ process BEDTOOLS_SORT { input: tuple val(meta), path(intervals) val extension + path sizes output: tuple val(meta), path("*.${extension}"), emit: sorted @@ -19,12 +20,14 @@ process BEDTOOLS_SORT { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def sizes = sizes ? "-g $sizes" : "" """ bedtools \\ sort \\ -i $intervals \\ + $sizes \\ $args \\ > ${prefix}.${extension} diff --git a/modules/nf-core/modules/bedtools/sort/meta.yml b/modules/local/for_patch/bedtools/sort/meta.yml similarity index 100% rename from modules/nf-core/modules/bedtools/sort/meta.yml rename to modules/local/for_patch/bedtools/sort/meta.yml diff --git a/modules/local/for_patch/preseq/lcextrap/main.nf b/modules/local/for_patch/preseq/lcextrap/main.nf new file mode 100644 index 00000000..cf76779b --- /dev/null +++ b/modules/local/for_patch/preseq/lcextrap/main.nf @@ -0,0 +1,40 @@ +process PRESEQ_LCEXTRAP { + tag "$meta.id" + label 'process_medium' + label 'error_ignore' + + conda (params.enable_conda ? "bioconda::preseq=3.1.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/preseq:3.1.2--h445547b_2': + 'quay.io/biocontainers/preseq:3.1.2--h445547b_2' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.lc_extrap.txt"), emit: lc_extrap, optional:true + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired_end = meta.single_end ? 
'' : '-pe' + """ + preseq \\ + lc_extrap \\ + $args \\ + $paired_end \\ + -output ${prefix}.lc_extrap.txt \\ + $bam \\ + && cp .command.log ${prefix}.command.log || cp .command.log ${prefix}.command.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + preseq: \$(echo \$(preseq 2>&1) | sed 's/^.*Version: //; s/Usage:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/for_patch/preseq/lcextrap/meta.yml b/modules/local/for_patch/preseq/lcextrap/meta.yml new file mode 100755 index 00000000..f1be05a2 --- /dev/null +++ b/modules/local/for_patch/preseq/lcextrap/meta.yml @@ -0,0 +1,48 @@ +name: preseq_lcextrap +description: Software for predicting library complexity and genome coverage in high-throughput sequencing +keywords: + - preseq + - library + - complexity +tools: + - preseq: + description: Software for predicting library complexity and genome coverage in high-throughput sequencing + homepage: http://smithlabresearch.org/software/preseq/ + documentation: http://smithlabresearch.org/wp-content/uploads/manual.pdf + tool_dev_url: https://github.com/smithlabcode/preseq + doi: "" + licence: ["GPL"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - lc_extrap: + type: file + description: File containing output of Preseq lcextrap + pattern: "*.{lc_extrap.txt}" + - log: + type: file + description: Log file containing stderr produced by Preseq + pattern: "*.{log}" + +authors: + - "@drpatelh" + - "@Emiller88" diff --git a/modules/nf-core/modules/samtools/view/main.nf b/modules/local/for_patch/samtools/view/main.nf similarity index 95% rename from modules/nf-core/modules/samtools/view/main.nf rename to modules/local/for_patch/samtools/view/main.nf index 55194e88..b05e8aac 100644 --- a/modules/nf-core/modules/samtools/view/main.nf +++ b/modules/local/for_patch/samtools/view/main.nf @@ -10,6 +10,7 @@ process SAMTOOLS_VIEW { input: tuple val(meta), path(input), path(index) path fasta + path regions output: tuple val(meta), path("*.bam") , emit: bam , optional: true @@ -24,6 +25,7 @@ process SAMTOOLS_VIEW { def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta} -C" : "" + def blacklist = regions ? "-L $regions" : "" def file_type = input.getExtension() if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
""" @@ -31,6 +33,7 @@ process SAMTOOLS_VIEW { view \\ --threads ${task.cpus-1} \\ ${reference} \\ + ${blacklist} \\ $args \\ $input \\ $args2 \\ diff --git a/modules/nf-core/modules/samtools/view/meta.yml b/modules/local/for_patch/samtools/view/meta.yml similarity index 100% rename from modules/nf-core/modules/samtools/view/meta.yml rename to modules/local/for_patch/samtools/view/meta.yml diff --git a/modules/local/modules/trimgalore/main.nf b/modules/local/for_patch/trimgalore/main.nf similarity index 100% rename from modules/local/modules/trimgalore/main.nf rename to modules/local/for_patch/trimgalore/main.nf diff --git a/modules/local/gtf2bed.nf b/modules/local/gtf2bed.nf new file mode 100644 index 00000000..a6540ad1 --- /dev/null +++ b/modules/local/gtf2bed.nf @@ -0,0 +1,32 @@ +process GTF2BED { + tag "$gtf" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::perl=5.26.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/perl:5.26.2' : + 'quay.io/biocontainers/perl:5.26.2' }" + + input: + path gtf + + output: + path '*.bed' , emit: bed + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/cutandrun/bin/ + def args = params.igv_show_gene_names ? "--names" : '' + """ + gtf2bed \\ + $args \\ + $gtf \\ + > ${gtf.baseName}.bed + cat <<-END_VERSIONS > versions.yml + "${task.process}": + perl: \$(echo \$(perl --version 2>&1) | sed 's/.*v\\(.*\\)) built.*/\\1/') + END_VERSIONS + """ +} diff --git a/modules/local/linux/awk.nf b/modules/local/linux/awk.nf index d56926f9..ad2f299d 100644 --- a/modules/local/linux/awk.nf +++ b/modules/local/linux/awk.nf @@ -1,6 +1,6 @@ process AWK { tag "$meta.id" - label 'process_ultralow' + label 'process_single' conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/linux/cut.nf b/modules/local/linux/cut.nf index 1afaedc2..da921d03 100644 --- a/modules/local/linux/cut.nf +++ b/modules/local/linux/cut.nf @@ -1,6 +1,6 @@ process CUT { tag "$meta.id" - label 'process_min' + label 'process_single' conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/modules/calculate_frip/main.nf b/modules/local/modules/calculate_frip/main.nf deleted file mode 100644 index 0a7d2d8f..00000000 --- a/modules/local/modules/calculate_frip/main.nf +++ /dev/null @@ -1,35 +0,0 @@ -process CALCULATE_FRIP { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "conda-forge::python=3.8.3 bioconda::deeptools=3.5.* bioconda::pysam=0.17.*" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-f42a44964bca5225c7860882e231a7b5488b5485:47ef981087c59f79fdbcab4d9d7316e9ac2e688d-0' : - 'quay.io/biocontainers/mulled-v2-f42a44964bca5225c7860882e231a7b5488b5485:47ef981087c59f79fdbcab4d9d7316e9ac2e688d-0' }" - - input: - tuple val(meta), path(bam), path(bai), path(bed) - - output: - tuple val(meta), path('frips.csv'), emit: frips - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - """ - frip.py \\ - --bams "*.bam" \\ - --peaks "*.bed" \\ - --threads ${task.cpus} \\ - --outpath . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | grep -E -o \"([0-9]{1,}\\.)+[0-9]{1,}\") - deeptools: \$(deeptools --version | sed -e "s/deeptools //g") - pysam: \$(python -c 'import pysam; print(pysam.__version__)') - END_VERSIONS - """ -} diff --git a/modules/local/modules/custom/dumpsoftwareversions/meta.yml b/modules/local/modules/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index 5ef619d0..00000000 --- a/modules/local/modules/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" - -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" diff --git a/modules/local/modules/deeptools/bamcoverage/meta.yml b/modules/local/modules/deeptools/bamcoverage/meta.yml deleted file mode 100644 index fb92168f..00000000 --- a/modules/local/modules/deeptools/bamcoverage/meta.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: deeptools_bamcoverage -description: This tool takes an alignment of reads or fragments as input (BAM file) and generates a coverage track (bigWig or bedGraph) as output. -keywords: - - sort -tools: - - deeptools: - description: A set of user-friendly tools for normalization and visualzation of deep-sequencing data - homepage: https://deeptools.readthedocs.io/en/develop/content/tools/bamCoverage.html - documentation: https://deeptools.readthedocs.io/en/develop/content/tools/bamCoverage.html - tool_dev_url: https://github.com/deeptools/deepTools/ - doi: "https://doi.org/10.1093/nar/gkw257" - licence: ["GPL v3"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM file - pattern: "*.{bam,cram}" - - input_index: - type: file - description: BAM/CRAM index file - pattern: "*.{bai,crai}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - bigWig: - type: file - description: BigWig file - pattern: "*.bigWig" - - bedgraph: - type: file - description: Bedgraph file - pattern: "*.bedgraph" - -authors: - - "@FriederikeHanssen" diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf index 823ee449..803a8bce 100644 --- a/modules/local/multiqc.nf +++ b/modules/local/multiqc.nf @@ -1,17 +1,17 @@ process MULTIQC { - label 'process_ultralow' + label 'process_single' - conda (params.enable_conda ? "bioconda::multiqc=1.12" : null) + conda (params.enable_conda ? "bioconda::multiqc=1.13" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" input: path multiqc_config path multiqc_custom_config - path software_versions - path software_versions_unique - path workflow_summary + path ('software_versions/*') + path ('software_versions/*') + path ('workflow_summary/*') path ('fastqc/*') path ('trimgalore/fastqc/*') path ('trimgalore/*') @@ -21,12 +21,21 @@ process MULTIQC { path ('samtools/flagstat/*') path ('samtools/idxstats/*') path ('picard/markduplicates/*') - path ('reports/*') + path ('preseq/*') + path ('deeptools/*') + path ('deeptools/*') + path ('deeptools/*') + path ('peak_metrics/peak_count/*') + path ('peak_metrics/peak_frip/*') + path ('peak_metrics/peak_count_consensus/*') + path ('peak_metrics/peak_reprod_perc/*') + path ('frag_len/*') output: path "*multiqc_report.html", emit: report path "*_data" , emit: data path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -36,5 +45,10 @@ process MULTIQC { def custom_config = params.multiqc_config ? "--config $multiqc_custom_config" : '' """ multiqc -f $args $custom_config . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS """ } diff --git a/modules/local/peak_counts.nf b/modules/local/peak_counts.nf new file mode 100644 index 00000000..36ce7c60 --- /dev/null +++ b/modules/local/peak_counts.nf @@ -0,0 +1,31 @@ +process PEAK_COUNTS { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : + 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + + input: + tuple val(meta), path(bed) + path peak_counts_header + + output: + tuple val(meta), path("*mqc.tsv"), emit: count_mqc + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cat ${bed} | wc -l | awk -v OFS='\t' '{ print "Peak Count", \$1 }' | cat $peak_counts_header - > ${prefix}_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/local/peak_frip.nf b/modules/local/peak_frip.nf new file mode 100644 index 00000000..3a3bbe8b --- /dev/null +++ b/modules/local/peak_frip.nf @@ -0,0 +1,33 @@ +process PEAK_FRIP { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : + 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + + input: + tuple val(meta), path(peaks_bed), path(fragments_bed), path(flagstat) + path frip_score_header + val min_frip_overlap + + output: + tuple val(meta), path("*mqc.tsv"), emit: frip_mqc + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + READS_IN_PEAKS=\$(bedtools intersect -a ${fragments_bed} -b ${peaks_bed} -bed -c -f $min_frip_overlap | awk -F '\t' '{sum += \$NF} END {print sum * 2}') + grep -m 1 'mapped (' ${flagstat} | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "Peak FRiP Score", a/\$1}' | cat $frip_score_header - > ${prefix}_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/local/modules/generate_reports/main.nf b/modules/local/python/frag_len_hist.nf similarity index 58% rename from modules/local/modules/generate_reports/main.nf rename to modules/local/python/frag_len_hist.nf index 4267d2e9..ccd3f649 100644 --- a/modules/local/modules/generate_reports/main.nf +++ b/modules/local/python/frag_len_hist.nf @@ -1,5 +1,5 @@ -process GENERATE_REPORTS { - label 'process_ultralow' +process FRAG_LEN_HIST { + label 'process_medium' conda (params.enable_conda ? "conda-forge::python=3.8.3 conda-forge::pandas=1.3.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -7,38 +7,24 @@ process GENERATE_REPORTS { 'quay.io/biocontainers/mulled-v2-f42a44964bca5225c7860882e231a7b5488b5485:47ef981087c59f79fdbcab4d9d7316e9ac2e688d-0' }" input: - path meta_data - path meta_data_ctrl path raw_fragments - path bed_fragments - path seacr_beds path frag_len_header_multiqc output: - path '*.pdf' , emit: pdf - path '*.csv' , emit: csv - path '*.png' , emit: png - path '*frag_len_mqc.yml', emit: frag_len_multiqc + path '*frag_len_mqc.yml', emit: frag_len_mqc path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def meta_data_resolved = meta_data ? 
meta_data : meta_data_ctrl - """ - reporting.py gen_reports \\ - --meta $meta_data_resolved \\ - --meta_ctrl $meta_data_ctrl \\ - --raw_frag "*.frags.len.txt" \\ - --bin_frag "*bin500.awk.bed" \\ - --seacr_bed "*bed*.bed" \\ - --output . \\ - --log log.txt + calc_frag_hist.py \\ + --frag_path "*len.txt" \\ + --output frag_len_hist.txt - if [ -f "03_03_frag_len_mqc.txt" ]; then - cat $frag_len_header_multiqc 03_03_frag_len_mqc.txt > frag_len_mqc.yml + if [ -f "frag_len_hist.txt" ]; then + cat $frag_len_header_multiqc frag_len_hist.txt > frag_len_mqc.yml fi cat <<-END_VERSIONS > versions.yml @@ -49,4 +35,4 @@ process GENERATE_REPORTS { seaborn: \$(python -c 'import seaborn; print(seaborn.__version__)') END_VERSIONS """ -} +} \ No newline at end of file diff --git a/modules/local/python/igv_session.nf b/modules/local/python/igv_session.nf index 275f7cdb..231b28e2 100644 --- a/modules/local/python/igv_session.nf +++ b/modules/local/python/igv_session.nf @@ -9,12 +9,15 @@ process IGV_SESSION { input: path genome - path gtf + path genome_index + //path gtf + tuple val(meta), path(gtf_bed), path(gtf_bed_index) path beds + path secondary_beds path bigwig output: - path('*.{txt,xml,bed,bigWig,fa,fna,gtf,gff}', includeInputs:true) + path('*.{txt,xml,bed,bigWig,fa,fai,fna,gtf,gff,narrowPeak,broadPeak,gz,tbi,bedGraph}', includeInputs:true) path "versions.yml" , emit: versions when: @@ -26,6 +29,7 @@ process IGV_SESSION { colour_pos = 0 file_list = beds.collect{it.toString()}.sort() + file_list += secondary_beds.collect{it.toString()}.sort() file_list += bigwig.collect{it.toString()}.sort() for(file in file_list){ file_split = file.split('_R') @@ -49,7 +53,7 @@ process IGV_SESSION { find -L * -iname "*.gtf" -exec echo -e {}"\\t0,48,73" \\; > gtf.igv.txt find -L * -iname "*.gff" -exec echo -e {}"\\t0,48,73" \\; > gff.igv.txt cat *.txt > igv_files.txt - igv_files_to_session.py igv_session.xml igv_files.txt $genome --path_prefix './' + igv_files_to_session.py igv_session.xml igv_files.txt $genome $gtf_bed --path_prefix './' cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/python/merge_sample_metadata.nf b/modules/local/python/merge_sample_metadata.nf new file mode 100644 index 00000000..038fad8e --- /dev/null +++ b/modules/local/python/merge_sample_metadata.nf @@ -0,0 +1,38 @@ +process MERGE_SAMPLE_METADATA { + label 'process_single' + + conda (params.enable_conda ? "conda-forge::python=3.8.3 conda-forge::pandas=1.3.3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-f42a44964bca5225c7860882e231a7b5488b5485:47ef981087c59f79fdbcab4d9d7316e9ac2e688d-0' : + 'quay.io/biocontainers/mulled-v2-f42a44964bca5225c7860882e231a7b5488b5485:47ef981087c59f79fdbcab4d9d7316e9ac2e688d-0' }" + + input: + path metadata + + output: + path '*.csv' , emit: csv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def ext = task.ext.output ?: 'csv' + def output = task.ext.output ?: 'NO_NAME.csv' + def id_parse_string = task.ext.id_parse_string ?: '' + + """ + reports.py merge_samples \\ + --metadata "*.${ext}" \\ + --id_parse_string $id_parse_string \\ + --output $output \\ + --log log.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | grep -E -o \"([0-9]{1,}\\.)+[0-9]{1,}\") + numpy: \$(python -c 'import numpy; print(numpy.__version__)') + pandas: \$(python -c 'import pandas; print(pandas.__version__)') + END_VERSIONS + """ +} diff --git a/modules/local/modules/calculate_peak_reprod/main.nf b/modules/local/python/peak_reprod.nf similarity index 73% rename from modules/local/modules/calculate_peak_reprod/main.nf rename to modules/local/python/peak_reprod.nf index 11bfea9c..2cdb77ca 100644 --- a/modules/local/modules/calculate_peak_reprod/main.nf +++ b/modules/local/python/peak_reprod.nf @@ -1,6 +1,6 @@ process CALCULATE_PEAK_REPROD { tag "$meta.id" - label 'process_ultralow' + label 'process_medium' conda (params.enable_conda ? "conda-forge::python=3.8.3 conda-forge::dask=2021.9.1 conda-forge::pandas=1.3.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -9,21 +9,28 @@ process CALCULATE_PEAK_REPROD { input: tuple val(meta), path(bed) + path peak_reprod_header_multiqc output: - tuple val(meta), path('peak_repro.csv'), emit: csv - path "versions.yml" , emit: versions + tuple val(meta), path("*peak_repro.tsv"), emit: tsv + path "*_mqc.tsv" , emit: mqc + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ - peak_reproducability.py \\ + peak_reproducibility.py \\ + --sample_id $meta.id \\ --intersect $bed \\ --threads ${task.cpus} \\ --outpath . + cat $peak_reprod_header_multiqc *peak_repro.tsv > ${prefix}_mqc.tsv + cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | grep -E -o \"([0-9]{1,}\\.)+[0-9]{1,}\") diff --git a/modules/local/modules/plot_consensus_peaks/main.nf b/modules/local/python/plot_consensus_peaks.nf similarity index 95% rename from modules/local/modules/plot_consensus_peaks/main.nf rename to modules/local/python/plot_consensus_peaks.nf index c7fbe474..193ef0ac 100644 --- a/modules/local/modules/plot_consensus_peaks/main.nf +++ b/modules/local/python/plot_consensus_peaks.nf @@ -1,5 +1,5 @@ process PLOT_CONSENSUS_PEAKS { - label 'process_min' + label 'process_single' conda (params.enable_conda ? "conda-forge::python=3.8.3 conda-forge::numpy=1.20.* conda-forge::pandas=1.2.* conda-forge::upsetplot=0.4.4" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -15,7 +15,7 @@ process PLOT_CONSENSUS_PEAKS { script: """ - consensus_peaks.py \\ + plot_consensus_peaks.py \\ --peaks "*.peaks.bed" \\ --outpath . 
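The peak metric modules above all share the same MultiQC custom-content pattern: a commented metadata header (passed in via the `*_header` inputs) is concatenated with plain TSV rows, and MultiQC discovers the result through the `*_mqc.tsv` suffix. A minimal sketch of what such a file could contain follows; the header keys use MultiQC's custom-content syntax, but the ids and values here are hypothetical, not copied from the pipeline's assets:

```bash
# Hypothetical example of a *_mqc.tsv file like those assembled by the cat commands above
cat <<'EOF' > h3k27me3_R1_mqc.tsv
# id: 'peak_repro'
# section_name: 'Peak Reproducibility'
# plot_type: 'table'
Sample	Peak Reproducibility %
h3k27me3_R1	91.2
EOF
```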
diff --git a/modules/local/python/samplesheet_check.nf b/modules/local/python/samplesheet_check.nf index d2aa887c..0ff1a079 100644 --- a/modules/local/python/samplesheet_check.nf +++ b/modules/local/python/samplesheet_check.nf @@ -1,6 +1,6 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" - label 'process_min' + label 'process_single' conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) container "quay.io/biocontainers/python:3.8.3" diff --git a/modules/local/samtools_custom_view.nf b/modules/local/samtools_custom_view.nf index 699c7e03..0540f4e3 100644 --- a/modules/local/samtools_custom_view.nf +++ b/modules/local/samtools_custom_view.nf @@ -1,6 +1,6 @@ process SAMTOOLS_CUSTOMVIEW { tag "$meta.id" - label 'process_ultralow' + label 'process_low' conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -14,6 +14,9 @@ process SAMTOOLS_CUSTOMVIEW { tuple val(meta), path("*.txt") , emit: tsv path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' diff --git a/modules/nf-core/modules/bedtools/bamtobed/main.nf b/modules/nf-core/bedtools/bamtobed/main.nf similarity index 97% rename from modules/nf-core/modules/bedtools/bamtobed/main.nf rename to modules/nf-core/bedtools/bamtobed/main.nf index fd782dec..e3d3ee21 100644 --- a/modules/nf-core/modules/bedtools/bamtobed/main.nf +++ b/modules/nf-core/bedtools/bamtobed/main.nf @@ -1,6 +1,6 @@ process BEDTOOLS_BAMTOBED { tag "$meta.id" - label 'process_medium' + label 'process_single' conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/nf-core/modules/bedtools/bamtobed/meta.yml b/modules/nf-core/bedtools/bamtobed/meta.yml similarity index 100% rename from modules/nf-core/modules/bedtools/bamtobed/meta.yml rename to modules/nf-core/bedtools/bamtobed/meta.yml diff --git a/modules/nf-core/bedtools/complement/main.nf b/modules/nf-core/bedtools/complement/main.nf new file mode 100644 index 00000000..d539dbdb --- /dev/null +++ b/modules/nf-core/bedtools/complement/main.nf @@ -0,0 +1,37 @@ +process BEDTOOLS_COMPLEMENT { + tag "$meta.id" + label 'process_single' + + conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : + 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + + input: + tuple val(meta), path(bed) + path sizes + + output: + tuple val(meta), path('*.bed'), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bedtools \\ + complement \\ + -i $bed \\ + -g $sizes \\ + $args \\ + > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/complement/meta.yml b/modules/nf-core/bedtools/complement/meta.yml new file mode 100644 index 00000000..708a2161 --- /dev/null +++ b/modules/nf-core/bedtools/complement/meta.yml @@ -0,0 +1,43 @@ +name: bedtools_complement +description: Returns all intervals in a genome that are not covered by at least one interval in the input BED/GFF/VCF file. +keywords: + - bed + - complement +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/complement.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: Input BED file + pattern: "*.{bed}" + - sizes: + type: file + description: File which defines the chromosome lengths for a given genome + pattern: "*.{sizes}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: Bed file with all genomic intervals that are not covered by at least one record from the input file. + pattern: "*.{bed}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" + - "@sruthipsuresh" + - "@drpatelh" diff --git a/modules/nf-core/modules/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf similarity index 98% rename from modules/nf-core/modules/bedtools/genomecov/main.nf rename to modules/nf-core/bedtools/genomecov/main.nf index 4804d40a..05e359c9 100644 --- a/modules/nf-core/modules/bedtools/genomecov/main.nf +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -1,6 +1,6 @@ process BEDTOOLS_GENOMECOV { tag "$meta.id" - label 'process_medium' + label 'process_single' conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules/nf-core/modules/bedtools/genomecov/meta.yml b/modules/nf-core/bedtools/genomecov/meta.yml similarity index 100% rename from modules/nf-core/modules/bedtools/genomecov/meta.yml rename to modules/nf-core/bedtools/genomecov/meta.yml diff --git a/modules/nf-core/modules/bedtools/intersect/main.nf b/modules/nf-core/bedtools/intersect/main.nf similarity index 97% rename from modules/nf-core/modules/bedtools/intersect/main.nf rename to modules/nf-core/bedtools/intersect/main.nf index 29f16adb..2dee297c 100644 --- a/modules/nf-core/modules/bedtools/intersect/main.nf +++ b/modules/nf-core/bedtools/intersect/main.nf @@ -1,6 +1,6 @@ process BEDTOOLS_INTERSECT { tag "$meta.id" - label 'process_medium' + label 'process_single' conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/nf-core/modules/bedtools/intersect/meta.yml b/modules/nf-core/bedtools/intersect/meta.yml similarity index 100% rename from modules/nf-core/modules/bedtools/intersect/meta.yml rename to modules/nf-core/bedtools/intersect/meta.yml diff --git a/modules/nf-core/modules/bedtools/merge/main.nf b/modules/nf-core/bedtools/merge/main.nf similarity index 97% rename from modules/nf-core/modules/bedtools/merge/main.nf rename to modules/nf-core/bedtools/merge/main.nf index 6d1daa03..ba7cd9eb 100644 --- a/modules/nf-core/modules/bedtools/merge/main.nf +++ b/modules/nf-core/bedtools/merge/main.nf @@ -1,6 +1,6 @@ process BEDTOOLS_MERGE { tag "$meta.id" - label 'process_medium' + label 'process_single' conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules/nf-core/modules/bedtools/merge/meta.yml b/modules/nf-core/bedtools/merge/meta.yml similarity index 100% rename from modules/nf-core/modules/bedtools/merge/meta.yml rename to modules/nf-core/bedtools/merge/meta.yml diff --git a/modules/nf-core/modules/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf similarity index 100% rename from modules/nf-core/modules/bowtie2/align/main.nf rename to modules/nf-core/bowtie2/align/main.nf diff --git a/modules/nf-core/modules/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml similarity index 98% rename from modules/nf-core/modules/bowtie2/align/meta.yml rename to modules/nf-core/bowtie2/align/meta.yml index c99fa4e3..42ba0f96 100644 --- a/modules/nf-core/modules/bowtie2/align/meta.yml +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -2,7 +2,9 @@ name: bowtie2_align description: Align reads to a reference genome using bowtie2 keywords: - align + - map - fasta + - fastq - genome - reference tools: diff --git a/modules/nf-core/modules/bowtie2/build/main.nf b/modules/nf-core/bowtie2/build/main.nf similarity index 100% rename from modules/nf-core/modules/bowtie2/build/main.nf rename to modules/nf-core/bowtie2/build/main.nf diff --git a/modules/nf-core/modules/bowtie2/build/meta.yml b/modules/nf-core/bowtie2/build/meta.yml similarity index 100% rename from modules/nf-core/modules/bowtie2/build/meta.yml rename to modules/nf-core/bowtie2/build/meta.yml diff --git a/modules/nf-core/modules/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf similarity index 61% rename from modules/nf-core/modules/cat/fastq/main.nf rename to modules/nf-core/cat/fastq/main.nf index b6854895..4fa365d3 100644 --- a/modules/nf-core/modules/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -1,6 +1,6 @@ process CAT_FASTQ { tag "$meta.id" - label 'process_low' + label 'process_single' conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -20,9 +20,9 @@ process CAT_FASTQ { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def readList = reads.collect{ it.toString() } + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] if (meta.single_end) { - if (readList.size > 1) { + if (readList.size >= 1) { """ cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz @@ -33,7 +33,7 @@ process CAT_FASTQ { """ } } else { - if (readList.size > 2) { + if (readList.size >= 2) { def read1 = [] def read2 = [] readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v } @@ -48,4 +48,33 @@ process CAT_FASTQ { """ } } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? 
reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size > 1) { + """ + touch ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size > 2) { + """ + touch ${prefix}_1.merged.fastq.gz + touch ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + } diff --git a/modules/nf-core/modules/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml similarity index 100% rename from modules/nf-core/modules/cat/fastq/meta.yml rename to modules/nf-core/cat/fastq/meta.yml diff --git a/modules/nf-core/modules/custom/getchromsizes/main.nf b/modules/nf-core/custom/getchromsizes/main.nf similarity index 51% rename from modules/nf-core/modules/custom/getchromsizes/main.nf rename to modules/nf-core/custom/getchromsizes/main.nf index 0eabf3a4..8e1693d4 100644 --- a/modules/nf-core/modules/custom/getchromsizes/main.nf +++ b/modules/nf-core/custom/getchromsizes/main.nf @@ -1,6 +1,6 @@ process CUSTOM_GETCHROMSIZES { tag "$fasta" - label 'process_low' + label 'process_single' conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -8,12 +8,13 @@ process CUSTOM_GETCHROMSIZES { 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: - path fasta + tuple val(meta), path(fasta) output: - path '*.sizes' , emit: sizes - path '*.fai' , emit: fai - path "versions.yml", emit: versions + tuple val(meta), path ("*.sizes"), emit: sizes + tuple val(meta), path ("*.fai") , emit: fai + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -26,7 +27,18 @@ process CUSTOM_GETCHROMSIZES { cat <<-END_VERSIONS > versions.yml "${task.process}": - custom: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta}.fai + touch ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ } diff --git a/modules/nf-core/modules/custom/getchromsizes/meta.yml b/modules/nf-core/custom/getchromsizes/meta.yml similarity index 62% rename from modules/nf-core/modules/custom/getchromsizes/meta.yml rename to modules/nf-core/custom/getchromsizes/meta.yml index ee6c2571..219ca1d8 100644 --- a/modules/nf-core/modules/custom/getchromsizes/meta.yml +++ b/modules/nf-core/custom/getchromsizes/meta.yml @@ -14,12 +14,22 @@ tools: licence: ["MIT"] input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - fasta: type: file description: FASTA file - pattern: "*.{fasta}" + pattern: "*.{fa,fasta,fna,fas}" output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] - sizes: type: file description: File containing chromosome lengths @@ -28,11 +38,16 @@ output: type: file description: FASTA index file pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" - versions: type: file - description: File containing software version + description: File containing software versions pattern: "versions.yml" authors: - "@tamara-hodgetts" - "@chris-cheshire" + - "@muffato" diff --git a/modules/nf-core/modules/deeptools/computematrix/main.nf b/modules/nf-core/deeptools/computematrix/main.nf similarity index 100% rename from modules/nf-core/modules/deeptools/computematrix/main.nf rename to modules/nf-core/deeptools/computematrix/main.nf diff --git a/modules/nf-core/modules/deeptools/computematrix/meta.yml b/modules/nf-core/deeptools/computematrix/meta.yml similarity index 100% rename from modules/nf-core/modules/deeptools/computematrix/meta.yml rename to modules/nf-core/deeptools/computematrix/meta.yml diff --git a/modules/nf-core/deeptools/plotfingerprint/main.nf b/modules/nf-core/deeptools/plotfingerprint/main.nf new file mode 100644 index 00000000..83613be7 --- /dev/null +++ b/modules/nf-core/deeptools/plotfingerprint/main.nf @@ -0,0 +1,41 @@ +process DEEPTOOLS_PLOTFINGERPRINT { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : + 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + + input: + tuple val(meta), path(bams), path(bais) + + output: + tuple val(meta), path("*.pdf") , emit: pdf + tuple val(meta), path("*.raw.txt") , emit: matrix + tuple val(meta), path("*.qcmetrics.txt"), emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extend = (meta.single_end && params.fragment_size > 0) ? "--extendReads ${params.fragment_size}" : '' + """ + plotFingerprint \\ + $args \\ + $extend \\ + --bamfiles ${bams.join(' ')} \\ + --plotFile ${prefix}.plotFingerprint.pdf \\ + --outRawCounts ${prefix}.plotFingerprint.raw.txt \\ + --outQualityMetrics ${prefix}.plotFingerprint.qcmetrics.txt \\ + --numberOfProcessors $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(plotFingerprint --version | sed -e "s/plotFingerprint //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/deeptools/plotfingerprint/meta.yml b/modules/nf-core/deeptools/plotfingerprint/meta.yml new file mode 100644 index 00000000..07c25748 --- /dev/null +++ b/modules/nf-core/deeptools/plotfingerprint/meta.yml @@ -0,0 +1,61 @@ +name: deeptools_plotfingerprint +description: plots cumulative reads coverages by BAM file +keywords: + - plot + - fingerprint + - cumulative coverage + - bam +tools: + - deeptools: + description: A set of user-friendly tools for normalization and visualization of deep-sequencing data + homepage: + documentation: https://deeptools.readthedocs.io/en/develop/index.html + tool_dev_url: https://github.com/deeptools/deepTools + doi: "10.1093/nar/gku365" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - bam: + type: file + description: One or more BAM files + pattern: "*.{bam}" + - bais: + type: file + description: Corresponding BAM file indexes + pattern: "*.bam.bai" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - pdf: + type: file + description: | + Output figure containing resulting plot + pattern: "*.{plotFingerprint.pdf}" + - matrix: + type: file + description: | + Output file summarizing the read counts per bin + pattern: "*.{plotFingerprint.raw.txt}" + - metrics: + type: file + description: | + file containing BAM file quality metrics + pattern: "*.{qcmetrics.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@emiller88" + - "@drpatelh" + - "@joseespinosa" diff --git a/modules/nf-core/modules/deeptools/plotheatmap/main.nf b/modules/nf-core/deeptools/plotheatmap/main.nf similarity index 100% rename from modules/nf-core/modules/deeptools/plotheatmap/main.nf rename to modules/nf-core/deeptools/plotheatmap/main.nf diff --git a/modules/nf-core/modules/deeptools/plotheatmap/meta.yml b/modules/nf-core/deeptools/plotheatmap/meta.yml similarity index 100% rename from modules/nf-core/modules/deeptools/plotheatmap/meta.yml rename to modules/nf-core/deeptools/plotheatmap/meta.yml diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/fastqc/main.nf similarity index 100% rename from modules/nf-core/modules/fastqc/main.nf rename to modules/nf-core/fastqc/main.nf diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml similarity index 100% rename from modules/nf-core/modules/fastqc/meta.yml rename to modules/nf-core/fastqc/meta.yml diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/gunzip/main.nf similarity index 75% rename from modules/nf-core/modules/gunzip/main.nf rename to modules/nf-core/gunzip/main.nf index 61bf1afa..fa6ba26a 100644 --- a/modules/nf-core/modules/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -1,6 +1,6 @@ process GUNZIP { tag "$archive" - label 'process_low' + label 'process_single' conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
@@ -31,4 +31,14 @@ process GUNZIP { gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') END_VERSIONS """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml similarity index 100% rename from modules/nf-core/modules/gunzip/meta.yml rename to modules/nf-core/gunzip/meta.yml diff --git a/modules/nf-core/modules/macs2/callpeak/main.nf b/modules/nf-core/macs2/callpeak/main.nf similarity index 100% rename from modules/nf-core/modules/macs2/callpeak/main.nf rename to modules/nf-core/macs2/callpeak/main.nf diff --git a/modules/nf-core/modules/macs2/callpeak/meta.yml b/modules/nf-core/macs2/callpeak/meta.yml similarity index 100% rename from modules/nf-core/modules/macs2/callpeak/meta.yml rename to modules/nf-core/macs2/callpeak/meta.yml diff --git a/modules/nf-core/modules/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf similarity index 92% rename from modules/nf-core/modules/picard/markduplicates/main.nf rename to modules/nf-core/picard/markduplicates/main.nf index 87f913d4..4e559fea 100644 --- a/modules/nf-core/modules/picard/markduplicates/main.nf +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -2,10 +2,10 @@ process PICARD_MARKDUPLICATES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/modules/picard/markduplicates/meta.yml b/modules/nf-core/picard/markduplicates/meta.yml similarity index 100% rename from modules/nf-core/modules/picard/markduplicates/meta.yml rename to modules/nf-core/picard/markduplicates/meta.yml diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 00000000..ef940db2 --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,44 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path ("*.fai"), emit: fai + tuple val(meta), path ("*.gzi"), emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + faidx \\ + $args \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta}.fai + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 00000000..fe2fe9a1 --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,47 @@ +name: samtools_faidx +description: Index FASTA file +keywords: + - index + - fasta +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf similarity index 86% rename from modules/nf-core/modules/samtools/flagstat/main.nf rename to modules/nf-core/samtools/flagstat/main.nf index b87b2108..c3152aca 100644 --- a/modules/nf-core/modules/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -1,6 +1,6 @@ process SAMTOOLS_FLAGSTAT { tag "$meta.id" - label 'process_low' + label 'process_single' conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
@@ -19,12 +19,13 @@ process SAMTOOLS_FLAGSTAT { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ samtools \\ flagstat \\ - --threads ${task.cpus-1} \\ + --threads ${task.cpus} \\ $bam \\ - > ${bam}.flagstat + > ${prefix}.flagstat cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml similarity index 100% rename from modules/nf-core/modules/samtools/flagstat/meta.yml rename to modules/nf-core/samtools/flagstat/meta.yml diff --git a/modules/nf-core/modules/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf similarity index 88% rename from modules/nf-core/modules/samtools/idxstats/main.nf rename to modules/nf-core/samtools/idxstats/main.nf index a49ff35f..87618e5f 100644 --- a/modules/nf-core/modules/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -19,11 +19,14 @@ process SAMTOOLS_IDXSTATS { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ samtools \\ idxstats \\ + --threads ${task.cpus-1} \\ $bam \\ - > ${bam}.idxstats + > ${prefix}.idxstats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml similarity index 100% rename from modules/nf-core/modules/samtools/idxstats/meta.yml rename to modules/nf-core/samtools/idxstats/meta.yml diff --git a/modules/nf-core/modules/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf similarity index 100% rename from modules/nf-core/modules/samtools/index/main.nf rename to modules/nf-core/samtools/index/main.nf diff --git a/modules/nf-core/modules/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml similarity index 100% rename from modules/nf-core/modules/samtools/index/meta.yml rename to modules/nf-core/samtools/index/meta.yml diff --git a/modules/nf-core/modules/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf similarity index 95% rename from modules/nf-core/modules/samtools/sort/main.nf rename to modules/nf-core/samtools/sort/main.nf index b4fc1cbe..ab7f1cca 100644 --- a/modules/nf-core/modules/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -12,6 +12,7 @@ process SAMTOOLS_SORT { output: tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.csi"), emit: csi, optional: true path "versions.yml" , emit: versions when: diff --git a/modules/nf-core/modules/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml similarity index 92% rename from modules/nf-core/modules/samtools/sort/meta.yml rename to modules/nf-core/samtools/sort/meta.yml index a820c55a..09289751 100644 --- a/modules/nf-core/modules/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -39,6 +39,10 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/modules/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf similarity index 88% rename from modules/nf-core/modules/samtools/stats/main.nf rename to modules/nf-core/samtools/stats/main.nf index bbdc3240..9b0c3867 100644 --- a/modules/nf-core/modules/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -1,6 +1,6 @@ process SAMTOOLS_STATS { tag "$meta.id" - label 'process_low' + label 
'process_single' conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -20,14 +20,15 @@ process SAMTOOLS_STATS { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" """ samtools \\ stats \\ - --threads ${task.cpus-1} \\ + --threads ${task.cpus} \\ ${reference} \\ ${input} \\ - > ${input}.stats + > ${prefix}.stats cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -38,7 +39,7 @@ process SAMTOOLS_STATS { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${input}.stats + touch ${prefix}.stats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml similarity index 100% rename from modules/nf-core/modules/samtools/stats/meta.yml rename to modules/nf-core/samtools/stats/meta.yml diff --git a/modules/nf-core/modules/seacr/callpeak/main.nf b/modules/nf-core/seacr/callpeak/main.nf similarity index 83% rename from modules/nf-core/modules/seacr/callpeak/main.nf rename to modules/nf-core/seacr/callpeak/main.nf index 340c0eef..f0311f2f 100644 --- a/modules/nf-core/modules/seacr/callpeak/main.nf +++ b/modules/nf-core/seacr/callpeak/main.nf @@ -1,9 +1,8 @@ -def VERSION = '1.3' // Version information not provided by tool on CLI - process SEACR_CALLPEAK { tag "$meta.id" label 'process_medium' + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. conda (params.enable_conda ? "bioconda::seacr=1.3 conda-forge::r-base=4.0.2 bioconda::bedtools=2.30.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-03bfeb32fe80910c231f630d4262b83677c8c0f4:f4bb19b68e66de27e4c64306f951d5ff11919931-0' : @@ -24,6 +23,7 @@ process SEACR_CALLPEAK { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def function_switch = ctrlbedgraph ? "$ctrlbedgraph" : "$threshold" + def VERSION = '1.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ SEACR_1.3.sh \\ $bedgraph \\ diff --git a/modules/nf-core/modules/seacr/callpeak/meta.yml b/modules/nf-core/seacr/callpeak/meta.yml similarity index 100% rename from modules/nf-core/modules/seacr/callpeak/meta.yml rename to modules/nf-core/seacr/callpeak/meta.yml diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf new file mode 100644 index 00000000..0d05984a --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/main.nf @@ -0,0 +1,45 @@ +process TABIX_BGZIPTABIX { + tag "$meta.id" + label 'process_single' + + conda (params.enable_conda ? 'bioconda::tabix=1.11' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : + 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.gz"), path("*.tbi"), emit: gz_tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bgzip --threads ${task.cpus} -c $args $input > ${prefix}.${input.getExtension()}.gz + tabix $args2 ${prefix}.${input.getExtension()}.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.gz + touch ${prefix}.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml new file mode 100644 index 00000000..49c03289 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/meta.yml @@ -0,0 +1,45 @@ +name: tabix_bgziptabix +description: bgzip a sorted tab-delimited genome file and then create tabix index +keywords: + - bgzip + - compress + - index + - tabix + - vcf +tools: + - tabix: + description: Generic indexer for TAB-delimited genome position files. + homepage: https://www.htslib.org/doc/tabix.html + documentation: https://www.htslib.org/doc/tabix.1.html + doi: 10.1093/bioinformatics/btq671 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tab: + type: file + description: TAB-delimited genome position file + pattern: "*.{bed,gff,sam,vcf}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gz: + type: file + description: Output compressed file + pattern: "*.{gz}" + - tbi: + type: file + description: tabix index file + pattern: "*.{gz.tbi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" diff --git a/modules/nf-core/modules/ucsc/bedclip/main.nf b/modules/nf-core/ucsc/bedclip/main.nf old mode 100644 new mode 100755 similarity index 77% rename from modules/nf-core/modules/ucsc/bedclip/main.nf rename to modules/nf-core/ucsc/bedclip/main.nf index 969a8f73..42dd08bb --- a/modules/nf-core/modules/ucsc/bedclip/main.nf +++ b/modules/nf-core/ucsc/bedclip/main.nf @@ -1,9 +1,8 @@ -def VERSION = '377' // Version information not provided by tool on CLI - process UCSC_BEDCLIP { tag "$meta.id" label 'process_medium' + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. conda (params.enable_conda ? "bioconda::ucsc-bedclip=377" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ucsc-bedclip:377--h0b8a92a_2' : @@ -23,6 +22,7 @@ process UCSC_BEDCLIP { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
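+    // A VERSION string pinned beside the script body keeps the emitted versions.yml
+    // record in step with the pinned container above; the heredoc below is assumed to
+    // interpolate it as something like `ucsc: $VERSION`, since bedClip itself prints
+    // no version on the command line.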
""" bedClip \\ $bedgraph \\ diff --git a/modules/nf-core/modules/ucsc/bedclip/meta.yml b/modules/nf-core/ucsc/bedclip/meta.yml old mode 100644 new mode 100755 similarity index 100% rename from modules/nf-core/modules/ucsc/bedclip/meta.yml rename to modules/nf-core/ucsc/bedclip/meta.yml diff --git a/modules/nf-core/modules/ucsc/bedgraphtobigwig/main.nf b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf similarity index 76% rename from modules/nf-core/modules/ucsc/bedgraphtobigwig/main.nf rename to modules/nf-core/ucsc/bedgraphtobigwig/main.nf index ef0ca088..a2979e04 100644 --- a/modules/nf-core/modules/ucsc/bedgraphtobigwig/main.nf +++ b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf @@ -1,9 +1,8 @@ -def VERSION = '377' // Version information not provided by tool on CLI - process UCSC_BEDGRAPHTOBIGWIG { tag "$meta.id" - label 'process_medium' + label 'process_single' + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. conda (params.enable_conda ? "bioconda::ucsc-bedgraphtobigwig=377" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ucsc-bedgraphtobigwig:377--h446ed27_1' : @@ -23,6 +22,7 @@ process UCSC_BEDGRAPHTOBIGWIG { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ bedGraphToBigWig \\ $bedgraph \\ diff --git a/modules/nf-core/modules/ucsc/bedgraphtobigwig/meta.yml b/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml old mode 100644 new mode 100755 similarity index 100% rename from modules/nf-core/modules/ucsc/bedgraphtobigwig/meta.yml rename to modules/nf-core/ucsc/bedgraphtobigwig/meta.yml diff --git a/modules/nf-core/modules/untar/main.nf b/modules/nf-core/untar/main.nf similarity index 67% rename from modules/nf-core/modules/untar/main.nf rename to modules/nf-core/untar/main.nf index 29ab10a5..71eea7b2 100644 --- a/modules/nf-core/modules/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -1,6 +1,6 @@ process UNTAR { tag "$archive" - label 'process_low' + label 'process_single' conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -25,12 +25,23 @@ process UNTAR { """ mkdir output - tar \\ - -C output --strip-components 1 \\ - -xzvf \\ - $args \\ - $archive \\ - $args2 + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in output + if [[ \$(tar -tzf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C output --strip-components 1 \\ + -xzvf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C output \\ + -xzvf \\ + $args \\ + $archive \\ + $args2 + fi mv output ${untar} diff --git a/modules/nf-core/modules/untar/meta.yml b/modules/nf-core/untar/meta.yml similarity index 85% rename from modules/nf-core/modules/untar/meta.yml rename to modules/nf-core/untar/meta.yml index d426919b..ea7a3f38 100644 --- a/modules/nf-core/modules/untar/meta.yml +++ b/modules/nf-core/untar/meta.yml @@ -26,9 +26,9 @@ output: Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - untar: - type: file - description: - pattern: "*.*" + type: directory + description: Directory containing contents of archive + pattern: "*/" - versions: type: file description: File containing software versions @@ -36,3 +36,5 @@ output: authors: - "@joseespinosa" - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 927e0fe7..99e2a700 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,6 +20,7 @@ params { genome = null spikein_genome = "K12-MG1655" gene_bed = null + blacklist = null save_reference = false only_genome = false @@ -49,7 +50,7 @@ params { save_align_intermed = false // Filtering - minimum_alignment_q_score = 0 + minimum_alignment_q_score = 20 // Deduplication skip_removeduplicates = false @@ -58,34 +59,49 @@ params { // Read Normalisation normalisation_mode = "Spikein" - normalisation_binsize = 1 + normalisation_binsize = 50 normalisation_c = 10000 - igg_scale_factor = 1 + igg_scale_factor = 0.5 // Peak Caller default param peakcaller = 'seacr' - // SEACR Peak Calling + // Peak Calling use_control = true - peak_threshold = 0.05 only_peak_calling = false - - // MACS2 Peak Calling + extend_fragments = true + seacr_norm = 'non' + seacr_stringent = 'stringent' + seacr_peak_threshold = 0.05 macs2_pvalue = 0.05 - macs_gsize = 2.7e9 - run_narrow_peak = false + macs_gsize = 2.7e9 + macs2_narrow_peak = true + macs2_broad_cutoff = 0.1 // Consensus Peaks consensus_peak_mode = 'group' replicate_threshold = 1 - skip_upset_plots = false // Reporting and Visualisation skip_reporting = false skip_igv = false skip_heatmaps = false + skip_dt_qc = false + skip_peak_qc = false + skip_preseq = false skip_multiqc = false - skip_frip = false + igv_show_gene_names = true + min_frip_overlap = 0.20 + min_peak_overlap = 0.20 + + // Deeptools options + dt_heatmap_gene_bodylen = 5000 + dt_heatmap_gene_beforelen = 3000 + dt_heatmap_gene_afterlen = 3000 + dt_heatmap_peak_beforelen = 3000 + dt_heatmap_peak_afterlen = 3000 + dt_qc_bam_binsize = 500 + fragment_size = 100 // PARAM NOT USED // Boilerplate options outdir = "./results" @@ -97,15 +113,17 @@ params { max_multiqc_email_size = "25.MB" plaintext_email = false monochrome_logs = false + hook_url = null help = false igenomes_base = "s3://ngi-igenomes/igenomes/" tracedir = "${params.outdir}/pipeline_info" igenomes_ignore = false validate_params = true show_hidden_params = false - schema_ignore_params = 'genomes,callers,dedup_control_only,run_igv,run_peak_plotting,run_multiqc,run_deep_tools,run_reporting,run_consensus_all,run_peak_calling,run_remove_dups,run_mark_dups,run_q_filter,run_alignment,run_trim_galore_fastqc,run_cat_fastq,run_input_check,run_genome_prep' + schema_ignore_params = 'genomes,callers,dedup_control_only,fragment_size,run_igv,run_multiqc,run_reporting,run_consensus_all,run_peak_calling,run_remove_dups,run_mark_dups,run_read_filter,run_alignment,run_trim_galore_fastqc,run_cat_fastq,run_input_check,run_genome_prep,run_peak_qc,run_deeptools_qc,run_deeptools_heatmaps,run_preseq' enable_conda = false + // Config options custom_config_version = "master" custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" @@ -114,6 +132,7 @@ params { config_profile_contact = null config_profile_url = null + // Max resource options max_memory = "128.GB" max_cpus = 16 @@ -151,6 +170,15 @@ profiles { shifter.enabled = false charliecloud.enabled = false } + mamba { + params.enable_conda = true + conda.useMamba = true + 
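+        // Mamba is a drop-in replacement for conda with a faster dependency solver, so
+        // enable_conda stays true and only the solver changes; every other engine is
+        // switched off to keep the providers mutually exclusive, mirroring the other
+        // profiles. A typical invocation (hypothetical paths) would be:
+        //   nextflow run nf-core/cutandrun -profile mamba,test --outdir ./results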
docker.enabled = false
+        singularity.enabled = false
+        podman.enabled = false
+        shifter.enabled = false
+        charliecloud.enabled = false
+    }
     docker {
         docker.enabled = true
         // Avoid this error:
@@ -205,6 +233,7 @@ profiles {
     test_full_small_local_zip { includeConfig "conf/test_full_small_local_zip.config" } // Runs a full experimental configuration with a small dataset that targets local, already-downloaded compressed input files
 }

+
 // Load igenomes.config if required
 if (!params.igenomes_ignore) {
     includeConfig 'conf/igenomes.config'
@@ -212,6 +241,7 @@
     params.genomes = [:]
 }

+
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
 // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
@@ -252,7 +282,7 @@ manifest {
     description = 'Analysis pipeline for CUT&RUN and CUT&TAG experiments that includes sequencing QC, spike-in normalisation, IgG control normalisation, peak calling and downstream peak analysis.'
     mainScript = 'main.nf'
     nextflowVersion = '!>=21.10.3'
-    version = '2.0.0'
+    version = '3.0.0'
 }

 // Load flowswitch.config
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 02133b0c..6f8649b1 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -202,30 +202,34 @@
             "fa_icon": "fas fa-random",
             "description": "Skips reporting"
         },
+        "skip_preseq": {
+            "type": "boolean",
+            "description": "Skips preseq reporting"
+        },
         "skip_igv": {
             "type": "boolean",
             "fa_icon": "fas fa-random",
             "description": "Skips igv session generation"
         },
+        "skip_dt_qc": {
+            "type": "boolean",
+            "description": "Skips deeptools QC reporting",
+            "fa_icon": "fas fa-random"
+        },
         "skip_heatmaps": {
             "type": "boolean",
             "fa_icon": "fas fa-random",
             "description": "Skips deeptools heatmap generation"
         },
-        "skip_multiqc": {
+        "skip_peak_qc": {
             "type": "boolean",
-            "fa_icon": "fas fa-random",
-            "description": "Skips multiqc"
+            "description": "Skips peak QC reporting",
+            "fa_icon": "fas fa-random"
         },
-        "skip_upset_plots": {
+        "skip_multiqc": {
             "type": "boolean",
             "fa_icon": "fas fa-random",
-            "description": "Skip upset plot calculation"
-        },
-        "skip_frip": {
-            "type": "boolean",
-            "description": "Skip fragments in peaks calculation",
-            "fa_icon": "fas fa-random"
+            "description": "Skips multiqc"
         }
     },
     "fa_icon": "fas fa-exchange-alt"
@@ -314,12 +318,6 @@
             "description": "If the normalisation option is one of \"RPKM\", \"CPM\" or \"BPM\", then this is the bin size that the read count is calculated on.",
             "fa_icon": "fas fa-arrows-alt-h"
         },
-        "peak_threshold": {
-            "type": "number",
-            "default": 0.05,
-            "fa_icon": "fas fa-grip-lines",
-            "description": "Threshold for peak calling when no IgG is present"
-        },
         "peakcaller": {
             "type": "string",
             "default": "seacr",
@@ -332,12 +330,38 @@
             "fa_icon": "fas fa-align-justify",
             "description": "Specifies whether to use a control to normalise peak calls against (e.g.
IgG)" }, + "extend_fragments": { + "type": "boolean", + "default": true, + "fa_icon": "fas fa-align-justify", + "description": "Specifies whether to extend paired-end fragments between the read mates when calculating coveage tracks" + }, "igg_scale_factor": { "type": "number", - "default": 1.0, + "default": 0.5, "fa_icon": "fas fa-align-justify", "description": "Specifies whether the background control is scaled prior to being used to normalise peaks." }, + "seacr_peak_threshold": { + "type": "number", + "default": 0.05, + "fa_icon": "fas fa-align-justify", + "description": "SEACR p-value threshold for peaks" + }, + "seacr_norm": { + "type": "string", + "default": "non", + "fa_icon": "fas fa-align-justify", + "description": "SEACR normalization. ", + "enum": ["non", "norm"] + }, + "seacr_stringent": { + "type": "string", + "default": "stringent", + "fa_icon": "fas fa-align-justify", + "description": "SEACR stringency.", + "enum": ["stringent", "relaxed"] + }, "macs2_pvalue": { "type": "number", "default": 0.05, @@ -350,10 +374,17 @@ "fa_icon": "fas fa-align-justify", "description": "parameter required by MACS2. If using an iGenomes reference these have been provided when `--genome` is set as *GRCh37*, *GRCh38*, *GRCm38*, *WBcel235*, *BDGP6*, *R64-1-1*, *EF2*, *hg38*, *hg19* and *mm10*. Otherwise the gsize will default to GRCh38." }, - "run_narrow_peak": { + "macs2_narrow_peak": { "type": "boolean", + "default": true, "fa_icon": "fas fa-align-justify", - "description": "Specifies whether to run macs2 in narrow peak mode" + "description": "Determines whether MACS2 broad or narrow peak mode is used for the peak caller" + }, + "macs2_broad_cutoff": { + "type": "number", + "default": 0.1, + "fa_icon": "fas fa-align-justify", + "description": "MACS2 broad cutoff parameter" }, "consensus_peak_mode": { "type": "string", @@ -367,10 +398,73 @@ "default": 1.0, "fa_icon": "fas fa-align-justify", "description": "Minimum number of overlapping replicates needed for a consensus peak" + }, + "igv_show_gene_names": { + "type": "boolean", + "default": true, + "fa_icon": "fas fa-align-justify", + "description": "Show gene names instead of symbols in IGV browser sessions" } }, "fa_icon": "fas fa-cog" }, + "reporting_options": { + "title": "Reporting Options", + "type": "object", + "description": "", + "default": "", + "fa_icon": "fas fa-atlas", + "properties": { + "dt_qc_bam_binsize": { + "type": "integer", + "default": 500, + "description": "Deeptools multiBamSummary bam bin size", + "fa_icon": "fas fa-align-justify" + }, + "dt_heatmap_gene_beforelen": { + "type": "integer", + "default": 3000, + "description": "Deeptools heatmap gene plot before length (bases)", + "fa_icon": "fas fa-align-justify" + }, + "dt_heatmap_gene_bodylen": { + "type": "integer", + "default": 5000, + "description": "Deeptools heatmap gene plot body length (bases)", + "fa_icon": "fas fa-align-justify" + }, + "dt_heatmap_gene_afterlen": { + "type": "integer", + "default": 3000, + "description": "Deeptools heatmap gene plot after length (bases)", + "fa_icon": "fas fa-align-justify" + }, + "dt_heatmap_peak_beforelen": { + "type": "integer", + "default": 3000, + "description": "Deeptools heatmap peak plot before length (bases)", + "fa_icon": "fas fa-align-justify" + }, + "dt_heatmap_peak_afterlen": { + "type": "integer", + "default": 3000, + "description": "Deeptools heatmap peak plot after length (bases)", + "fa_icon": "fas fa-align-justify" + }, + "min_frip_overlap": { + "type": "number", + "default": 0.2, + "description": "Minimum 
fragment overlap for FriP score", + "fa_icon": "fas fa-align-justify" + }, + "min_peak_overlap": { + "type": "number", + "default": 0.2, + "description": "Minimum peak overlap for peak reproducibility plot", + "fa_icon": "fas fa-align-justify" + } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -504,6 +598,13 @@ "fa_icon": "fas fa-palette", "hidden": true }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.", + "hidden": true + }, "multiqc_config": { "type": "string", "description": "Custom config file to supply to MultiQC.", @@ -556,6 +657,9 @@ { "$ref": "#/definitions/pipeline_options" }, + { + "$ref": "#/definitions/reporting_options" + }, { "$ref": "#/definitions/institutional_config_options" }, diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..0d62beb6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. +[tool.black] +line-length = 120 +target_version = ["py37", "py38", "py39", "py310"] + +[tool.isort] +profile = "black" +known_first_party = ["nf_core"] +multi_line_output = 3 diff --git a/subworkflows/local/align_bowtie2.nf b/subworkflows/local/align_bowtie2.nf index 0a210c89..97ac616e 100644 --- a/subworkflows/local/align_bowtie2.nf +++ b/subworkflows/local/align_bowtie2.nf @@ -2,8 +2,8 @@ * Alignment with BOWTIE2 */ -include { BOWTIE2_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main' -include { BOWTIE2_ALIGN as BOWTIE2_SPIKEIN_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main' +include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main' +include { BOWTIE2_ALIGN as BOWTIE2_SPIKEIN_ALIGN } from '../../modules/nf-core/bowtie2/align/main' include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' include { BAM_SORT_SAMTOOLS as BAM_SORT_SAMTOOLS_SPIKEIN } from '../nf-core/bam_sort_samtools' diff --git a/subworkflows/local/annotate_meta_awk.nf b/subworkflows/local/annotate_meta_awk.nf deleted file mode 100644 index e0206c37..00000000 --- a/subworkflows/local/annotate_meta_awk.nf +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Annotate the pipeline meta data with the columns from a csv file - * generated from processing a report text file with an awk script - */ - -include { AWK_SCRIPT } from '../../modules/local/linux/awk_script' -include { AWK } from '../../modules/local/linux/awk' - -workflow ANNOTATE_META_AWK { - take: - passthrough - report - script - meta_prefix // string - meta_suffix // string - script_mode // bool - - main: - ch_versions = Channel.empty() - - // Strip out the sample id from the meta in the passthrough - ch_paths = passthrough.map { row -> [row[0].id, row[0], row[1..-1]].flatten() } - - ch_annotated_meta = Channel.empty() - // Can run awk in script mode with a file from assets or with a setup of command line args - if(script_mode) { - AWK_SCRIPT ( report, script ) - ch_versions = ch_versions.mix(AWK_SCRIPT.out.versions) - - AWK_SCRIPT.out.file - .splitCsv(header:true) - .map { row -> - new_meta = [:] - row[1].each{ k, v -> new_meta.put(meta_prefix + k + meta_suffix, v) } - [row[0].id, new_meta] - } - .join ( ch_paths ) - .map { row -> [ row[2] << row[1], row[3..-1] ] } - .set 
{ ch_annotated_meta } - } - else { - AWK ( report ) - ch_versions = ch_versions.mix(AWK.out.versions) - - AWK.out.file - .splitCsv(header:true) - .map { row -> - new_meta = [:] - row[1].each{ k, v -> new_meta.put(meta_prefix + k + meta_suffix, v) } - [row[0].id, new_meta] - } - .join ( ch_paths ) - .map { row -> [ row[2] << row[1], row[3..-1] ] } - .set { ch_annotated_meta } - } - - emit: - output = ch_annotated_meta // channel: [ val(annotated_meta), [ passthrough ] ] - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/annotate_meta_csv.nf b/subworkflows/local/annotate_meta_csv.nf deleted file mode 100644 index e7423351..00000000 --- a/subworkflows/local/annotate_meta_csv.nf +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Annotate the pipeline meta data with a csv file - */ - -workflow ANNOTATE_META_CSV { - take: - passthrough // channel - reports // file - meta_suffix // string - meta_prefix // string - - main: - - main: - // Strip out the sample id from the meta in the passthrough - ch_paths = passthrough.map { row -> [row[0].id, row[0], row[1..-1]].flatten() } - - reports.splitCsv(header:true) - .map { row -> - new_meta = [:] - row[1].each{ k, v -> new_meta.put(meta_prefix + k + meta_suffix, v) } - [row[0].id, new_meta] - } - .join ( ch_paths ) - .map { row -> [ row[2] << row[1], row[3..-1] ] } - .set { ch_annotated_meta } - - emit: - output = ch_annotated_meta // channel: [ val(annotated_meta), [ passthrough ] ] -} diff --git a/subworkflows/local/calculate_fragments.nf b/subworkflows/local/calculate_fragments.nf deleted file mode 100644 index 171358b7..00000000 --- a/subworkflows/local/calculate_fragments.nf +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Calculate bed fragments from bam file - */ - -include { SAMTOOLS_VIEW } from '../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_SORT } from '../../modules/nf-core/modules/samtools/sort/main' -include { BEDTOOLS_BAMTOBED } from '../../modules/nf-core/modules/bedtools/bamtobed/main' -include { AWK } from '../../modules/local/linux/awk' -include { CUT } from '../../modules/local/linux/cut' - -workflow CALCULATE_FRAGMENTS { - take: - bam // channel: [ val(meta), [ bam ] ] - - main: - ch_versions = Channel.empty() - - /* - * Filter BAM file - */ - SAMTOOLS_VIEW ( bam.map{ row -> [ row[0], row[1], [] ] }, [] ) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions) - - /* - * Sort BAM file - */ - SAMTOOLS_SORT ( SAMTOOLS_VIEW.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) - - // Convert to bed file - BEDTOOLS_BAMTOBED ( SAMTOOLS_SORT.out.bam ) - ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions) - - // Keep the read pairs that are on the same chromosome and fragment length less than 1000bp. 
-    AWK ( BEDTOOLS_BAMTOBED.out.bed )
-    ch_versions = ch_versions.mix(AWK.out.versions)
-
-    // Only extract the fragment related columns
-    CUT ( AWK.out.file )
-
-    emit:
-    bed      = CUT.out.file          // channel: [ val(meta), [ bed ] ]
-    bam      = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ]
-    versions = ch_versions           // channel: [ versions.yml ]
-}
diff --git a/subworkflows/local/consensus_peaks.nf b/subworkflows/local/consensus_peaks.nf
index 4c848f88..630c7b84 100644
--- a/subworkflows/local/consensus_peaks.nf
+++ b/subworkflows/local/consensus_peaks.nf
@@ -1,17 +1,14 @@
 /*
- * Create group consensus peaks
+ * Create group-level consensus peaks
 */

 include { SORT } from '../../modules/local/linux/sort'
-include { BEDTOOLS_MERGE } from '../../modules/nf-core/modules/bedtools/merge/main'
+include { BEDTOOLS_MERGE } from '../../modules/nf-core/bedtools/merge/main'
 include { AWK } from '../../modules/local/linux/awk'
-include { PLOT_CONSENSUS_PEAKS } from '../../modules/local/modules/plot_consensus_peaks/main'

 workflow CONSENSUS_PEAKS {
-
     take:
-    bed       // channel: [ val(meta), [ bed ], count]
-    skip_plot // boolean: true/false
+    bed // channel: [ val(meta), [ bed ], count]

     main:
     ch_versions = Channel.empty()
@@ -28,14 +25,8 @@ workflow CONSENSUS_PEAKS {
     AWK ( BEDTOOLS_MERGE.out.bed )
     ch_versions = ch_versions.mix(AWK.out.versions)

-    // Plot consensus peak sets
-    if(!skip_plot) {
-        PLOT_CONSENSUS_PEAKS ( BEDTOOLS_MERGE.out.bed.collect{it[1]} )
-        ch_versions = ch_versions.mix(PLOT_CONSENSUS_PEAKS.out.versions)
-    }
-
     emit:
-    bed          = BEDTOOLS_MERGE.out.bed // channel: [ val(meta), [ bed ] ]
-    filtered_bed = AWK.out.file           // channel: [ val(meta), [ bed ] ]
-    versions     = ch_versions            // channel: [ versions.yml ]
+    merged_bed   = BEDTOOLS_MERGE.out.bed // channel: [ val(meta), [ bed ] ]
+    filtered_bed = AWK.out.file           // channel: [ val(meta), [ bed ] ]
+    versions     = ch_versions            // channel: [ versions.yml ]
 }
diff --git a/subworkflows/local/extract_fragments.nf b/subworkflows/local/extract_fragments.nf
new file mode 100644
index 00000000..11c4157d
--- /dev/null
+++ b/subworkflows/local/extract_fragments.nf
@@ -0,0 +1,50 @@
+/*
+ * Extract fragments from a BAM file into a bedfile format
+*/
+
+include { SAMTOOLS_SORT     } from "../../modules/nf-core/samtools/sort/main.nf"
+include { BEDTOOLS_BAMTOBED } from "../../modules/nf-core/bedtools/bamtobed/main.nf"
+include { AWK               } from '../../modules/local/linux/awk'
+include { CUT               } from '../../modules/local/linux/cut'
+
+workflow EXTRACT_FRAGMENTS {
+    take:
+    bam // channel: [ val(meta), [ bam ] ]
+
+    main:
+    ch_versions = Channel.empty()
+
+    /*
+     * MODULE: Sort reads by name for bamtobed
+     */
+    SAMTOOLS_SORT (
+        bam
+    )
+    ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions)
+
+    /*
+     * MODULE: Convert BAM file to paired-end bed format
+     */
+    BEDTOOLS_BAMTOBED(
+        SAMTOOLS_SORT.out.bam
+    )
+    ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions)
+    // BEDTOOLS_BAMTOBED.out.bed | view
+
+    /*
+     * MODULE: Keep the read pairs that map to the same chromosome with a fragment length below 1000 bp.
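+     *         (With bedtools bamtobed -bedpe output this is an awk test along the lines of
+     *         '$1 == $4 && $6 - $2 < 1000' — same chromosome for both mates, end of mate 2
+     *         minus start of mate 1 under 1000 bp. The exact expression is supplied to the
+     *         generic AWK module via ext.args in the module config, so treat this as a sketch.)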
+ */ + AWK ( + BEDTOOLS_BAMTOBED.out.bed + ) + ch_versions = ch_versions.mix(AWK.out.versions) + + /* + * MODULE: Only extract the fragment related columns + */ + CUT ( + AWK.out.file + ) + + emit: + bed = CUT.out.file // channel: [ val(meta), [ bed ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/extract_metadata_awk.nf b/subworkflows/local/extract_metadata_awk.nf new file mode 100644 index 00000000..29adde36 --- /dev/null +++ b/subworkflows/local/extract_metadata_awk.nf @@ -0,0 +1,33 @@ +/* + * Generate table-based metadata from a summary report using AWK + */ + +include { AWK_SCRIPT } from '../../modules/local/linux/awk_script' +include { AWK } from '../../modules/local/linux/awk' + +workflow EXTRACT_METADATA_AWK { + take: + report + script + script_mode // bool + + main: + ch_versions = Channel.empty() + ch_metadata = Channel.empty() + + // Can run awk in script mode with a file from assets or with a setup of command line args + if(script_mode) { + AWK_SCRIPT ( report, script ) + ch_metadata = AWK_SCRIPT.out.file + ch_versions = ch_versions.mix(AWK_SCRIPT.out.versions) + } + else { + AWK ( report ) + ch_metadata = AWK.out.file + ch_versions = ch_versions.mix(AWK.out.versions) + } + + emit: + metadata = ch_metadata // channel: [ val(meta), [ metdatafile ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/fastqc_trimgalore.nf b/subworkflows/local/fastqc_trimgalore.nf index 077d0649..e130465f 100644 --- a/subworkflows/local/fastqc_trimgalore.nf +++ b/subworkflows/local/fastqc_trimgalore.nf @@ -2,8 +2,8 @@ * Read QC, read trimming and post trim QC */ -include { FASTQC } from '../../modules/nf-core/modules/fastqc/main' -include { TRIMGALORE } from '../../modules/local/modules/trimgalore/main' +include { FASTQC } from '../../modules/nf-core/fastqc/main' +include { TRIMGALORE } from '../../modules/local/for_patch/trimgalore/main' workflow FASTQC_TRIMGALORE { take: @@ -35,7 +35,6 @@ workflow FASTQC_TRIMGALORE { trim_zip = TRIMGALORE.out.zip trim_log = TRIMGALORE.out.log ch_versions = ch_versions.mix(TRIMGALORE.out.versions) - } emit: diff --git a/subworkflows/local/peak_qc.nf b/subworkflows/local/peak_qc.nf new file mode 100644 index 00000000..69b86047 --- /dev/null +++ b/subworkflows/local/peak_qc.nf @@ -0,0 +1,159 @@ +/* + * Calculate Peak-based metrics and QC +*/ + +include { PEAK_FRIP } from "../../modules/local/peak_frip" +include { PEAK_COUNTS as PRIMARY_PEAK_COUNTS } from "../../modules/local/peak_counts" +include { PEAK_COUNTS as CONSENSUS_PEAK_COUNTS } from "../../modules/local/peak_counts" +include { CUT as CUT_CALC_REPROD } from "../../modules/local/linux/cut" +include { BEDTOOLS_INTERSECT } from "../../modules/nf-core/bedtools/intersect/main.nf" +include { CALCULATE_PEAK_REPROD } from "../../modules/local/python/peak_reprod" +include { PLOT_CONSENSUS_PEAKS } from '../../modules/local/python/plot_consensus_peaks' + +workflow PEAK_QC { + take: + peaks // channel: [ val(meta), [ bed ] ] + peaks_with_ids // channel: [ val(meta), [ bed ] ] + consensus_peaks // channel: [ val(meta), [ bed ] ] + consensus_peaks_unfiltered // channel: [ val(meta), [ bed ] ] + fragments_bed // channel: [ val(meta), [ bed ] ] + flagstat // channel: [ val(meta), [ flagstat ] ] + min_frip_overlap // val + frip_score_header_multiqc // file + peak_count_header_multiqc // file + peak_count_consensus_header_multiqc // file + peak_reprod_header_multiqc // file + + main: + ch_versions = Channel.empty() + + /* + * CHANNEL: Combine 
channel together for frip calculation + */ + peaks + .map { row -> [row[0].id, row ].flatten()} + .join ( fragments_bed.map { row -> [row[0].id, row ].flatten()} ) + .join ( flagstat.map { row -> [row[0].id, row ].flatten()} ) + .map { row -> [ row[1], row[2], row[4], row[6] ]} + .set { ch_frip } + //ch_frip | view + + /* + * MODULE: Calculate frip scores for primary peaks + */ + PEAK_FRIP( + ch_frip, + frip_score_header_multiqc, + min_frip_overlap + ) + ch_versions = ch_versions.mix(PEAK_FRIP.out.versions) + // PEAK_FRIP.out.frip_mqc | view + + /* + * MODULE: Calculate peak counts for primary peaks + */ + PRIMARY_PEAK_COUNTS( + peaks, + peak_count_header_multiqc + ) + ch_versions = ch_versions.mix(PRIMARY_PEAK_COUNTS.out.versions) + // PRIMARY_PEAK_COUNTS.out.count_mqc | view + + /* + * MODULE: Calculate peak counts for consensus peaks + */ + CONSENSUS_PEAK_COUNTS( + consensus_peaks, + peak_count_consensus_header_multiqc + ) + ch_versions = ch_versions.mix(CONSENSUS_PEAK_COUNTS.out.versions) + // CONSENSUS_PEAK_COUNTS.out.count_mqc | view + + /* + * MODULE: Trim unwanted columns for downstream reporting + */ + CUT_CALC_REPROD ( + peaks_with_ids + ) + ch_versions = ch_versions.mix(CUT_CALC_REPROD.out.versions) + + /* + * CHANNEL: Group samples based on group and filter for groups that have more than one file + */ + CUT_CALC_REPROD.out.file + .map { row -> [ row[0].group, row[1] ] } + .groupTuple(by: [0]) + .map { row -> [ [id: row[0]], row[1].flatten() ] } + .map { row -> [ row[0], row[1], row[1].size() ] } + .filter { row -> row[2] > 1 } + .map { row -> [ row[0], row[1] ] } + .set { ch_peak_bed_group } + //ch_peak_bed_group | view + + /* + * CHANNEL: Per group, create a channel per one against all combination + */ + ch_peak_bed_group.flatMap{ + row -> + def new_output = [] + row[1].each{ file -> + def files_copy = row[1].collect() + files_copy.remove(files_copy.indexOf(file)) + new_output.add([[id: file.name.split("\\.")[0]], file, files_copy]) + } + new_output + } + .set { ch_beds_intersect } + //EXAMPLE CHANNEL STRUCT: [[META], BED (-a), [BED...n] (-b)] + //ch_beds_intersect | view + + /* + * MODULE: Find intra-group overlap + */ + BEDTOOLS_INTERSECT ( + ch_beds_intersect, + "bed" + ) + ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT.out.versions) + //EXAMPLE CHANNEL STRUCT: [[META], BED] + //BEDTOOLS_INTERSECT.out.intersect | view + + /* + * MODULE: Use overlap to calculate a peak repro % + */ + CALCULATE_PEAK_REPROD ( + BEDTOOLS_INTERSECT.out.intersect, + peak_reprod_header_multiqc + ) + ch_versions = ch_versions.mix(CALCULATE_PEAK_REPROD.out.versions) + //EXAMPLE CHANNEL STRUCT: [[META], TSV] + //CALCULATE_PEAK_REPROD.out.tsv + + /* + * CHANNEL: Prep for upset input + */ + consensus_peaks_unfiltered + .toSortedList { row -> row[0].id } + .map { list -> + def output = [] + list.each{ v -> output.add(v[1]) } + output + } + .set { ch_merged_bed_sorted } + + /* + * MODULE: Plot upset plots for sample peaks + */ + PLOT_CONSENSUS_PEAKS ( + ch_merged_bed_sorted.ifEmpty([]) + ) + ch_versions = ch_versions.mix(PLOT_CONSENSUS_PEAKS.out.versions) + + emit: + primary_frip_mqc = PEAK_FRIP.out.frip_mqc // channel: [ val(meta), [ mqc ] ] + primary_count_mqc = PRIMARY_PEAK_COUNTS.out.count_mqc // channel: [ val(meta), [ mqc ] ] + consensus_count_mqc = CONSENSUS_PEAK_COUNTS.out.count_mqc // channel: [ val(meta), [ mqc ] ] + reprod_perc_mqc = CALCULATE_PEAK_REPROD.out.mqc // channel: [ val(meta), [ mqc ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git 
a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 020b347c..62a32dbb 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -2,20 +2,29 @@ * Uncompress and prepare reference genome files */ -include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/modules/gunzip/main.nf' -include { GUNZIP as GUNZIP_SPIKEIN_FASTA } from '../../modules/nf-core/modules/gunzip/main.nf' -include { GUNZIP as GUNZIP_GTF } from '../../modules/nf-core/modules/gunzip/main.nf' -include { GUNZIP as GUNZIP_BED } from '../../modules/nf-core/modules/gunzip/main.nf' -include { CUSTOM_GETCHROMSIZES as TARGET_CHROMSIZES } from '../../modules/nf-core/modules/custom/getchromsizes/main.nf' -include { CUSTOM_GETCHROMSIZES as SPIKEIN_CHROMSIZES } from '../../modules/nf-core/modules/custom/getchromsizes/main.nf' -include { UNTAR as UNTAR_INDEX_TARGET } from '../../modules/nf-core/modules/untar/main.nf' -include { UNTAR as UNTAR_INDEX_SPIKEIN } from '../../modules/nf-core/modules/untar/main.nf' -include { BOWTIE2_BUILD as BOWTIE2_BUILD_TARGET } from '../../modules/nf-core/modules/bowtie2/build/main' -include { BOWTIE2_BUILD as BOWTIE2_BUILD_SPIKEIN } from '../../modules/nf-core/modules/bowtie2/build/main' +include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip/main.nf' +include { GUNZIP as GUNZIP_SPIKEIN_FASTA } from '../../modules/nf-core/gunzip/main.nf' +include { GUNZIP as GUNZIP_GTF } from '../../modules/nf-core/gunzip/main.nf' +include { GUNZIP as GUNZIP_BED } from '../../modules/nf-core/gunzip/main.nf' +include { CUSTOM_GETCHROMSIZES as TARGET_CHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main.nf' +include { CUSTOM_GETCHROMSIZES as SPIKEIN_CHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main.nf' +include { UNTAR as UNTAR_INDEX_TARGET } from '../../modules/nf-core/untar/main.nf' +include { UNTAR as UNTAR_INDEX_SPIKEIN } from '../../modules/nf-core/untar/main.nf' +include { BOWTIE2_BUILD as BOWTIE2_BUILD_TARGET } from '../../modules/nf-core/bowtie2/build/main' +include { BOWTIE2_BUILD as BOWTIE2_BUILD_SPIKEIN } from '../../modules/nf-core/bowtie2/build/main' +include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix/main' +include { GTF2BED } from '../../modules/local/gtf2bed' +include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' +include { BEDTOOLS_SORT as ANNOTATION_BEDTOOLS_SORT } from "../../modules/local/for_patch/bedtools/sort/main" +include { AWK as BLACKLIST_AWK } from "../../modules/local/linux/awk" +include { BEDTOOLS_INTERSECT as BLACKLIST_BEDTOOLS_INTERSECT } from "../../modules/nf-core/bedtools/intersect/main" +include { BEDTOOLS_SORT as BLACKLIST_BEDTOOLS_SORT } from "../../modules/local/for_patch/bedtools/sort/main" +include { BEDTOOLS_COMPLEMENT as BLACKLIST_BEDTOOLS_COMPLEMENT } from "../../modules/nf-core/bedtools/complement/main" workflow PREPARE_GENOME { take: prepare_tool_indices // list: tools to prepare indices for + blacklist // channel: blacklist file or empty channel main: ch_versions = Channel.empty() @@ -24,21 +33,22 @@ workflow PREPARE_GENOME { * Uncompress genome fasta file if required */ if (params.fasta.endsWith(".gz")) { - ch_fasta = GUNZIP_FASTA ( [ [:], params.fasta ] ).gunzip.map { it[1] } + ch_fasta = GUNZIP_FASTA ( [ [id:"target_fasta"], params.fasta ] ).gunzip ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) } else { - ch_fasta = file(params.fasta) + ch_fasta = Channel.from( file(params.fasta) ).map { row -> 
[[id:"spikein_fasta"], row] } } /* * Uncompress spike-in genome fasta file if required */ if (params.spikein_fasta.endsWith(".gz")) { - ch_spikein_fasta = GUNZIP_SPIKEIN_FASTA ( [ [:], params.spikein_fasta ] ).gunzip.map { it[1] } + ch_spikein_fasta = GUNZIP_SPIKEIN_FASTA ( [ [id:"spikein_fasta"], params.spikein_fasta ] ).gunzip ch_versions = ch_versions.mix(GUNZIP_SPIKEIN_FASTA.out.versions) } else { - ch_spikein_fasta = file(params.spikein_fasta) + ch_spikein_fasta = Channel.from( file(params.spikein_fasta) ).map { row -> [[id:"spikein_fasta"], row] } } + //ch_spikein_fasta | view /* * Uncompress GTF annotation file @@ -48,32 +58,72 @@ workflow PREPARE_GENOME { ch_gtf = GUNZIP_GTF ( [ [:], params.gtf ] ).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) } else { - ch_gtf = file(params.gtf) + ch_gtf = Channel.from( file(params.gtf) ) } - /* - * Uncompress BED annotation file - */ ch_gene_bed = Channel.empty() if (params.gene_bed){ + /* + * Uncompress BED annotation file + */ if (params.gene_bed.endsWith(".gz")) { ch_gene_bed = GUNZIP_BED ( [ [:], params.gene_bed ] ).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_BED.out.versions) } else { - ch_gene_bed = file(params.gene_bed) + ch_gene_bed = Channel.from( file(params.gene_bed) ) + } + } else { + /* + * Create GTF bed file if needed + */ + GTF2BED ( ch_gtf ) + ch_gene_bed = GTF2BED.out.bed + ch_versions = ch_versions.mix(GTF2BED.out.versions) + } + + /* + * Sort and index the bed annotation file + */ + ch_tabix = ch_gene_bed.map { + row -> [ [ id:row.getName() ] , row ] + } + + if (params.gene_bed && params.gene_bed.endsWith(".gz")) { + ch_tabix = ch_tabix.map { + row -> + new_id = row[0].id.split("\\.")[0] + [ [ id: new_id ] , row[1] ] } } + ANNOTATION_BEDTOOLS_SORT ( + ch_tabix, + "bed", + [] + ) + + TABIX_BGZIPTABIX ( + ANNOTATION_BEDTOOLS_SORT.out.sorted + ) + ch_gene_bed_index = TABIX_BGZIPTABIX.out.gz_tbi + ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) + + /* + * Index genome fasta file + */ + ch_fasta_index = SAMTOOLS_FAIDX ( ch_fasta ).fai + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + /* * Create chromosome sizes file */ - ch_chrom_sizes = TARGET_CHROMSIZES ( ch_fasta ).sizes + ch_chrom_sizes = TARGET_CHROMSIZES ( ch_fasta ).sizes.map{ it[1] } ch_versions = ch_versions.mix(TARGET_CHROMSIZES.out.versions) /* * Create chromosome sizes file for spike_in */ - ch_spikein_chrom_sizes = SPIKEIN_CHROMSIZES ( ch_spikein_fasta ).sizes + ch_spikein_chrom_sizes = SPIKEIN_CHROMSIZES ( ch_spikein_fasta ).sizes.map{ it[1] } /* * Uncompress Bowtie2 index or generate from scratch if required for both genomes @@ -107,12 +157,59 @@ workflow PREPARE_GENOME { } } + /* + * Use blacklist to create include regions for genome + */ + ch_genome_include_regions = Channel.empty() + if (params.blacklist) { + // Create bedfile from chrom sizes file + BLACKLIST_AWK ( + ch_chrom_sizes.map {row -> [ [id: "chromsizes"], row ]} + ) + ch_versions = ch_versions.mix(BLACKLIST_AWK.out.versions) + + // Create intersect channel between the chrom sizes bed and the blacklist bed + // This reduces the blacklist file down to the + ch_blacklist_intersect = blacklist + .map {row -> [ [id: "blacklist"], row ]} + .combine( BLACKLIST_AWK.out.file ) + .map {row -> [ row[0], row[1], row[3] ]} + //ch_blacklist_intersect | view + + // Intersect blacklist with available chromosomes + // this prevents error in the next two processes + BLACKLIST_BEDTOOLS_INTERSECT( + ch_blacklist_intersect, + "filtered.bed" + ) + 
+        ch_versions = ch_versions.mix(BLACKLIST_BEDTOOLS_INTERSECT.out.versions)
+
+        // Sort the bed file
+        BLACKLIST_BEDTOOLS_SORT(
+            BLACKLIST_BEDTOOLS_INTERSECT.out.intersect,
+            "sorted.bed",
+            ch_chrom_sizes
+        )
+        ch_versions = ch_versions.mix(BLACKLIST_BEDTOOLS_SORT.out.versions)
+
+        // Find complement of blacklist to show allowed regions
+        BLACKLIST_BEDTOOLS_COMPLEMENT(
+            BLACKLIST_BEDTOOLS_SORT.out.sorted,
+            ch_chrom_sizes
+        )
+        ch_genome_include_regions = BLACKLIST_BEDTOOLS_COMPLEMENT.out.bed
+        ch_versions = ch_versions.mix(BLACKLIST_BEDTOOLS_COMPLEMENT.out.versions)
+    }
+
     emit:
     fasta = ch_fasta // path: genome.fasta
+    fasta_index = ch_fasta_index // path: genome.fai
     chrom_sizes = ch_chrom_sizes // path: genome.sizes
     spikein_chrom_sizes = ch_spikein_chrom_sizes // path: genome.sizes
     gtf = ch_gtf // path: genome.gtf
     bed = ch_gene_bed // path: genome.bed
+    bed_index = ch_gene_bed_index // path: genome.bed_index
+    allowed_regions = ch_genome_include_regions // path: genome.regions
     bowtie2_index = ch_bt2_index // path: bt2/index/
     bowtie2_spikein_index = ch_bt2_spikein_index // path: bt2/index/
diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/nf-core/bam_sort_samtools.nf
index 062f05cc..0b84501d 100644
--- a/subworkflows/nf-core/bam_sort_samtools.nf
+++ b/subworkflows/nf-core/bam_sort_samtools.nf
@@ -2,8 +2,8 @@
  * Sort, index BAM file and run samtools stats, flagstat and idxstats
  */
 
-include { SAMTOOLS_SORT } from '../../modules/nf-core/modules/samtools/sort/main'
-include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main'
+include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main'
+include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
 include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools'
 
 workflow BAM_SORT_SAMTOOLS {
diff --git a/subworkflows/nf-core/bam_stats_samtools.nf b/subworkflows/nf-core/bam_stats_samtools.nf
index e47163eb..87215687 100644
--- a/subworkflows/nf-core/bam_stats_samtools.nf
+++ b/subworkflows/nf-core/bam_stats_samtools.nf
@@ -2,9 +2,9 @@
  * Run SAMtools stats, flagstat and idxstats
  */
 
-include { SAMTOOLS_STATS } from '../../modules/nf-core/modules/samtools/stats/main'
-include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/modules/samtools/idxstats/main'
-include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/modules/samtools/flagstat/main'
+include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main'
+include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/samtools/idxstats/main'
+include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main'
 
 workflow BAM_STATS_SAMTOOLS {
     take:
@@ -14,13 +14,13 @@ workflow BAM_STATS_SAMTOOLS {
     ch_versions = Channel.empty()
 
     SAMTOOLS_STATS ( ch_bam_bai, [] )
-    ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())
+    ch_versions = ch_versions.mix( SAMTOOLS_STATS.out.versions.first() )
 
     SAMTOOLS_FLAGSTAT ( ch_bam_bai )
-    ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first())
+    ch_versions = ch_versions.mix( SAMTOOLS_FLAGSTAT.out.versions.first() )
 
     SAMTOOLS_IDXSTATS ( ch_bam_bai )
-    ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions.first())
+    ch_versions = ch_versions.mix( SAMTOOLS_IDXSTATS.out.versions.first() )
 
     emit:
     stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ]
diff --git a/subworkflows/nf-core/deeptools_qc.nf b/subworkflows/nf-core/deeptools_qc.nf
new file mode 100644
index 00000000..7a03d8f9
--- /dev/null
+++ b/subworkflows/nf-core/deeptools_qc.nf
@@ -0,0 +1,108 @@
+/*
+ * Perform full suite of deepTools analysis on bam files
+*/
+
+include { DEEPTOOLS_MULTIBAMSUMMARY } from '../../modules/local/deeptools/multibamsummary/main'
+include { DEEPTOOLS_PLOTCORRELATION } from '../../modules/local/deeptools/plotcorrelation/main'
+include { DEEPTOOLS_PLOTPCA } from '../../modules/local/deeptools/plotpca/main'
+include { DEEPTOOLS_PLOTFINGERPRINT } from '../../modules/nf-core/deeptools/plotfingerprint/main'
+
+workflow DEEPTOOLS_QC {
+    take:
+    bam // channel: [ val(meta), [ bam ] ]
+    bai // channel: [ val(meta), [ bai ] ]
+
+    main:
+    ch_versions = Channel.empty()
+
+    /*
+    * CHANNEL: Filter bams for target only
+    */
+    bam.filter { it -> it[0].is_control == false }
+        .set { ch_bam_target }
+    //ch_bam_target | view
+
+    /*
+    * CHANNEL: Filter bais for target only
+    */
+    bai.filter { it -> it[0].is_control == false }
+        .set { ch_bai_target }
+    //ch_bai_target | view
+
+    /*
+    * CHANNEL: Combine bam and bai files on id
+    */
+    ch_bam_target.map { row -> [row[0].id, row ].flatten()}
+        .join ( ch_bai_target.map { row -> [row[0].id, row ].flatten()} )
+        .map { row -> [row[1], row[2], row[4]] }
+        .set { ch_bam_bai }
+    // EXAMPLE CHANNEL STRUCT: [[META], BAM, BAI]
+    //ch_bam_bai | view
+
+    /*
+    * CHANNEL: Get list of sample ids
+    */
+    ch_bam_target.map { row -> [row[0].id] }
+        .collect()
+        .map { row -> [row] }
+        .set { ch_ids }
+    //ch_ids | view
+
+    /*
+    * CHANNEL: Combine bam and bai files into one list
+    * if we only have one file then cancel correlation and PCA
+    */
+    ch_bam_target.map { row -> [row[1]] }
+        .collect()
+        .map { row -> [row] }
+        .combine( ch_bai_target.map { row -> [row[1]] }.collect().map { row -> [row] } )
+        .combine( ch_ids )
+        .map { row -> [[id: 'all_target_bams'], row[0], row[1], row[2], row[1].size()] }
+        .filter { row -> row[4] > 1 }
+        .map { row -> [row[0], row[1], row[2], row[3]] }
+        .set { ch_bam_bai_all }
+    //ch_bam_bai_all | view
+
+    /*
+    * MODULE: Summarise bams into bins
+    */
+    DEEPTOOLS_MULTIBAMSUMMARY (
+        ch_bam_bai_all
+    )
+    ch_versions = ch_versions.mix(DEEPTOOLS_MULTIBAMSUMMARY.out.versions)
+    //DEEPTOOLS_MULTIBAMSUMMARY.out.matrix | view
+
+    /*
+    * MODULE: Plot correlation matrix
+    */
+    DEEPTOOLS_PLOTCORRELATION (
+        DEEPTOOLS_MULTIBAMSUMMARY.out.matrix
+    )
+    ch_versions = ch_versions.mix(DEEPTOOLS_PLOTCORRELATION.out.versions)
+    //DEEPTOOLS_MULTIBAMSUMMARY.out.matrix | view
+
+    /*
+    * MODULE: Plot PCAs
+    */
+    DEEPTOOLS_PLOTPCA (
+        DEEPTOOLS_MULTIBAMSUMMARY.out.matrix
+    )
+    ch_versions = ch_versions.mix(DEEPTOOLS_PLOTPCA.out.versions)
+    //DEEPTOOLS_PLOTPCA.out.matrix | view
+
+    /*
+    * MODULE: Plot Fingerprint
+    */
+    DEEPTOOLS_PLOTFINGERPRINT (
+        ch_bam_bai
+    )
+    ch_versions = ch_versions.mix(DEEPTOOLS_PLOTFINGERPRINT.out.versions)
+    //DEEPTOOLS_PLOTFINGERPRINT.out.matrix | view
+
+    emit:
+    correlation_matrix = DEEPTOOLS_PLOTCORRELATION.out.matrix
+    pca_data = DEEPTOOLS_PLOTPCA.out.tab
+    fingerprint_matrix = DEEPTOOLS_PLOTFINGERPRINT.out.matrix
+
+    versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/mark_duplicates_picard.nf b/subworkflows/nf-core/mark_duplicates_picard.nf
index 2dd25e80..1b8d85b2 100644
--- a/subworkflows/nf-core/mark_duplicates_picard.nf
+++ b/subworkflows/nf-core/mark_duplicates_picard.nf
@@ -2,14 +2,13 @@
  * Picard MarkDuplicates, sort, index BAM file and run samtools stats, flagstat and idxstats
  */
 
-include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/modules/picard/markduplicates/main'
-include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main'
-include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools'
+include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/picard/markduplicates/main'
+include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools'
 
 workflow MARK_DUPLICATES_PICARD {
     take:
     bam // channel: [ val(meta), [ bam ] ]
-    process_target //boolean
+    process_target // boolean
 
     main:
 
     /*
@@ -22,7 +21,7 @@ workflow MARK_DUPLICATES_PICARD {
         PICARD_MARKDUPLICATES ( bam )
         ch_bam = PICARD_MARKDUPLICATES.out.bam
         metrics = PICARD_MARKDUPLICATES.out.metrics
-        ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions)
+        ch_versions = ch_versions.mix( PICARD_MARKDUPLICATES.out.versions )
     } else {
         // Split out control files and run only on these
         bam.branch { it ->
@@ -34,40 +33,36 @@ workflow MARK_DUPLICATES_PICARD {
         //ch_split.control | view
 
         PICARD_MARKDUPLICATES ( ch_split.control )
-        ch_bam = PICARD_MARKDUPLICATES.out.bam.mix ( ch_split.target )
+
+        // Sort both branches by sample id before recombining; otherwise the elements can arrive in a different order between runs, which breaks resume
+        ch_sorted_targets = ch_split.target
+            .toSortedList { row -> row[0].id }
+            .flatMap()
+
+        ch_sorted_controls = PICARD_MARKDUPLICATES.out.bam
+            .toSortedList { row -> row[0].id }
+            .flatMap()
+
+        ch_bam = ch_sorted_targets.concat ( ch_sorted_controls )
         metrics = PICARD_MARKDUPLICATES.out.metrics
-        ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions)
+        ch_versions = ch_versions.mix( PICARD_MARKDUPLICATES.out.versions )
     }
     //ch_bam | view
 
     /*
-     * Index BAM file
-     */
-    SAMTOOLS_INDEX ( ch_bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions)
-
-    // Join bam/bai
-    ch_bam
-        .map { row -> [row[0].id, row ].flatten()}
-        .join ( SAMTOOLS_INDEX.out.bai.map { row -> [row[0].id, row ].flatten()} )
-        .map { row -> [row[1], row[2], row[4]] }
-        .set { ch_bam_bai }
-    //ch_bam_bai | view
-
-    /*
-     * Run samtools stats, flagstat and idxstats
+     * WORKFLOW: Re-sort and index all the bam files, then calculate stats
      */
-    BAM_STATS_SAMTOOLS ( ch_bam_bai )
-    ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions)
+    BAM_SORT_SAMTOOLS (
+        ch_bam
+    )
 
     emit:
-    bam = ch_bam // channel: [ val(meta), [ bam ] ]
-    metrics // channel: [ val(meta), [ metrics ] ]
-
-    bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ]
-    stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ]
-    flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ]
-    idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ]
+    bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ]
+    bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ]
+    stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ]
+    flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ]
+    idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ]
+    metrics // channel: [ val(meta), [ metrics ] ]
 
-    versions = ch_versions // channel: [ versions.yml ]
+    versions = ch_versions // channel: [ versions.yml ]
 }
diff --git a/subworkflows/nf-core/prepare_peakcalling.nf b/subworkflows/nf-core/prepare_peakcalling.nf
index c332da4e..13578d1e 100644
--- a/subworkflows/nf-core/prepare_peakcalling.nf
+++ b/subworkflows/nf-core/prepare_peakcalling.nf
@@ -2,11 +2,11 @@
  * Convert bam files to bedgraph and bigwig with apropriate normalisation
 */
 
-include { BEDTOOLS_GENOMECOV } from "../../modules/nf-core/modules/bedtools/genomecov/main"
-include { DEEPTOOLS_BAMCOVERAGE } from "../../modules/local/modules/deeptools/bamcoverage/main"
-include { BEDTOOLS_SORT } from "../../modules/nf-core/modules/bedtools/sort/main"
-include { UCSC_BEDCLIP } from "../../modules/nf-core/modules/ucsc/bedclip/main"
-include { UCSC_BEDGRAPHTOBIGWIG } from "../../modules/nf-core/modules/ucsc/bedgraphtobigwig/main"
+include { BEDTOOLS_GENOMECOV } from "../../modules/nf-core/bedtools/genomecov/main"
+include { DEEPTOOLS_BAMCOVERAGE } from "../../modules/local/deeptools/bamcoverage/main"
+include { BEDTOOLS_SORT } from "../../modules/local/for_patch/bedtools/sort/main"
+include { UCSC_BEDCLIP } from "../../modules/nf-core/ucsc/bedclip/main"
+include { UCSC_BEDGRAPHTOBIGWIG } from "../../modules/nf-core/ucsc/bedgraphtobigwig/main"
 
 workflow PREPARE_PEAKCALLING {
     take:
@@ -14,58 +14,53 @@ workflow PREPARE_PEAKCALLING {
     ch_bai // channel: [ val(meta), [ bai ] ]
     ch_chrom_sizes // channel: [ sizes ]
     ch_dummy_file // channel: [ dummy ]
-    norm_mode // value: ["Spikein", "RPKM", "CPM", "BPM", "RPGC", "None" ]
+    norm_mode // value: ["Spikein", "RPKM", "CPM", "BPM", "RPGC", "None" ]
+    metadata // channel: [ csv ]
 
     main:
     ch_versions = Channel.empty()
-    ch_bam_out = Channel.empty()
     ch_bedgraph = Channel.empty()
 
     if (norm_mode == "Spikein") {
+        /*
+        * CHANNEL: Load up alignment metadata into channel
+        */
+        metadata.splitCsv ( header:true, sep:"," )
+            .map { row -> [ row[0].id, row[1] ]}
+            .set { ch_metadata }
+        //ch_metadata | view
+
         /*
         * CHANNEL: Calculate scale factor for each sample based on a constant devided by the number
         * of reads aligned to the spike-in genome.
         */
-        ch_bam
+        ch_bam.map { row -> [ row[0].id, row[0], row[1] ]}
+            .join ( ch_metadata )
             .map { row ->
-                def denominator = row[0].find{ it.key == "bt2_total_aligned_spikein" }?.value.toInteger()
-                [ row[0].id, params.normalisation_c / (denominator != 0 ? denominator : 1) ]
+                def denominator = row[3].find{ it.key == "bt2_total_aligned" }?.value.toInteger()
+                [ row[1], row[2], params.normalisation_c / (denominator != 0 ? denominator : params.normalisation_c) ]
             }
-            .set { ch_scale_factor }
+            .set { ch_bam_scale_factor }
         // EXAMPLE CHANNEL STRUCT: [id, scale_factor]
-        //ch_scale_factor | view
+        //ch_bam_scale_factor | view
     } else if (norm_mode == "None") {
         /*
        * CHANNEL: Assign scale factor of 1
        */
-        ch_bam
-            .map { row ->
-                [ row[0].id, 1 ]
+        ch_bam.map { row ->
+                [ row[0], row[1], 1 ]
            }
-            .set { ch_scale_factor }
+            .set { ch_bam_scale_factor }
+        //ch_bam_scale_factor | view
    }
 
    if (norm_mode == "Spikein" || norm_mode == "None") {
-        /*
-        * CHANNEL: Create a channel with the scale factor as a seperate value
-        */
-        ch_bam
-            .map { row -> [row[0].id, row ].flatten()}
-            .join ( ch_scale_factor )
-            .map { row -> row[1..(row.size() - 1)] }
-            .map { row ->
-                row[0].put("scale_factor", row[2])
-                [ row[0], row[1], row[2] ] }
-            .set { ch_bam_scale }
-        //EXAMPLE CHANNEL STRUCT: [[META + scale_factor:10000], BAM, SCALE_FACTOR]
-        //ch_bam_scale | view
-
        /*
        * MODULE: Convert bam files to bedgraph
        */
        BEDTOOLS_GENOMECOV (
-            ch_bam_scale,
+            ch_bam_scale_factor,
            ch_dummy_file,
            "bedGraph"
        )
@@ -73,15 +68,6 @@ workflow PREPARE_PEAKCALLING {
        ch_bedgraph = BEDTOOLS_GENOMECOV.out.genomecov
        //EXAMPLE CHANNEL STRUCT: [META], BEDGRAPH]
        //BEDTOOLS_GENOMECOV.out.genomecov | view
-
-        /*
-        * CHANNEL: Add the scale factor values to the main meta-data stream
-        */
-        ch_bam_scale
-            .map { row -> [ row[0], row[1] ] }
-            .set { ch_bam_out }
-        //EXAMPLE CHANNEL STRUCT: [[META], BAM]
-        //ch_samtools_bam | view
    } else {
        /*
        * CHANNEL: Combine bam and bai files on id
@@ -144,9 +130,6 @@ workflow PREPARE_PEAKCALLING {
        ch_bedgraph = DEEPTOOLS_BAMCOVERAGE.out.bedgraph
        // EXAMPLE CHANNEL STRUCT: [[META], BAM, BAI]
        //ch_bedgraph | view
-
-        // Dont assign any new meta data
-        ch_bam_out = ch_bam
    }
 
    /*
@@ -154,7 +137,8 @@ workflow PREPARE_PEAKCALLING {
    */
    BEDTOOLS_SORT (
        ch_bedgraph,
-        "bedGraph"
+        "bedGraph",
+        []
    )
    ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions)
 
@@ -181,7 +165,6 @@ workflow PREPARE_PEAKCALLING {
    //UCSC_BEDGRAPHTOBIGWIG.out.bigwig | view
 
    emit:
-    bam = ch_bam_out // channel: [ val(meta), [ bam ] ]
    bedgraph = UCSC_BEDCLIP.out.bedgraph // channel: [ val(meta), [ bedgraph ] ]
    bigwig = UCSC_BEDGRAPHTOBIGWIG.out.bigwig // channel: [ val(meta), [ bigwig ] ]
    versions = ch_versions // channel: [ versions.yml ]
diff --git a/subworkflows/nf-core/samtools_view_sort_stats.nf b/subworkflows/nf-core/samtools_view_sort_stats.nf
index 7071062b..cda5a59f 100644
--- a/subworkflows/nf-core/samtools_view_sort_stats.nf
+++ b/subworkflows/nf-core/samtools_view_sort_stats.nf
@@ -2,21 +2,23 @@
 * Run bam files through samtools view and reindex and calc stats
 */
 
-include { SAMTOOLS_VIEW } from '../../modules/nf-core/modules/samtools/view/main'
-include { SAMTOOLS_SORT } from '../../modules/nf-core/modules/samtools/sort/main'
-include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main'
+include { SAMTOOLS_VIEW } from '../../modules/local/for_patch/samtools/view/main'
+include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main'
+include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
 include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools'
 
 workflow SAMTOOLS_VIEW_SORT_STATS {
     take:
-    bam // channel: [ val(meta), [ bam ] ]
+    bam     // channel: [ val(meta), [ bam ] ]
+    regions // channel: [ regions ]
 
     main:
     ch_versions = Channel.empty()
 
     /*
     * Filter BAM file
     */
-    SAMTOOLS_VIEW ( bam.map{ row -> [ row[0], row[1], [] ] }, [] )
+    SAMTOOLS_VIEW ( bam.map{ row -> [ row[0], row[1], [] ] }, [], regions )
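+    // The optional regions channel (e.g. the blacklist-complement BED emitted by PREPARE_GENOME) is passed through to the patched samtools/view module so read filtering can be restricted to allowed regions; an empty channel leaves the BAM unrestricted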
     ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first())
 
     /*
@@ -45,5 +47,5 @@ workflow SAMTOOLS_VIEW_SORT_STATS {
     stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ]
     flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ]
     idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ]
-    versions = ch_versions // channel: [ versions.yml ]
+    versions = ch_versions // channel: [ versions.yml ]
 }
diff --git a/tests/config/nextflow.config b/tests/config/nextflow.config
index 8c62e29e..79cb9380 100644
--- a/tests/config/nextflow.config
+++ b/tests/config/nextflow.config
@@ -1,10 +1,23 @@
 params {
     outdir = "results/"
     publish_dir_mode = "copy"
+    enable_conda = false
+    singularity_pull_docker_container = false
 }
 
 process {
     cpus = 2
     memory = 6.GB
     time = 6.h
-}
\ No newline at end of file
+}
+
+if ("$PROFILE" == "singularity") {
+    singularity.enabled = true
+    singularity.autoMounts = true
+} else if ("$PROFILE" == "conda") {
+    params.enable_conda = true
+} else {
+    docker.enabled = true
+    docker.userEmulation = true
+    docker.runOptions = "--platform linux/x86_64"
+}
diff --git a/tests/test_01_genome_options.yml b/tests/test_01_genome_options.yml
new file mode 100644
index 00000000..14082bde
--- /dev/null
+++ b/tests/test_01_genome_options.yml
@@ -0,0 +1,34 @@
+- name: test_param_check_custom_genome_blacklist
+  command: nextflow run main.nf -profile docker,test_fasta_only --only_genome --gtf https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/hg38-chr20-genes.gtf.gz --blacklist ./assets/blacklists/hg38-blacklist.bed -c tests/config/nextflow.config
+  tags:
+    - test_genome_options
+
+- name: test_param_check_custom_genome_noblacklist
+  command: nextflow run main.nf -profile docker,test_fasta_only --only_genome --gtf https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/hg38-chr20-genes.gtf.gz -c tests/config/nextflow.config
+  tags:
+    - test_genome_options
+
+- name: test_param_check_custom_genome_nobed
+  command: nextflow run main.nf -profile docker,test --only_genome -c tests/config/nextflow.config
+  tags:
+    - test_genome_options
+
+- name: test_param_check_custom_genome_gtf
+  command: nextflow run main.nf -profile docker,test_fasta_only --only_genome --gtf https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/hg38-chr7-genes.gtf -c tests/config/nextflow.config
+  tags:
+    - test_genome_options
+
+- name: test_param_check_custom_genome_gtf_gz
+  command: nextflow run main.nf -profile docker,test_fasta_only --only_genome --gtf https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/hg38-chr20-genes.gtf.gz -c tests/config/nextflow.config
+  tags:
+    - test_genome_options
+
+- name: test_param_check_custom_genome_bed
+  command: nextflow run main.nf -profile docker,test_fasta_only --only_genome --gtf https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/hg38-chr20-genes.gtf.gz --gene_bed https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/hg38-chr7-genes.bed -c tests/config/nextflow.config
+  tags:
+    - test_genome_options
+
+- name: test_param_check_custom_genome_bed_gz
+  command: nextflow run main.nf -profile docker,test_fasta_only --only_genome --gtf https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/hg38-chr20-genes.gtf.gz --gene_bed https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/hg38-chr7-genes.bed.gz -c tests/config/nextflow.config
+  tags:
+    - test_genome_options
diff --git a/tests/test_samplesheet_check.yml b/tests/test_02_samplesheet_check.yml
similarity index 58%
rename from tests/test_samplesheet_check.yml
rename to tests/test_02_samplesheet_check.yml
index 856493c0..72329782 100644
--- a/tests/test_samplesheet_check.yml
+++ b/tests/test_02_samplesheet_check.yml
@@ -1,13 +1,13 @@
 # Test incorrect header
 - name: test_samplesheet_header_error
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/header_error.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/header_error.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   exit_code: 1
 
 # Test use_control false when noigg in sample sheet does not produce error
 - name: test_samplesheet_noigg_pos
-  command: nextflow run main.nf -profile docker,test --only_input true --use_control false --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/no_igg.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --use_control false --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/no_igg.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   files:
@@ -15,7 +15,7 @@
 
 # Test use_control false when control in sample sheet does not produce error
 - name: test_samplesheet_noigg_pos_ctrl_neg
-  command: nextflow run main.nf -profile docker,test --only_input true --use_control false --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-small.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --use_control false --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-small.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   files:
@@ -23,7 +23,7 @@
 
 # Test use_control true when noigg in sample sheet does produce error
 - name: test_samplesheet_noigg_neg
-  command: nextflow run main.nf -profile docker,test --only_input true --use_control true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/no_igg.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --use_control true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/no_igg.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   exit_code: 1
@@ -38,91 +38,91 @@
 
 # Test invalid number of columns in row
 - name: test_samplesheet_invalid_cols_in_row
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/invalid_column_in_row.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/invalid_column_in_row.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   exit_code: 1
 
 # Test group is blank
 - name: test_samplesheet_group_is_blank
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/group_is_blank.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/group_is_blank.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   exit_code: 1
 
 # Test group has spaces
 - name: test_samplesheet_group_has_spaces
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/group_has_spaces.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/group_has_spaces.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   exit_code: 1
 
 # Test control has spaces
 - name: test_samplesheet_control_has_spaces
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/control_has_spaces.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/control_has_spaces.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   exit_code: 1
 
 # Test group equals control
 - name: test_samplesheet_group_equals_control
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/group_equals_control.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/group_equals_control.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   exit_code: 1
 
 # Test negative replicate number
 - name: test_samplesheet_neg_rep
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/negative_rep_num.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/negative_rep_num.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   exit_code: 1
 
 # Test replicate number does not start at 1
 - name: test_samplesheet_rep_not_start_one
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/rep_not_start_one.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/rep_not_start_one.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   exit_code: 1
 
 # Test fastq ext incorrect
 - name: test_samplesheet_fastq_ext_error
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/fastq_ext_error.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/fastq_ext_error.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   exit_code: 1
 
 # Test fastq path spaces
 - name: test_samplesheet_fastq_path_spaces_error
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/fastq_file_spaces.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/fastq_file_spaces.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   exit_code: 1
 
 # Test duplicate rows
 - name: test_samplesheet_dup_rows
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/duplicate_rows.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/duplicate_rows.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   exit_code: 1
 
 # Test control not exist
 - name: test_samplesheet_ctrl_not_exist
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/control_not_exist.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/control_not_exist.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   exit_code: 1
 
 # Test control with control
 - name: test_samplesheet_ctrl_with_ctrl
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/control_with_control.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/control_with_control.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   exit_code: 1
 
 # Test small sample sheet
 - name: test_samplesheet_small
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-small.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-small.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   files:
@@ -130,7 +130,7 @@
 
 # Test small tech reps
 - name: test_samplesheet_small_tech_reps
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-small-tech-reps.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-small-tech-reps.csv -c tests/config/nextflow.config
   tags:
     - test_samplesheet
   files:
@@ -138,55 +138,55 @@
 
 # Test noigg small
 - name: test_samplesheet_small_noigg
-  command: nextflow run main.nf -profile docker,test --only_input true --use_control false --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-noigg-small.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --use_control false --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-noigg-small.csv -c tests/config/nextflow.config
   tags:
-    - test_samplesheet
+    - test_samplesheet_2
   files:
     - path: results/pipeline_info/samplesheet.valid.csv
 
 # Test all
 - name: test_samplesheet_all
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all.csv -c tests/config/nextflow.config
   tags:
-    - test_samplesheet
+    - test_samplesheet_2
   files:
     - path: results/pipeline_info/samplesheet.valid.csv
 
 # Test all small
 - name: test_samplesheet_all_small
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-small.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-small.csv -c tests/config/nextflow.config
   tags:
-    - test_samplesheet
+    - test_samplesheet_2
   files:
     - path: results/pipeline_info/samplesheet.valid.csv
 
 # Test all multi-rep
 - name: test_samplesheet_all_multi_rep
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep.csv -c tests/config/nextflow.config
   tags:
-    - test_samplesheet
+    - test_samplesheet_2
   files:
     - path: results/pipeline_info/samplesheet.valid.csv
 
 # Test all multi-rep single-ctrl
 - name: test_samplesheet_multi_rep_single_ctrl
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep-single-ctrl.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep-single-ctrl.csv -c tests/config/nextflow.config
   tags:
-    - test_samplesheet
+    - test_samplesheet_2
   files:
     - path: results/pipeline_info/samplesheet.valid.csv
 
 # Test all multi-rep multi-single-ctrl
 - name: test_samplesheet_multi_rep_multi_single_ctrl
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep-multi-single-ctrl.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep-multi-single-ctrl.csv -c tests/config/nextflow.config
   tags:
-    - test_samplesheet
+    - test_samplesheet_2
   files:
     - path: results/pipeline_info/samplesheet.valid.csv
 
 # Test legacy 1_0 samplesheet
 - name: test_samplesheet_legacy_1_0
-  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/legacy/1_0_samplesheet.csv -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/unit_tests/legacy/1_0_samplesheet.csv -c tests/config/nextflow.config
   tags:
-    - test_samplesheet
+    - test_samplesheet_2
   exit_code: 1
diff --git a/tests/test_03_ailgnment_filtering.yml b/tests/test_03_ailgnment_filtering.yml
new file mode 100644
index 00000000..f464c18b
--- /dev/null
+++ b/tests/test_03_ailgnment_filtering.yml
@@ -0,0 +1,11 @@
+- name: test_filtering_noqfilter
+  command: nextflow run main.nf -profile docker,test --only_filtering true -c tests/config/nextflow.config
+  tags:
+    - test_filtering
+    - test_filtering_noqfilter
+
+- name: test_filtering_withqfilter
+  command: nextflow run main.nf -profile docker,test --only_filtering true --minimum_alignment_q_score 10 -c tests/config/nextflow.config
+  tags:
+    - test_filtering
+    - test_filtering_withqfilter
diff --git a/tests/test_bam_scaling.yml b/tests/test_04_bam_scaling.yml
similarity index 58%
rename from tests/test_bam_scaling.yml
rename to tests/test_04_bam_scaling.yml
index 403f3e17..ad730804 100644
--- a/tests/test_bam_scaling.yml
+++ b/tests/test_04_bam_scaling.yml
@@ -1,69 +1,55 @@
 - name: test_bam_scale_none
-  command: nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --skip_removeduplicates true --normalisation_mode None -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode None -c tests/config/nextflow.config
   tags:
     - test_bam_scale
     - test_bam_scale_none
   files:
     - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_R1.sorted.bedGraph
-      contains:
-        - "chr20 171984 172009 1"
 
 - name: test_bam_scale_spikein
-  command: nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --skip_removeduplicates true --normalisation_mode Spikein -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode Spikein -c tests/config/nextflow.config
   tags:
     - test_bam_scale
     - test_bam_scale_spikein
   files:
     - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_R1.sorted.bedGraph
-      contains:
-        - "chr20 171984 172009 10000"
 
 - name: test_bam_scale_cpm
-  command: nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --skip_removeduplicates true --normalisation_mode CPM -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode CPM -c tests/config/nextflow.config
   tags:
     - test_bam_scale
     - test_bam_scale_cpm
   files:
     - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_R1.sorted.bedGraph
-      contains:
-        - "chr20 171984 172009 1607.72"
 
 - name: test_bam_scale_rpkm
-  command: nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --skip_removeduplicates true --normalisation_mode RPKM -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode RPKM -c tests/config/nextflow.config
   tags:
     - test_bam_scale
     - test_bam_scale_rpkm
   files:
     - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_R1.sorted.bedGraph
-      contains:
-        - "chr20 171984 172009 1.60772e+06"
 
 - name: test_bam_scale_bpm
-  command: nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --skip_removeduplicates true --normalisation_mode BPM -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode BPM -c tests/config/nextflow.config
   tags:
     - test_bam_scale
     - test_bam_scale_bpm
   files:
     - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_R1.sorted.bedGraph
-      contains:
-        - "chr20 171984 172009 1607.72"
 
 - name: test_bam_scale_cpm_iggscale
-  command: nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --skip_removeduplicates true --normalisation_mode CPM --igg_scale_factor 0.1 -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode CPM --igg_scale_factor 0.1 -c tests/config/nextflow.config
   tags:
     - test_bam_scale
     - test_bam_scale_cpm_iggscale
   files:
     - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_R1.sorted.bedGraph
-      contains:
-        - "chr20 171984 172009 1607.72"
     - path: results/03_peak_calling/01_bam_to_bedgraph/igg_ctrl_R1.sorted.bedGraph
-      contains:
-        - "chr20 606391 606415 285.714"
 
 - name: test_normalisation_mode_invalid
   command:
-    nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --normalisation_mode test -c tests/config/nextflow.config
+    nextflow run main.nf -profile docker,test --only_peak_calling --normalisation_mode test -c tests/config/nextflow.config
   - test_bam_scale
   - test_bam_scale_invalid
   exit_code: 1
diff --git a/tests/test_05_peak_callers.yml b/tests/test_05_peak_callers.yml
new file mode 100644
index 00000000..c18d295b
--- /dev/null
+++ b/tests/test_05_peak_callers.yml
@@ -0,0 +1,79 @@
+- name: test_peak_callers_seacr
+  command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller seacr -c tests/config/nextflow.config
+  tags:
+    - test_peak_callers
+    - test_peak_callers_seacr
+  files:
+    - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_R1.seacr.peaks.stringent.bed
+
+- name: test_peak_callers_macs2
+  command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller macs2 -c tests/config/nextflow.config
+  tags:
+    - test_peak_callers
+    - test_peak_callers_macs2
+  files:
+    - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_R1.macs2_peaks.narrowPeak
+
+- name: test_peak_callers_macs2_broad_peak
+  command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller macs2 --macs2_narrow_peak false -c tests/config/nextflow.config
+  tags:
+    - test_peak_callers
+    - test_peak_callers_macs2
+  files:
+    - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_R1.macs2_peaks.broadPeak
+
+- name: test_peak_callers_invalid_name
+  command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller test -c tests/config/nextflow.config
+  tags:
+    - test_peak_callers
+    - test_peak_callers_invalid
+  exit_code: 1
+
+- name: test_peak_callers_seacr_macs2
+  command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller seacr,macs2 -c tests/config/nextflow.config
+  tags:
+    - test_peak_callers
+    - test_peak_callers_seacr_macs2
+  files:
+    - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_R1.seacr.peaks.stringent.bed
+    - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_R1.macs2_peaks.narrowPeak
+
+- name: test_peak_callers_macs2_seacr
+  command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller macs2,seacr -c tests/config/nextflow.config
+  tags:
+    - test_peak_callers
+    - test_peak_callers_macs2_seacr
+  files:
+    - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_R1.seacr.peaks.stringent.bed
+    - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_R1.macs2_peaks.narrowPeak
+
+- name: test_peak_callers_seacr_macs2_noigg
+  command: nextflow run main.nf -profile docker,test_no_control --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller seacr,macs2 -c tests/config/nextflow.config
+  tags:
+    - test_peak_callers
+    - test_peak_callers_seacr_macs2_noigg
+  files:
+    - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_R1.seacr.peaks.stringent.bed
+    - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_R1.macs2_peaks.narrowPeak
+
+- name: test_peak_callers_seacr_single_ctrl_multi_rep
+  command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller seacr --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep-single-ctrl.csv -c tests/config/nextflow.config
+  tags:
+    - test_peak_callers
+    - test_peak_callers_ctrl_tests
+  files:
+    - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_R1.seacr.peaks.stringent.bed
+    - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_R2.seacr.peaks.stringent.bed
+    - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_R1.seacr.peaks.stringent.bed
+    - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_R2.seacr.peaks.stringent.bed
+
+- name: test_peak_callers_seacr_multi_single_ctrl_multi_rep
+  command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller seacr --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep-multi-single-ctrl.csv -c tests/config/nextflow.config
+  tags:
+    - test_peak_callers
+    - test_peak_callers_ctrl_tests
+  files:
+    - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_R1.seacr.peaks.stringent.bed
+    - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_R2.seacr.peaks.stringent.bed
+    - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_R1.seacr.peaks.stringent.bed
+    - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_R2.seacr.peaks.stringent.bed
diff --git a/tests/test_06_consenseus_peaks.yml b/tests/test_06_consenseus_peaks.yml
new file mode 100644
index 00000000..4cee6557
--- /dev/null
+++ b/tests/test_06_consenseus_peaks.yml
@@ -0,0 +1,27 @@
+- name: test_consensus_peaks_group
+  command: nextflow run main.nf -profile docker,test_full_small --only_peak_calling --skip_fastqc --skip_multiqc --skip_preseq --consensus_peak_mode group -c tests/config/nextflow.config
+  tags:
+    - test_consensus_peaks
+    - test_consensus_peaks_group
+  files:
+    - path: results/03_peak_calling/05_consensus_peaks/h3k4me3.consensus.peak_counts.bed
+    - path: results/03_peak_calling/05_consensus_peaks/h3k4me3.consensus.peaks.awk.bed
+    - path: results/03_peak_calling/05_consensus_peaks/h3k27me3.consensus.peak_counts.bed
+    - path: results/03_peak_calling/05_consensus_peaks/h3k27me3.consensus.peaks.awk.bed
+
+- name: test_consensus_peaks_all
+  command: nextflow run main.nf -profile docker,test_full_small --only_peak_calling --skip_fastqc --skip_multiqc --skip_preseq --consensus_peak_mode all -c tests/config/nextflow.config
+  tags:
+    - test_consensus_peaks
+    - test_consensus_peaks_all
+  files:
+    - path: results/03_peak_calling/05_consensus_peaks/all_samples.consensus.peaks.awk.bed
+    - path: results/03_peak_calling/05_consensus_peaks/all_samples.consensus.peak_counts.bed
+
+- name: test_consensus_peaks_invalid
+  command:
+    nextflow run main.nf -profile docker,test_full_small --only_peak_calling --skip_fastqc --skip_multiqc --skip_preseq --consensus_peak_mode test -c tests/config/nextflow.config
+  tags:
+    - test_consensus_peaks
+    - test_consensus_peaks_invalid
+  exit_code: 1
diff --git a/tests/test_consenseus_peaks.yml b/tests/test_consenseus_peaks.yml
deleted file mode 100644
index aa6dacd8..00000000
--- a/tests/test_consenseus_peaks.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-- name: test_conseneus_peaks_group
-  command: nextflow run main.nf -profile docker,test_full_small --only_peak_calling true --skip_fastqc true --consensus_peak_mode group -c tests/config/nextflow.config
-  tags:
-    - test_conseneus_peaks
-    - test_conseneus_peaks_group
-  files:
-    - path: results/03_peak_calling/05_consensus_peaks/h3k4me3.consensus.peaks.bed
-    - path: results/03_peak_calling/05_consensus_peaks/h3k4me3.consensus.peaks.filtered.awk.bed
-    - path: results/03_peak_calling/05_consensus_peaks/h3k27me3.consensus.peaks.bed
-    - path: results/03_peak_calling/05_consensus_peaks/h3k27me3.consensus.peaks.filtered.awk.bed
-
-- name: test_conseneus_peaks_all
-  command: nextflow run main.nf -profile docker,test_full_small --only_peak_calling true --skip_fastqc true --consensus_peak_mode all -c tests/config/nextflow.config
-  tags:
-    - test_conseneus_peaks
-    - test_conseneus_peaks_all
-  files:
-    - path: results/03_peak_calling/05_consensus_peaks/all_samples.awk.bed
-    - path: results/03_peak_calling/05_consensus_peaks/all_samples.consensus.peaks.bed
-
-- name: test_conseneus_peaks_invalid
-  command:
-    nextflow run main.nf -profile docker,test_full_small --only_peak_calling true --skip_fastqc true --consensus_peak_mode test -c tests/config/nextflow.config
-  - test_conseneus_peaks
-  - test_conseneus_peaks_invalid
-  exit_code: 1
diff --git a/tests/test_param_check.yml b/tests/test_param_check.yml
deleted file mode 100644
index f97f308d..00000000
--- a/tests/test_param_check.yml
+++ /dev/null
@@ -1,13 +0,0 @@
-- name: test_param_check_custom_genome_blacklist
-  command: nextflow run main.nf -profile docker,test_fasta_only --only_input true --gtf https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/hg38-chr20-genes.gtf.gz --blacklist ./assets/blacklists/hg38-blacklist.bed -c tests/config/nextflow.config
-  tags:
-    - test_params
-    - params_customgenome
-    - params_customgenome_blacklist
-
-- name: test_param_check_custom_genome_noblacklist
-  command: nextflow run main.nf -profile docker,test_fasta_only --only_input true --gtf https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/reference/genomes/hg38-chr20-genes.gtf.gz -c tests/config/nextflow.config
-  tags:
-    - test_params
-    - params_customgenome
-    - params_customgenome_noblacklist
diff --git a/tests/test_peak_callers.yml b/tests/test_peak_callers.yml
deleted file mode 100644
index fcf00f58..00000000
--- a/tests/test_peak_callers.yml
+++ /dev/null
@@ -1,71 +0,0 @@
-- name: test_peak_callers_seacr
-  command: nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --skip_removeduplicates true --peakcaller seacr -c tests/config/nextflow.config
-  tags:
-    - test_peak_callers
-    - test_peak_callers_seacr
-  files:
-    - path: results/03_peak_calling/04_called_peaks/h3k27me3_R1.seacr.peaks.bed.stringent.bed
-
-- name: test_peak_callers_macs2
-  command: nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --skip_removeduplicates true --peakcaller macs2 -c tests/config/nextflow.config
-  tags:
-    - test_peak_callers
-    - test_peak_callers_macs2
-  files:
-    - path: results/03_peak_calling/04_called_peaks/h3k27me3_R1.macs2.peaks.bed_summits.bed
-
-- name: test_peak_callers_invalid_name
-  command: nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --skip_removeduplicates true --peakcaller test -c tests/config/nextflow.config
-  tags:
-    - test_peak_callers
-    - test_peak_callers_invalid
-  exit_code: 1
-
-- name: test_peak_callers_seacr_macs2
-  command: nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --skip_removeduplicates true --peakcaller seacr,macs2 -c tests/config/nextflow.config
-  tags:
-    - test_peak_callers
-    - test_peak_callers_seacr_macs2
-  files:
-    - path: results/03_peak_calling/04_called_peaks/h3k27me3_R1.seacr.peaks.bed.stringent.bed
-    - path: results/03_peak_calling/04_called_peaks/h3k27me3_R1.macs2.peaks.bed_summits.bed
-
-- name: test_peak_callers_macs2_seacr
-  command: nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --skip_removeduplicates true --peakcaller macs2,seacr -c tests/config/nextflow.config
-  tags:
-    - test_peak_callers
-    - test_peak_callers_macs2_seacr
-  files:
-    - path: results/03_peak_calling/04_called_peaks/h3k27me3_R1.seacr.peaks.bed.stringent.bed
-    - path: results/03_peak_calling/04_called_peaks/h3k27me3_R1.macs2.peaks.bed_summits.bed
-
-- name: test_peak_callers_seacr_macs2_noigg
-  command: nextflow run main.nf -profile docker,test_no_control --only_peak_calling true --skip_fastqc true --skip_removeduplicates true --peakcaller seacr,macs2 -c tests/config/nextflow.config
-  tags:
-    - test_peak_callers
-    - test_peak_callers_seacr_macs2_noigg
-  files:
-    - path: results/03_peak_calling/04_called_peaks/h3k27me3_R1.seacr.peaks.bed.stringent.bed
-    - path: results/03_peak_calling/04_called_peaks/h3k27me3_R1.macs2.peaks.bed_summits.bed
-
-- name: test_peak_callers_seacr_single_ctrl_multi_rep
-  command: nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --skip_removeduplicates true --peakcaller seacr --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep-single-ctrl.csv -c tests/config/nextflow.config
-  tags:
-    - test_peak_callers
-    - test_peak_callers_ctrl_tests
-  files:
-    - path: results/03_peak_calling/04_called_peaks/h3k4me3_R1.seacr.peaks.bed.stringent.bed
-    - path: results/03_peak_calling/04_called_peaks/h3k4me3_R2.seacr.peaks.bed.stringent.bed
-    - path: results/03_peak_calling/04_called_peaks/h3k27me3_R1.seacr.peaks.bed.stringent.bed
-    - path: results/03_peak_calling/04_called_peaks/h3k27me3_R2.seacr.peaks.bed.stringent.bed
-
-- name: test_peak_callers_seacr_multi_single_ctrl_multi_rep
-  command: nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --skip_removeduplicates true --peakcaller seacr --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep-multi-single-ctrl.csv -c tests/config/nextflow.config
-  tags:
-    - test_peak_callers
-    - test_peak_callers_ctrl_tests
-  files:
-    - path: results/03_peak_calling/04_called_peaks/h3k4me3_R1.seacr.peaks.bed.stringent.bed
-    - path: results/03_peak_calling/04_called_peaks/h3k4me3_R2.seacr.peaks.bed.stringent.bed
-    - path: results/03_peak_calling/04_called_peaks/h3k27me3_R1.seacr.peaks.bed.stringent.bed
-    - path: results/03_peak_calling/04_called_peaks/h3k27me3_R2.seacr.peaks.bed.stringent.bed
diff --git a/tests/test_verify_output_only_input.yml b/tests/test_verify_output_01_only_input.yml
similarity index 92%
rename from tests/test_verify_output_only_input.yml
rename to tests/test_verify_output_01_only_input.yml
index 31403144..ea6f0d67 100644
--- a/tests/test_verify_output_only_input.yml
+++ b/tests/test_verify_output_01_only_input.yml
@@ -1,7 +1,6 @@
 - name: test_verify_output_only_input
   command: nextflow run main.nf -profile docker,test --only_input true -c tests/config/nextflow.config
   tags:
-    - verify_output
     - verify_output_only_input
   files:
     - path: results/pipeline_info/samplesheet.valid.csv
diff --git a/tests/test_verify_output_save_merged.yml b/tests/test_verify_output_01_save_merged.yml
similarity index 71%
rename from tests/test_verify_output_save_merged.yml
rename to tests/test_verify_output_01_save_merged.yml
index ada48731..d0484570 100644
--- a/tests/test_verify_output_save_merged.yml
+++ b/tests/test_verify_output_01_save_merged.yml
@@ -1,8 +1,6 @@
 - name: test_verify_output_save_merged
-  command: nextflow run main.nf -profile docker,test_tech_reps --save_merged_fastq true --only_preqc true -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test_tech_reps --save_merged_fastq --only_preqc -c tests/config/nextflow.config
   tags:
-    - verify_output
-    - verify_output_save
     - verify_output_save_merged
   files:
     - path: results/01_prealign/merged_fastq/h3k27me3_R1_1.merged.fastq.gz
diff --git a/tests/test_verify_output_save_trimmed.yml b/tests/test_verify_output_01_save_trimmed.yml
similarity index 68%
rename from tests/test_verify_output_save_trimmed.yml
rename to tests/test_verify_output_01_save_trimmed.yml
index 43ae6b4a..0e4026d2 100644
--- a/tests/test_verify_output_save_trimmed.yml
+++ b/tests/test_verify_output_01_save_trimmed.yml
@@ -1,8 +1,6 @@
 - name: test_verify_output_save_trimmed
-  command: nextflow run main.nf -profile docker,test --only_preqc true --save_trimmed true -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_preqc --save_trimmed -c tests/config/nextflow.config
   tags:
-    - verify_output
-    - verify_output_save
     - verify_output_save_trimmed
   files:
     - path: results/01_prealign/trimgalore/h3k27me3_R1_1.trimmed.fastq.gz
diff --git a/tests/test_verify_output_skip_fastqc.yml b/tests/test_verify_output_01_skip_fastqc.yml
similarity index 83%
rename from tests/test_verify_output_skip_fastqc.yml
rename to tests/test_verify_output_01_skip_fastqc.yml
index 34de90ec..d0eec28a 100644
--- a/tests/test_verify_output_skip_fastqc.yml
+++ b/tests/test_verify_output_01_skip_fastqc.yml
@@ -1,8 +1,6 @@
 - name: test_verify_output_skip_fastqc_true
-  command: nextflow run main.nf -profile docker,test --only_preqc true --skip_fastqc true -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_preqc --skip_fastqc true -c tests/config/nextflow.config
   tags:
-    - verify_output
-    - verify_output_skip
     - verify_output_skip_fastqc
     - verify_output_skip_fastqc_true
   files:
@@ -12,9 +10,8 @@
       should_exist: false
 
 - name: test_verify_output_skip_fastqc_false
-  command: nextflow run main.nf -profile docker,test --only_preqc true --skip_fastqc false
+  command: nextflow run main.nf -profile docker,test --only_preqc --skip_fastqc false
   tags:
-    - verify_output
     - verify_output_skip_fastqc
     - verify_output_skip_fastqc_false
   files:
diff --git a/tests/test_verify_output_skip_trimming.yml b/tests/test_verify_output_01_skip_trimming.yml
similarity index 67%
rename from tests/test_verify_output_skip_trimming.yml
rename to tests/test_verify_output_01_skip_trimming.yml
index 489bc339..8cdaf815 100644
--- a/tests/test_verify_output_skip_trimming.yml
+++ b/tests/test_verify_output_01_skip_trimming.yml
@@ -1,8 +1,6 @@
 - name: test_verify_output_skip_trimming
-  command: nextflow run main.nf -profile docker,test --only_preqc true --save_trimmed true --skip_trimming true -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_preqc --save_trimmed --skip_trimming -c tests/config/nextflow.config
   tags:
-    - verify_output
-    - verify_output_skip
     - verify_output_skip_trimming
   files:
     - path: results/01_prealign/trimgalore/h3k27me3_R1_1.trimmed.fastq.gz
diff --git a/tests/test_verify_output_save_ref.yml b/tests/test_verify_output_02_save_ref.yml
similarity index 67%
rename from tests/test_verify_output_save_ref.yml
rename to tests/test_verify_output_02_save_ref.yml
index 2d2714a2..b1e95a9e 100644
--- a/tests/test_verify_output_save_ref.yml
+++ b/tests/test_verify_output_02_save_ref.yml
@@ -1,16 +1,10 @@
 - name: test_verify_output_save_ref
   command: nextflow run main.nf -profile docker,test --save_reference true --only_genome true -c tests/config/nextflow.config
   tags:
-    - verify_output
-    - verify_output_save
     - verify_output_save_ref
   files:
-    - path: results/00_genome/hg38-chr20-genes.bed
-    - path: results/00_genome/hg38-chr20-genes.gtf
-    - path: results/00_genome/hg38-chr20.fa
     - path: results/00_genome/hg38-chr20.fa.fai
     - path: results/00_genome/hg38-chr20.fa.sizes
     - path: results/00_genome/index/hg38-chr20-bowtie2/hg38-chr20.1.bt2
-    - path: results/00_genome/e_coli_U00096_3.fa
     - path: results/00_genome/e_coli_U00096_3.fa.sizes
     - path: results/00_genome/index/e_coli_U00096_3/e_coli_U00096_3.1.bt2
diff --git a/tests/test_verify_output_only_align.yml b/tests/test_verify_output_03_only_align.yml
similarity index 62%
rename from tests/test_verify_output_only_align.yml
rename to tests/test_verify_output_03_only_align.yml
index 097882d9..9e4fd6b3 100644
--- a/tests/test_verify_output_only_align.yml
+++ b/tests/test_verify_output_03_only_align.yml
@@ -1,13 +1,11 @@
 - name: test_verify_output_only_align
-  command: nextflow run main.nf -profile docker,test --only_alignment true --skip_fastqc true -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_alignment --skip_fastqc -c tests/config/nextflow.config
   tags:
-    - verify_output
-    - verify_output_align
     - verify_output_align_only_align
   files:
     - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.sorted.bam
     - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.sorted.bam
     - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.sorted.bam.bai
     - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.sorted.bam.bai
-    - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.sorted.bam.flagstat
-    - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.sorted.bam.flagstat
+    - path: results/02_alignment/bowtie2/target/h3k27me3_R1.flagstat
+    - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.flagstat
diff --git a/tests/test_verify_output_align_intermed.yml b/tests/test_verify_output_03_save_align_intermed.yml
similarity index 77%
rename from tests/test_verify_output_align_intermed.yml
rename to tests/test_verify_output_03_save_align_intermed.yml
index ffae3d87..30cfb1cf 100644
--- a/tests/test_verify_output_align_intermed.yml
+++ b/tests/test_verify_output_03_save_align_intermed.yml
@@ -1,8 +1,6 @@
 - name: test_verify_output_align_intermed
-  command: nextflow run main.nf -profile docker,test --only_peak_calling true --skip_fastqc true --save_align_intermed true -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_filtering --skip_fastqc --skip_preseq --save_align_intermed -c tests/config/nextflow.config
   tags:
-    - verify_output
-    - verify_output_align
     - verify_output_align_intermed
   files:
     - path: results/02_alignment/bowtie2/target/h3k27me3_R1.bam
diff --git a/tests/test_verify_output_save_spikein_align.yml b/tests/test_verify_output_03_save_spikein_align.yml
similarity index 61%
rename from tests/test_verify_output_save_spikein_align.yml
rename to tests/test_verify_output_03_save_spikein_align.yml
index a228d91b..d2144231 100644
--- a/tests/test_verify_output_save_spikein_align.yml
+++ b/tests/test_verify_output_03_save_spikein_align.yml
@@ -1,13 +1,11 @@
 - name: test_verify_output_save_spikein_align
-  command: nextflow run main.nf -profile docker,test --only_alignment true --save_spikein_aligned true --skip_fastqc true -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_alignment --save_spikein_aligned --skip_fastqc -c tests/config/nextflow.config
   tags:
-    - verify_output
-    - verify_output_align
     - verify_output_align_save_spikein_align
   files:
     - path: results/02_alignment/bowtie2/spikein/h3k27me3_R1.spikein.sorted.bam
     - path: results/02_alignment/bowtie2/spikein/igg_ctrl_R1.spikein.sorted.bam
     - path: results/02_alignment/bowtie2/spikein/h3k27me3_R1.spikein.sorted.bam.bai
     - path: results/02_alignment/bowtie2/spikein/igg_ctrl_R1.spikein.sorted.bam.bai
-    - path: results/02_alignment/bowtie2/spikein/h3k27me3_R1.spikein.sorted.bam.flagstat
-    - path: results/02_alignment/bowtie2/spikein/igg_ctrl_R1.spikein.sorted.bam.flagstat
+    - path: results/02_alignment/bowtie2/spikein/h3k27me3_R1.flagstat
+    - path: results/02_alignment/bowtie2/spikein/igg_ctrl_R1.flagstat
diff --git a/tests/test_verify_output_save_unaligned.yml b/tests/test_verify_output_03_save_unaligned.yml
similarity index 80%
rename from tests/test_verify_output_save_unaligned.yml
rename to tests/test_verify_output_03_save_unaligned.yml
index ca1105ad..ca753098 100644
--- a/tests/test_verify_output_save_unaligned.yml
+++ b/tests/test_verify_output_03_save_unaligned.yml
@@ -1,8 +1,6 @@
 - name: test_verify_output_save_unaligned
-  command: nextflow run main.nf -profile docker,test --only_alignment true --skip_fastqc true --save_unaligned true -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_alignment --skip_fastqc --save_unaligned -c tests/config/nextflow.config
   tags:
-    - verify_output
-    - verify_output_align
     - verify_output_align_save_unaligned
   files:
     - path: results/02_alignment/bowtie2/target/unmapped/h3k27me3_R1.unmapped_1.fastq.gz
diff --git a/tests/test_verify_output_duplicates.yml b/tests/test_verify_output_04_duplicates.yml
similarity index 60%
rename from tests/test_verify_output_duplicates.yml
rename to tests/test_verify_output_04_duplicates.yml
index d28e9df8..df88932e 100644
--- a/tests/test_verify_output_duplicates.yml
+++ b/tests/test_verify_output_04_duplicates.yml
@@ -1,46 +1,40 @@
 - name: test_verify_output_duplicates_mark
-  command: nextflow run main.nf -profile docker,test --only_filtering true --skip_fastqc true --skip_removeduplicates true -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_filtering --skip_fastqc --skip_removeduplicates --skip_preseq -c tests/config/nextflow.config
   tags:
-    - verify_output
-    - verify_output_align
     - verify_output_align_duplicates
     - verify_output_align_duplicates_mark
   files:
     - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.markdup.bam
     - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.markdup.bam
-    - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.markdup.bam.bai
-    - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.markdup.bam.bai
-    - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.markdup.bam.flagstat
-    - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.markdup.bam.flagstat
+    - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.markdup.sorted.bam.bai
+    - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.markdup.sorted.bam.bai
+    - path: results/02_alignment/bowtie2/target/h3k27me3_R1.flagstat
+    - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.flagstat
 
 - name: test_verify_output_duplicates_remove
-  command: nextflow run main.nf -profile docker,test --only_filtering true --skip_fastqc true --dedup_target_reads false -c tests/config/nextflow.config
+  command: nextflow run main.nf -profile docker,test --only_filtering --skip_fastqc --skip_preseq --dedup_target_reads false -c tests/config/nextflow.config
   tags:
-    - verify_output
-    - verify_output_align
     - verify_output_align_duplicates
     - verify_output_align_duplicates_remove
   files:
     - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.markdup.bam
     - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.dedup.bam
-    - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.markdup.bam.bai
-    - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.dedup.bam.bai
-    - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.markdup.bam.flagstat
-    - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.dedup.bam.flagstat
+    - path:
results/02_alignment/bowtie2/target/h3k27me3_R1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.dedup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/h3k27me3_R1.flagstat + - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.flagstat - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.dedup.bam should_exist: false - name: test_verify_output_duplicates_remove_target - command: nextflow run main.nf -profile docker,test --only_filtering true --skip_fastqc true --dedup_target_reads true -c tests/config/nextflow.config + command: nextflow run main.nf -profile docker,test --only_filtering --skip_fastqc --skip_preseq --dedup_target_reads true -c tests/config/nextflow.config tags: - - verify_output - - verify_output_align - verify_output_align_duplicates - verify_output_align_duplicates_remove_target files: - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.dedup.bam - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.dedup.bam - - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.dedup.bam.bai - - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.dedup.bam.bai - - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.dedup.bam.flagstat - - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.dedup.bam.flagstat + - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.dedup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.dedup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/h3k27me3_R1.flagstat + - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.flagstat diff --git a/tests/test_verify_output_only_filtering.yml b/tests/test_verify_output_04_only_filtering.yml similarity index 57% rename from tests/test_verify_output_only_filtering.yml rename to tests/test_verify_output_04_only_filtering.yml index be0032ec..f3a3a024 100644 --- a/tests/test_verify_output_only_filtering.yml +++ b/tests/test_verify_output_04_only_filtering.yml @@ -1,12 +1,11 @@ - name: verify_output_only_filtering - command: nextflow run main.nf -profile docker,test --only_filtering true --skip_fastqc true --skip_removeduplicates true -c tests/config/nextflow.config + command: nextflow run main.nf -profile docker,test --only_filtering --skip_fastqc --skip_removeduplicates --skip_preseq -c tests/config/nextflow.config tags: - - verify_output - verify_output_only_filtering files: - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.markdup.bam - - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.markdup.bam.bai - - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.markdup.bam.flagstat + - path: results/02_alignment/bowtie2/target/h3k27me3_R1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/h3k27me3_R1.flagstat - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.markdup.bam - - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.markdup.bam.bai - - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.markdup.bam.flagstat + - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/igg_ctrl_R1.flagstat diff --git a/tests/test_verify_output_only_peak_calling.yml b/tests/test_verify_output_05_only_peak_calling.yml similarity index 58% rename from tests/test_verify_output_only_peak_calling.yml rename to tests/test_verify_output_05_only_peak_calling.yml index 23bfa767..7de6f567 100644 ---
a/tests/test_verify_output_only_peak_calling.yml +++ b/tests/test_verify_output_05_only_peak_calling.yml @@ -1,7 +1,6 @@ - name: test_verify_output_only_peak_calling - command: nextflow run main.nf -profile docker,test_full_small --only_peak_calling true --skip_fastqc true -c tests/config/nextflow.config + command: nextflow run main.nf -profile docker,test_full_small --only_peak_calling --skip_fastqc --skip_preseq -c tests/config/nextflow.config tags: - - verify_output - verify_output_peak_calling_only_peak_calling files: - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_R1.sorted.bedGraph @@ -10,11 +9,8 @@ - path: results/03_peak_calling/02_clip_bed/igg_ctrl_R1.clipped.bedGraph - path: results/03_peak_calling/03_bed_to_bigwig/h3k27me3_R1.bigWig - path: results/03_peak_calling/03_bed_to_bigwig/igg_ctrl_R1.bigWig - - path: results/03_peak_calling/04_called_peaks/h3k27me3_R1.seacr.peaks.bed.stringent.bed - - path: results/03_peak_calling/04_called_peaks/igg_ctrl_R1.seacr.peaks.bed.stringent.bed + - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_R1.seacr.peaks.stringent.bed + - path: results/03_peak_calling/04_called_peaks/seacr/igg_ctrl_R1.seacr.peaks.stringent.bed should_exist: false - - path: results/03_peak_calling/05_consensus_peaks/h3k27me3.consensus.peaks.bed - - path: results/03_peak_calling/05_consensus_peaks/h3k27me3.consensus.peaks.filtered.awk.bed - - path: results/03_peak_calling/06_fragments/h3k27me3_R1.frags.len.txt - - path: results/03_peak_calling/06_fragments/h3k27me3_R1.frags.bin500.awk.bed - - path: results/03_peak_calling/06_fragments/h3k27me3_R1.frags.cut.bed + - path: results/03_peak_calling/05_consensus_peaks/h3k27me3.consensus.peaks.awk.bed + - path: results/03_peak_calling/05_consensus_peaks/h3k27me3.consensus.peak_counts.bed diff --git a/tests/test_verify_output_06_skip_dt_qc.yml b/tests/test_verify_output_06_skip_dt_qc.yml new file mode 100644 index 00000000..420ea2ad --- /dev/null +++ b/tests/test_verify_output_06_skip_dt_qc.yml @@ -0,0 +1,22 @@ +- name: verify_output_reporting_skip_dtqc_false + command: nextflow run main.nf -profile docker,test_full_small --skip_fastqc --skip_multiqc --skip_preseq -c tests/config/nextflow.config + tags: + - verify_output_reporting_skip_dtqc + - verify_output_reporting_skip_dtqc_false + files: + - path: results/04_reporting/deeptools_qc/all_target_bams.plotCorrelation.pdf + - path: results/04_reporting/deeptools_qc/all_target_bams.plotPCA.pdf + - path: results/04_reporting/deeptools_qc/h3k4me3_R1.plotFingerprint.pdf + +- name: verify_output_reporting_skip_dtqc_true + command: nextflow run main.nf -profile docker,test_full_small --skip_fastqc --skip_multiqc --skip_preseq --skip_dt_qc -c tests/config/nextflow.config + tags: + - verify_output_reporting_skip_dtqc + - verify_output_reporting_skip_dtqc_true + files: + - path: results/04_reporting/deeptools_qc/all_target_bams.plotCorrelation.pdf + should_exist: false + - path: results/04_reporting/deeptools_qc/all_target_bams.plotPCA.pdf + should_exist: false + - path: results/04_reporting/deeptools_qc/h3k4me3_R1.plotFingerprint.pdf + should_exist: false diff --git a/tests/test_verify_output_06_skip_heatmaps.yml b/tests/test_verify_output_06_skip_heatmaps.yml new file mode 100644 index 00000000..216e15f3 --- /dev/null +++ b/tests/test_verify_output_06_skip_heatmaps.yml @@ -0,0 +1,19 @@ +- name: verify_output_reporting_skip_heatmaps_false + command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_multiqc --skip_preseq -c tests/config/nextflow.config 
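# NB: these commands rely on Nextflow's implicit boolean handling: a bare
# `--skip_fastqc` on the command line sets `params.skip_fastqc = true`, so it
# is equivalent to the older `--skip_fastqc true` form these tests are
# migrating away from. A minimal pytest-workflow entry has this shape
# (hypothetical test name and output path, for illustration only):
#
#   - name: my_test
#     command: nextflow run main.nf -profile docker,test --skip_fastqc -c tests/config/nextflow.config
#     tags:
#       - my_tag
#     files:
#       - path: results/some/output.txt
#         should_exist: true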
+ tags: + - verify_output_reporting_skip_heatmaps + - verify_output_reporting_skip_heatmaps_false + files: + - path: results/04_reporting/deeptools_heatmaps/gene/h3k27me3_R1.computeMatrix.mat.gz + - path: results/04_reporting/deeptools_heatmaps/peaks/h3k27me3_R1.computeMatrix.mat.gz + +- name: verify_output_reporting_skip_heatmaps_true + command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_multiqc --skip_preseq --skip_heatmaps -c tests/config/nextflow.config + tags: + - verify_output_reporting_skip_heatmaps + - verify_output_reporting_skip_heatmaps_true + files: + - path: results/04_reporting/deeptools_heatmaps/gene/h3k27me3_R1.computeMatrix.mat.gz + should_exist: false + - path: results/04_reporting/deeptools_heatmaps/peaks/h3k27me3_R1.computeMatrix.mat.gz + should_exist: false diff --git a/tests/test_verify_output_06_skip_igv.yml b/tests/test_verify_output_06_skip_igv.yml new file mode 100644 index 00000000..b2b0cc34 --- /dev/null +++ b/tests/test_verify_output_06_skip_igv.yml @@ -0,0 +1,16 @@ +- name: verify_output_reporting_skip_igv_false + command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_multiqc --skip_preseq -c tests/config/nextflow.config + tags: + - verify_output_reporting_skip_igv + - verify_output_reporting_skip_igv_false + files: + - path: results/04_reporting/igv/igv_session.xml + +- name: verify_output_reporting_skip_igv_true + command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_multiqc --skip_preseq --skip_igv -c tests/config/nextflow.config + tags: + - verify_output_reporting_skip_igv + - verify_output_reporting_skip_igv_true + files: + - path: results/04_reporting/igv/igv_session.xml + should_exist: false diff --git a/tests/test_verify_output_06_skip_multiqc.yml b/tests/test_verify_output_06_skip_multiqc.yml new file mode 100644 index 00000000..308cae3d --- /dev/null +++ b/tests/test_verify_output_06_skip_multiqc.yml @@ -0,0 +1,16 @@ +- name: verify_output_reporting_skip_multiqc_false + command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_preseq -c tests/config/nextflow.config + tags: + - verify_output_reporting_skip_multiqc + - verify_output_reporting_skip_multiqc_false + files: + - path: results/04_reporting/multiqc/multiqc_report.html + +- name: verify_output_reporting_skip_multiqc_true + command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_preseq --skip_multiqc -c tests/config/nextflow.config + tags: + - verify_output_reporting_skip_multiqc + - verify_output_reporting_skip_multiqc_true + files: + - path: results/04_reporting/multiqc/multiqc_report.html + should_exist: false diff --git a/tests/test_verify_output_06_skip_peak_qc.yml b/tests/test_verify_output_06_skip_peak_qc.yml new file mode 100644 index 00000000..a7bd0c7d --- /dev/null +++ b/tests/test_verify_output_06_skip_peak_qc.yml @@ -0,0 +1,16 @@ +- name: verify_output_reporting_skip_peak_qc_false + command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_preseq -c tests/config/nextflow.config + tags: + - verify_output_reporting_skip_peak_qc + - verify_output_reporting_skip_peak_qc_false + files: + - path: results/04_reporting/multiqc/multiqc_data/multiqc_primary_peakcounts_plot.txt + +- name: verify_output_reporting_skip_peak_qc_true + command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_preseq --skip_peak_qc -c tests/config/nextflow.config + tags: + - verify_output_reporting_skip_peak_qc + - verify_output_reporting_skip_peak_qc_true + files: + - path: 
results/04_reporting/multiqc/multiqc_data/multiqc_primary_peakcounts_plot.txt + should_exist: false diff --git a/tests/test_verify_output_06_skip_preseq.yml b/tests/test_verify_output_06_skip_preseq.yml new file mode 100644 index 00000000..50b2c1e1 --- /dev/null +++ b/tests/test_verify_output_06_skip_preseq.yml @@ -0,0 +1,22 @@ +- name: test_verify_output_skip_preseq_false + command: nextflow run main.nf -profile docker,test_full_small --skip_fastqc --skip_multiqc --only_filtering -c tests/config/nextflow.config + tags: + - verify_output_reporting_skip_preseq + - verify_output_reporting_skip_preseq_false + files: + - path: results/04_reporting/preseq/h3k4me3_R1.command.log + - path: results/04_reporting/preseq/h3k4me3_R2.command.log + - path: results/04_reporting/preseq/igg_ctrl_R1.command.log + +- name: test_verify_output_skip_preseq_true + command: nextflow run main.nf -profile docker,test_full_small --skip_fastqc --skip_multiqc --only_filtering --skip_preseq -c tests/config/nextflow.config + tags: + - verify_output_reporting_skip_preseq + - verify_output_reporting_skip_preseq_true + files: + - path: results/04_reporting/preseq/h3k4me3_R1.command.log + should_exist: false + - path: results/04_reporting/preseq/h3k4me3_R2.command.log + should_exist: false + - path: results/04_reporting/preseq/igg_ctrl_R1.command.log + should_exist: false diff --git a/tests/test_verify_output_06_skip_reporting.yml b/tests/test_verify_output_06_skip_reporting.yml new file mode 100644 index 00000000..116a33d5 --- /dev/null +++ b/tests/test_verify_output_06_skip_reporting.yml @@ -0,0 +1,13 @@ +- name: test_verify_output_skip_reporting + command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_reporting -c tests/config/nextflow.config + tags: + - verify_output_reporting_skip_reporting + files: + - path: results/04_reporting/igv/igv_session.xml + should_exist: false + - path: results/04_reporting/deeptools_qc/all_target_bams.plotCorrelation.pdf + should_exist: false + - path: results/04_reporting/deeptools_heatmaps/peaks/h3k27me3_R1.plotHeatmap.pdf + should_exist: false + - path: results/04_reporting/deeptools_heatmaps/gene/h3k4me3_R1.computeMatrix.mat.gz + should_exist: false diff --git a/tests/test_verify_output_skip_frip.yml b/tests/test_verify_output_skip_frip.yml deleted file mode 100644 index a062f34d..00000000 --- a/tests/test_verify_output_skip_frip.yml +++ /dev/null @@ -1,17 +0,0 @@ -- name: test_verify_output_skip_frip_true - command: nextflow run main.nf -profile docker,test_full_small --skip_frip true -c tests/config/nextflow.config - tags: - - verify_output_skip_frip - files: - - path: results/04_reporting/qc/06_04_frags_in_peaks.csv - should_exist: false - - path: results/04_reporting/qc/06_04_frags_in_peaks.png - should_exist: false - -- name: test_verify_output_skip_frip_false - command: nextflow run main.nf -profile docker,test_full_small --skip_frip false -c tests/config/nextflow.config - tags: - - verify_output_skip_frip - files: - - path: results/04_reporting/qc/06_04_frags_in_peaks.csv - - path: results/04_reporting/qc/06_04_frags_in_peaks.png diff --git a/tests/test_verify_output_skip_reporting.yml b/tests/test_verify_output_skip_reporting.yml deleted file mode 100644 index 193dd917..00000000 --- a/tests/test_verify_output_skip_reporting.yml +++ /dev/null @@ -1,54 +0,0 @@ -- name: test_verify_output_skip_reporting - command: nextflow run main.nf -profile docker,test --skip_fastqc true --skip_reporting true -c tests/config/nextflow.config - tags: - - verify_output - 
verify_output_reporting - - verify_output_reporting_skip_reporting - files: - - path: results/04_reporting/qc/merged_report.pdf - should_exist: false - - path: results/04_reporting/multiqc/multiqc_report.html - should_exist: false - - path: results/04_reporting/heatmaps/peaks/h3k27me3_R1.plotHeatmap.pdf - should_exist: false - - path: results/04_reporting/igv/igv_session.xml - should_exist: false - -- name: test_verify_output_skip_reporting_igv - command: nextflow run main.nf -profile docker,test --skip_fastqc true --skip_igv true -c tests/config/nextflow.config - tags: - - verify_output - - verify_output_reporting - - verify_output_reporting_skip_igv - files: - - path: results/04_reporting/qc/merged_report.pdf - - path: results/04_reporting/multiqc/multiqc_report.html - - path: results/04_reporting/heatmaps/peaks/h3k27me3_R1.plotHeatmap.pdf - - path: results/04_reporting/igv/igv_session.xml - should_exist: false - -- name: test_verify_output_skip_reporting_heatmaps - command: nextflow run main.nf -profile docker,test --skip_fastqc true --skip_heatmaps true -c tests/config/nextflow.config - tags: - - verify_output - - verify_output_reporting - - verify_output_reporting_skip_heatmaps - files: - - path: results/04_reporting/qc/merged_report.pdf - - path: results/04_reporting/multiqc/multiqc_report.html - - path: results/04_reporting/heatmaps/peaks/h3k27me3_R1.plotHeatmap.pdf - should_exist: false - - path: results/04_reporting/igv/igv_session.xml - -- name: test_verify_output_skip_reporting_multiqc - command: nextflow run main.nf -profile docker,test --skip_fastqc true --skip_multiqc true -c tests/config/nextflow.config - tags: - - verify_output - - verify_output_reporting - - verify_output_reporting_skip_multiqc - files: - - path: results/04_reporting/qc/merged_report.pdf - - path: results/04_reporting/multiqc/multiqc_report.html - should_exist: false - - path: results/04_reporting/heatmaps/peaks/h3k27me3_R1.plotHeatmap.pdf - - path: results/04_reporting/igv/igv_session.xml diff --git a/workflows/cutandrun.nf b/workflows/cutandrun.nf index b8f7872b..74e7c3f7 100644 --- a/workflows/cutandrun.nf +++ b/workflows/cutandrun.nf @@ -27,10 +27,10 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, "Input sample ch_blacklist = Channel.empty() if (params.blacklist) { - ch_blacklist = file(params.blacklist) + ch_blacklist = Channel.from( file(params.blacklist) ) } else { - ch_blacklist = file("$projectDir/assets/dummy_file.txt", checkIfExists: true) + ch_blacklist = Channel.empty() WorkflowCutandrun.blacklistWarn(log) } @@ -59,7 +59,11 @@ ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIf ch_multiqc_custom_config = params.multiqc_config ? 
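// NB: the ternary being completed here is the usual nf-core pattern for
// optional inputs: wrap the user-supplied path in a channel when the param is
// set, otherwise fall back to Channel.empty() so a downstream
// `.collect().ifEmpty([])` can substitute a placeholder. Sketch with a
// hypothetical param:
//   ch_gene_list = params.gene_list ? Channel.fromPath(params.gene_list) : Channel.empty()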
Channel.fromPath(params.multiqc_config) : Channel.empty() // Header files for MultiQC -ch_frag_len_header_multiqc = file("$projectDir/assets/multiqc/frag_len_header.txt", checkIfExists: true) +ch_frag_len_header_multiqc = file("$projectDir/assets/multiqc/frag_len_header.txt", checkIfExists: true) +ch_frip_score_header_multiqc = file("$projectDir/assets/multiqc/frip_score_header.txt", checkIfExists: true) +ch_peak_counts_header_multiqc = file("$projectDir/assets/multiqc/peak_counts_header.txt", checkIfExists: true) +ch_peak_counts_consensus_header_multiqc = file("$projectDir/assets/multiqc/peak_counts_consensus_header.txt", checkIfExists: true) +ch_peak_reprod_header_multiqc = file("$projectDir/assets/multiqc/peak_reprod_header.txt", checkIfExists: true) /* ======================================================================================== @@ -87,33 +91,27 @@ if ((caller_list + callers).unique().size() != caller_list.size()) { * MODULES */ include { INPUT_CHECK } from "../subworkflows/local/input_check" +include { CUT as PEAK_TO_BED } from '../modules/local/linux/cut' include { AWK as AWK_NAME_PEAK_BED } from "../modules/local/linux/awk" -include { AWK as AWK_FRAG_BIN } from "../modules/local/linux/awk" -include { SAMTOOLS_CUSTOMVIEW } from "../modules/local/samtools_custom_view" include { IGV_SESSION } from "../modules/local/python/igv_session" -include { AWK as AWK_EDIT_PEAK_BED } from "../modules/local/linux/awk" -include { CALCULATE_FRIP } from "../modules/local/modules/calculate_frip/main" -include { CUT as CUT_CALC_REPROD } from "../modules/local/linux/cut" -include { CALCULATE_PEAK_REPROD } from "../modules/local/modules/calculate_peak_reprod/main" -include { EXPORT_META } from "../modules/local/export_meta" -include { EXPORT_META as EXPORT_META_CTRL } from "../modules/local/export_meta" -include { GENERATE_REPORTS } from "../modules/local/modules/generate_reports/main" +include { AWK as AWK_EXTRACT_SUMMITS } from "../modules/local/linux/awk" +include { SAMTOOLS_CUSTOMVIEW } from "../modules/local/samtools_custom_view" +include { FRAG_LEN_HIST } from "../modules/local/python/frag_len_hist" include { MULTIQC } from "../modules/local/multiqc" /* * SUBWORKFLOW: Consisting of a mix of local and nf-core/modules */ -include { PREPARE_GENOME } from "../subworkflows/local/prepare_genome" -include { FASTQC_TRIMGALORE } from "../subworkflows/local/fastqc_trimgalore" -include { ALIGN_BOWTIE2 } from "../subworkflows/local/align_bowtie2" -include { ANNOTATE_META_AWK as ANNOTATE_BT2_META } from "../subworkflows/local/annotate_meta_awk" -include { ANNOTATE_META_AWK as ANNOTATE_BT2_SPIKEIN_META } from "../subworkflows/local/annotate_meta_awk" -include { ANNOTATE_META_AWK as ANNOTATE_DEDUP_META } from "../subworkflows/local/annotate_meta_awk" -include { CONSENSUS_PEAKS } from "../subworkflows/local/consensus_peaks" -include { CONSENSUS_PEAKS as CONSENSUS_PEAKS_ALL } from "../subworkflows/local/consensus_peaks" -include { CALCULATE_FRAGMENTS } from "../subworkflows/local/calculate_fragments" -include { ANNOTATE_META_CSV as ANNOTATE_FRIP_META } from "../subworkflows/local/annotate_meta_csv" -include { ANNOTATE_META_CSV as ANNOTATE_PEAK_REPRO_META } from "../subworkflows/local/annotate_meta_csv" +include { PREPARE_GENOME } from "../subworkflows/local/prepare_genome" +include { FASTQC_TRIMGALORE } from "../subworkflows/local/fastqc_trimgalore" +include { ALIGN_BOWTIE2 } from "../subworkflows/local/align_bowtie2" +include { EXTRACT_METADATA_AWK as EXTRACT_BT2_TARGET_META } from 
"../subworkflows/local/extract_metadata_awk" +include { EXTRACT_METADATA_AWK as EXTRACT_BT2_SPIKEIN_META } from "../subworkflows/local/extract_metadata_awk" +include { EXTRACT_METADATA_AWK as EXTRACT_PICARD_DUP_META } from "../subworkflows/local/extract_metadata_awk" +include { CONSENSUS_PEAKS } from "../subworkflows/local/consensus_peaks" +include { CONSENSUS_PEAKS as CONSENSUS_PEAKS_ALL } from "../subworkflows/local/consensus_peaks" +include { EXTRACT_FRAGMENTS } from "../subworkflows/local/extract_fragments" +include { PEAK_QC } from "../subworkflows/local/peak_qc" /* ======================================================================================== @@ -124,25 +122,26 @@ include { ANNOTATE_META_CSV as ANNOTATE_PEAK_REPRO_META } from "../subworkflows /* * MODULES */ -include { CAT_FASTQ } from "../modules/nf-core/modules/cat/fastq/main" -include { SEACR_CALLPEAK } from "../modules/nf-core/modules/seacr/callpeak/main" -include { SEACR_CALLPEAK as SEACR_CALLPEAK_NOIGG } from "../modules/nf-core/modules/seacr/callpeak/main" -include { MACS2_CALLPEAK } from "../modules/nf-core/modules/macs2/callpeak/main" -include { MACS2_CALLPEAK as MACS2_CALLPEAK_NOIGG } from "../modules/nf-core/modules/macs2/callpeak/main" -include { DEEPTOOLS_COMPUTEMATRIX as DEEPTOOLS_COMPUTEMATRIX_GENE } from "../modules/nf-core/modules/deeptools/computematrix/main" -include { DEEPTOOLS_COMPUTEMATRIX as DEEPTOOLS_COMPUTEMATRIX_PEAKS } from "../modules/nf-core/modules/deeptools/computematrix/main" -include { DEEPTOOLS_PLOTHEATMAP as DEEPTOOLS_PLOTHEATMAP_GENE } from "../modules/nf-core/modules/deeptools/plotheatmap/main" -include { DEEPTOOLS_PLOTHEATMAP as DEEPTOOLS_PLOTHEATMAP_PEAKS } from "../modules/nf-core/modules/deeptools/plotheatmap/main" -include { BEDTOOLS_INTERSECT } from "../modules/nf-core/modules/bedtools/intersect/main.nf" -include { CUSTOM_DUMPSOFTWAREVERSIONS } from "../modules/local/modules/custom/dumpsoftwareversions/main" +include { CAT_FASTQ } from "../modules/nf-core/cat/fastq/main" +include { PRESEQ_LCEXTRAP } from "../modules/local/for_patch/preseq/lcextrap/main" +include { SEACR_CALLPEAK } from "../modules/nf-core/seacr/callpeak/main" +include { SEACR_CALLPEAK as SEACR_CALLPEAK_NOIGG } from "../modules/nf-core/seacr/callpeak/main" +include { MACS2_CALLPEAK } from "../modules/nf-core/macs2/callpeak/main" +include { MACS2_CALLPEAK as MACS2_CALLPEAK_NOIGG } from "../modules/nf-core/macs2/callpeak/main" +include { DEEPTOOLS_COMPUTEMATRIX as DEEPTOOLS_COMPUTEMATRIX_GENE } from "../modules/nf-core/deeptools/computematrix/main" +include { DEEPTOOLS_COMPUTEMATRIX as DEEPTOOLS_COMPUTEMATRIX_PEAKS } from "../modules/nf-core/deeptools/computematrix/main" +include { DEEPTOOLS_PLOTHEATMAP as DEEPTOOLS_PLOTHEATMAP_GENE } from "../modules/nf-core/deeptools/plotheatmap/main" +include { DEEPTOOLS_PLOTHEATMAP as DEEPTOOLS_PLOTHEATMAP_PEAKS } from "../modules/nf-core/deeptools/plotheatmap/main" +include { CUSTOM_DUMPSOFTWAREVERSIONS } from "../modules/local/custom_dumpsoftwareversions" /* * SUBWORKFLOW: Consisting entirely of nf-core/modules */ include { MARK_DUPLICATES_PICARD } from "../subworkflows/nf-core/mark_duplicates_picard" include { MARK_DUPLICATES_PICARD as DEDUPLICATE_PICARD } from "../subworkflows/nf-core/mark_duplicates_picard" -include { SAMTOOLS_VIEW_SORT_STATS } from "../subworkflows/nf-core/samtools_view_sort_stats" +include { SAMTOOLS_VIEW_SORT_STATS as FILTER_READS } from "../subworkflows/nf-core/samtools_view_sort_stats" include { PREPARE_PEAKCALLING } from 
"../subworkflows/nf-core/prepare_peakcalling" +include { DEEPTOOLS_QC } from "../subworkflows/nf-core/deeptools_qc" /* ======================================================================================== @@ -154,14 +153,14 @@ workflow CUTANDRUN { // Init ch_software_versions = Channel.empty() - ch_frag_len_multiqc = Channel.empty() /* * SUBWORKFLOW: Uncompress and prepare reference genome files */ if(params.run_genome_prep) { PREPARE_GENOME ( - prepare_tool_indices + prepare_tool_indices, + ch_blacklist ) ch_software_versions = ch_software_versions.mix(PREPARE_GENOME.out.versions) } @@ -175,19 +174,19 @@ workflow CUTANDRUN { ) INPUT_CHECK.out.reads - .map { - meta, fastq -> - meta.id = meta.id.split("_")[0..-2].join("_") - [ meta, fastq ] } - .groupTuple(by: [0]) - .branch { - meta, fastq -> - single : fastq.size() == 1 - return [ meta, fastq.flatten() ] - multiple: fastq.size() > 1 - return [ meta, fastq.flatten() ] - } - .set { ch_fastq } + .map { + meta, fastq -> + meta.id = meta.id.split("_")[0..-2].join("_") + [ meta, fastq ] } + .groupTuple(by: [0]) + .branch { + meta, fastq -> + single : fastq.size() == 1 + return [ meta, fastq.flatten() ] + multiple: fastq.size() > 1 + return [ meta, fastq.flatten() ] + } + .set { ch_fastq } } /* @@ -200,8 +199,8 @@ workflow CUTANDRUN { ch_software_versions = ch_software_versions.mix(CAT_FASTQ.out.versions) CAT_FASTQ.out.reads - .mix(ch_fastq.single) - .set { ch_cat_fastq } + .mix(ch_fastq.single) + .set { ch_cat_fastq } } //EXAMPLE CHANNEL STRUCT: [[id:h3k27me3_R1, group:h3k27me3, replicate:1, single_end:false, is_control:false], [READS]] //ch_cat_fastq | view @@ -265,25 +264,66 @@ workflow CUTANDRUN { } } //EXAMPLE CHANNEL STRUCT: [[id:h3k27me3_R1, group:h3k27me3, replicate:1, single_end:false, is_control:false], [BAM]] - // ch_samtools_bam | view + //ch_samtools_bam | view /* - * SUBWORKFLOW: Filter reads based on quality metrics - * http://biofinysics.blogspot.com/2014/05/how-does-bowtie2-assign-mapq-scores.html + * SUBWORKFLOW: extract aligner metadata */ - if (params.run_q_filter) { - SAMTOOLS_VIEW_SORT_STATS ( - ch_samtools_bam + ch_metadata_bt2_target = Channel.empty() + ch_metadata_bt2_spikein = Channel.empty() + if (params.aligner == "bowtie2" && params.run_alignment) { + EXTRACT_BT2_TARGET_META ( + ch_bowtie2_log, + ch_bt2_to_csv_awk, + true ) - ch_samtools_bam = SAMTOOLS_VIEW_SORT_STATS.out.bam - ch_samtools_bai = SAMTOOLS_VIEW_SORT_STATS.out.bai - ch_samtools_stats = SAMTOOLS_VIEW_SORT_STATS.out.stats - ch_samtools_flagstat = SAMTOOLS_VIEW_SORT_STATS.out.flagstat - ch_samtools_idxstats = SAMTOOLS_VIEW_SORT_STATS.out.idxstats - ch_software_versions = ch_software_versions.mix(SAMTOOLS_VIEW_SORT_STATS.out.versions) + ch_metadata_bt2_target = EXTRACT_BT2_TARGET_META.out.metadata + ch_software_versions = ch_software_versions.mix(EXTRACT_BT2_TARGET_META.out.versions) + + EXTRACT_BT2_SPIKEIN_META ( + ch_bowtie2_spikein_log, + ch_bt2_to_csv_awk, + true + ) + ch_metadata_bt2_spikein = EXTRACT_BT2_SPIKEIN_META.out.metadata + } + //ch_metadata_bt2_target | view + //ch_metadata_bt2_spikein | view + + /* + * SUBWORKFLOW: Filter reads based some standard measures + * - Unmapped reads 0x004 + * - Mate unmapped 0x0008 + * - Multi-mapped reads + * - Filter out reads aligned to blacklist regions + * - Filter out reads below a threshold q score + */ + if (params.run_read_filter) { + FILTER_READS ( + ch_samtools_bam, + PREPARE_GENOME.out.allowed_regions.collect{it[1]}.ifEmpty([]) + ) + ch_samtools_bam = FILTER_READS.out.bam + ch_samtools_bai = 
FILTER_READS.out.bai + ch_samtools_stats = FILTER_READS.out.stats + ch_samtools_flagstat = FILTER_READS.out.flagstat + ch_samtools_idxstats = FILTER_READS.out.idxstats + ch_software_versions = ch_software_versions.mix(FILTER_READS.out.versions) } //EXAMPLE CHANNEL STRUCT: [[id:h3k27me3_R1, group:h3k27me3, replicate:1, single_end:false, is_control:false], [BAM]] //ch_samtools_bam | view + + /* + * MODULE: Run preseq on BAM files before de-duplication + */ + ch_preseq_output = Channel.empty() + if (params.run_preseq) { + PRESEQ_LCEXTRAP ( + ch_samtools_bam + ) + ch_preseq_output = PRESEQ_LCEXTRAP.out.lc_extrap + ch_software_versions = ch_software_versions.mix(PRESEQ_LCEXTRAP.out.versions) + } /* * SUBWORKFLOW: Mark duplicates on all samples @@ -308,7 +348,6 @@ workflow CUTANDRUN { /* * SUBWORKFLOW: Remove duplicates - default is on IgG controls only */ - ch_dedup_multiqc = Channel.empty() if (params.run_remove_dups) { DEDUPLICATE_PICARD ( ch_samtools_bam, @@ -319,66 +358,35 @@ workflow CUTANDRUN { ch_samtools_stats = DEDUPLICATE_PICARD.out.stats ch_samtools_flagstat = DEDUPLICATE_PICARD.out.flagstat ch_samtools_idxstats = DEDUPLICATE_PICARD.out.idxstats - ch_dedup_multiqc = DEDUPLICATE_PICARD.out.metrics ch_software_versions = ch_software_versions.mix(DEDUPLICATE_PICARD.out.versions) } //EXAMPLE CHANNEL STRUCT: [[id:h3k27me3_R1, group:h3k27me3, replicate:1, single_end:false, is_control:false], [BAM]] //ch_samtools_bam | view /* - * SUBWORKFLOW: Annotate meta-data with aligner stats for target and spike-in - * the meta-data is annotated additivley so we only need to track the final channel output - */ - if (params.aligner == "bowtie2" && params.run_alignment) { - ANNOTATE_BT2_META ( - ch_samtools_bam, - ch_bowtie2_log, - ch_bt2_to_csv_awk, - "", - "_target", - true - ) - ch_software_versions = ch_software_versions.mix(ANNOTATE_BT2_META.out.versions) - - ANNOTATE_BT2_SPIKEIN_META ( - ANNOTATE_BT2_META.out.output, - ch_bowtie2_spikein_log, - ch_bt2_to_csv_awk, - "", - "_spikein", - true - ) - ch_samtools_bam = ANNOTATE_BT2_SPIKEIN_META.out.output - } - // META-DATA example state: - //[[id:h3k27me3_R1, group:h3k27me3, replicate:1, single_end:false, is_control:false, - // bt2_total_reads_spikein:9616, bt2_align1_spikein:1, bt2_align_gt1_spikein:0, bt2_non_aligned_spikein:9615, bt2_total_aligned_spikein:1, - // bt2_total_reads_target:9616, bt2_align1_target:315, bt2_align_gt1_target:449, bt2_non_aligned_target:8852, bt2_total_aligned_target:764], BAM] - //ch_samtools_bam | view - //EXPORT_META ( ch_annotated_meta.collect{ it[0] } ) - - /* - * SUBWORKFLOW: Annotate meta-data with duplication stats + * SUBWORKFLOW: extract duplication stats from picard report */ + ch_metadata_picard_duplicates = Channel.empty() if (params.run_mark_dups) { - ANNOTATE_DEDUP_META( - ch_samtools_bam, + EXTRACT_PICARD_DUP_META ( ch_markduplicates_metrics, ch_dummy_file.collect(), - "dedup_", - "", false ) - ch_samtools_bam = ANNOTATE_DEDUP_META.out.output - ch_software_versions = ch_software_versions.mix(ANNOTATE_DEDUP_META.out.versions) + ch_metadata_picard_duplicates = EXTRACT_PICARD_DUP_META.out.metadata + ch_software_versions = ch_software_versions.mix(EXTRACT_PICARD_DUP_META.out.versions) } - //EXAMPLE CHANNEL STRUCT: [[META + dedup_library:unknown library, dedup_unpaired_reads_examined:0, dedup_read_pairs_examined:350, dedup_secondary_or_supplementary_rds:0, - // dedup_unmapped_reads:0, dedup_unpaired_read_duplicates:0, dedup_read_pair_duplicates:0, dedup_read_pair_optical_duplicates:0, dedup_percent_duplication:0, 
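// NB: preseq lc_extrap fits a library-complexity curve from the duplicate
// structure of each BAM, estimating how many distinct reads deeper sequencing
// would return, so it has to see the alignments *before* Picard
// de-duplication. A roughly equivalent command line (sketch, not the module's
// exact arguments):
//   preseq lc_extrap -B -pe -o sample.lc_extrap.txt sample.bam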
- // dedup_estimated_library_size:], BAM] - //ch_samtools_bam | view - - ch_bedgraph = Channel.empty() - ch_bigwig = Channel.empty() + //ch_metadata_picard_duplicates | view + + ch_bedgraph = Channel.empty() + ch_bigwig = Channel.empty() + ch_seacr_peaks = Channel.empty() + ch_macs2_peaks = Channel.empty() + ch_peaks_primary = Channel.empty() + ch_peaks_secondary = Channel.empty() + ch_peaks_summits = Channel.empty() + ch_consensus_peaks = Channel.empty() + ch_consensus_peaks_unfilt = Channel.empty() if(params.run_peak_calling) { /* * SUBWORKFLOW: Convert BAM files to bedgraph/bigwig and apply configured normalisation strategy @@ -386,45 +394,50 @@ workflow CUTANDRUN { PREPARE_PEAKCALLING( ch_samtools_bam, ch_samtools_bai, - PREPARE_GENOME.out.chrom_sizes, + PREPARE_GENOME.out.chrom_sizes.collect(), ch_dummy_file, params.normalisation_mode, + ch_metadata_bt2_spikein ) - ch_samtools_bam = PREPARE_PEAKCALLING.out.bam ch_bedgraph = PREPARE_PEAKCALLING.out.bedgraph ch_bigwig = PREPARE_PEAKCALLING.out.bigwig - ch_software_versions = ch_software_versions.mix(ANNOTATE_DEDUP_META.out.versions) + ch_software_versions = ch_software_versions.mix(PREPARE_PEAKCALLING.out.versions) /* * CHANNEL: Separate bedgraphs into target/control */ - ch_bedgraph.branch { it -> - target: it[0].is_control == false - control: it[0].is_control == true - } - .set { ch_bedgraph_split } - //ch_bedgraph_split.target | view - //ch_bedgraph_split.control | view + ch_bedgraph.filter { it -> it[0].is_control == false } + .set { ch_bedgraph_target } + ch_bedgraph.filter { it -> it[0].is_control == true } + .set { ch_bedgraph_control } + //ch_bedgraph_target | view + //ch_bedgraph_control | view - ch_seacr_bed = Channel.empty() - ch_macs2_bed = Channel.empty() - ch_peaks_bed = Channel.empty() + /* + * CHANNEL: Separate bams into target/control + */ + ch_samtools_bam.filter { it -> it[0].is_control == false } + .set { ch_bam_target } + ch_samtools_bam.filter { it -> it[0].is_control == true } + .set { ch_bam_control } + //ch_bam_target | view + //ch_bam_control | view if(params.use_control) { /* - * MODULE: Call peaks using SEACR with IgG control - */ + * MODULE: Call peaks using SEACR with IgG control + */ if('seacr' in callers) { /* - * CHANNEL: Pull control groups + * CHANNEL: Subset control groups */ - ch_bedgraph_split.target.map{ + ch_bedgraph_target.map{ row -> [row[0].control_group, row] } .set { ch_bg_target_ctrlgrp } //ch_bg_target_ctrlgrp | view - ch_bedgraph_split.control.map{ + ch_bedgraph_control.map{ row -> [row[0].control_group, row] } .set { ch_bg_control_ctrlgrp } @@ -443,32 +456,25 @@ workflow CUTANDRUN { SEACR_CALLPEAK ( ch_bedgraph_paired, - params.peak_threshold + params.seacr_peak_threshold ) - ch_seacr_bed = SEACR_CALLPEAK.out.bed + ch_seacr_peaks = SEACR_CALLPEAK.out.bed ch_software_versions = ch_software_versions.mix(SEACR_CALLPEAK.out.versions) // EXAMPLE CHANNEL STRUCT: [[META], BED] //SEACR_CALLPEAK.out.bed | view } if('macs2' in callers) { - ch_samtools_bam.branch{ it -> - target: it[0].is_control == false - control: it[0].is_control == true - } - .set { ch_samtools_bam_split } - // ch_samtools_bam_split.target | view - /* - * CHANNEL: Pull control groups + * CHANNEL: Split control groups */ - ch_samtools_bam_split.target.map{ + ch_bam_target.map{ row -> [row[0].control_group, row] } .set { ch_bam_target_ctrlgrp } //ch_bam_target_ctrlgrp | view - ch_samtools_bam_split.control.map{ + ch_bam_control.map{ row -> [row[0].control_group, row] } .set { ch_bam_control_ctrlgrp } @@ -481,7 +487,7 @@ workflow 
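// NB: the ch_bedgraph_paired channel consumed below is built with the same
// `cross` idiom spelled out for the BAM channels further down: key both
// channels by meta.control_group, cross control against target, then unpack.
// Self-contained sketch with hypothetical data:
//   ch_target = Channel.of( [ [id:'h3k4me3_R1', control_group:0], 't1.bedGraph' ] )
//                      .map { meta, f -> [ meta.control_group, [ meta, f ] ] }
//   ch_ctrl   = Channel.of( [ [id:'igg_ctrl_R1', control_group:0], 'c1.bedGraph' ] )
//                      .map { meta, f -> [ meta.control_group, [ meta, f ] ] }
//   ch_ctrl.cross(ch_target)
//          .map { row -> [ row[1][1][0], row[1][1][1], row[0][1][1] ] }
//          .view()    // => [ [id:h3k4me3_R1, control_group:0], t1.bedGraph, c1.bedGraph ]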
CUTANDRUN { ch_bam_control_ctrlgrp.cross(ch_bam_target_ctrlgrp).map{ row -> [row[1][1][0], row[1][1][1], row[0][1][1]] } - .set{ch_bam_paired} + .set{ ch_bam_paired } // EXAMPLE CHANNEL STRUCT: [[META], TARGET_BAM, CONTROL_BAM] // ch_bam_paired | view @@ -489,10 +495,11 @@ workflow CUTANDRUN { ch_bam_paired, params.macs_gsize ) - ch_macs2_bed = MACS2_CALLPEAK.out.bed + ch_macs2_peaks = MACS2_CALLPEAK.out.peak + ch_peaks_summits = MACS2_CALLPEAK.out.bed ch_software_versions = ch_software_versions.mix(MACS2_CALLPEAK.out.versions) // EXAMPLE CHANNEL STRUCT: [[META], BED] - //MACS2_CALLPEAK.out.bed | view + //MACS2_CALLPEAK.out.peak | view } } else { @@ -503,33 +510,26 @@ workflow CUTANDRUN { /* * CHANNEL: Add fake control channel */ - ch_bedgraph_split.target.map{ row-> [ row[0], row[1], [] ] } + ch_bedgraph_target.map{ row-> [ row[0], row[1], [] ] } .set { ch_bedgraph_target_fctrl } // EXAMPLE CHANNEL STRUCT: [[META], BED, FAKE_CTRL] // ch_bedgraph_target_fctrl | view SEACR_CALLPEAK_NOIGG ( ch_bedgraph_target_fctrl, - params.peak_threshold + params.seacr_peak_threshold ) - ch_seacr_bed = SEACR_CALLPEAK_NOIGG.out.bed + ch_seacr_peaks = SEACR_CALLPEAK_NOIGG.out.bed ch_software_versions = ch_software_versions.mix(SEACR_CALLPEAK_NOIGG.out.versions) // EXAMPLE CHANNEL STRUCT: [[META], BED] //SEACR_NO_IGG.out.bed | view } if('macs2' in callers) { - ch_samtools_bam.branch{ it -> - target: it[0].is_control == false - control: it[0].is_control == true - } - .set { ch_samtools_bam_split } - // ch_samtools_bam_split.target | view - /* * CHANNEL: Add fake control channel */ - ch_samtools_bam_split.target.map{ row-> [ row[0], row[1], [] ] } + ch_bam_target.map{ row-> [ row[0], row[1], [] ] } .set { ch_samtools_bam_target_fctrl } // EXAMPLE CHANNEL STRUCT: [[META], BAM, FAKE_CTRL] //ch_samtools_bam_target_fctrl | view @@ -538,26 +538,52 @@ workflow CUTANDRUN { ch_samtools_bam_target_fctrl, params.macs_gsize ) - ch_macs2_bed = MACS2_CALLPEAK_NOIGG.out.bed + ch_macs2_peaks = MACS2_CALLPEAK_NOIGG.out.peak + ch_peaks_summits = MACS2_CALLPEAK_NOIGG.out.bed ch_software_versions = ch_software_versions.mix(MACS2_CALLPEAK_NOIGG.out.versions) // EXAMPLE CHANNEL STRUCT: [[META], BED] - // MACS2_CALLPEAK_NOIGG.out.bed | view + // MACS2_CALLPEAK_NOIGG.out.peak | view } } - // Store output of primary peakcaller in the output channel + if ("macs2" in params.callers) { + /* + * MODULE: Convert narrow or broad peak to bed + */ + PEAK_TO_BED ( ch_macs2_peaks ) + ch_macs2_peaks = PEAK_TO_BED.out.file + ch_software_versions = ch_software_versions.mix(PEAK_TO_BED.out.versions) + // EXAMPLE CHANNEL STRUCT: [[META], BED] + //PEAK_TO_BED.out.file | view + } + + // Identify the primary peak data stream for downstream analysis if(callers[0] == 'seacr') { - ch_peaks_bed = ch_seacr_bed + ch_peaks_primary = ch_seacr_peaks + ch_peaks_secondary = ch_macs2_peaks } if(callers[0] == 'macs2') { - ch_peaks_bed = ch_macs2_bed + ch_peaks_primary = ch_macs2_peaks + ch_peaks_secondary = ch_seacr_peaks + } + + if(callers[0] == 'seacr') { + /* + * MODULE: Extract summits from seacr peak beds + */ + AWK_EXTRACT_SUMMITS ( + ch_peaks_primary + ) + ch_peaks_summits = AWK_EXTRACT_SUMMITS.out.file + ch_software_versions = ch_software_versions.mix(AWK_EXTRACT_SUMMITS.out.versions) + //AWK_EXTRACT_SUMMITS.out.file | view } /* * MODULE: Add sample identifier column to peak beds */ AWK_NAME_PEAK_BED ( - ch_peaks_bed + ch_peaks_primary ) ch_software_versions = ch_software_versions.mix(AWK_NAME_PEAK_BED.out.versions) // EXAMPLE CHANNEL STRUCT: [[META], BED] 
@@ -571,7 +597,7 @@ workflow CUTANDRUN { .map { row -> [ 1, row[1] ] } .groupTuple(by: [0]) .map { row -> - new_meta = [:] + def new_meta = [:] new_meta.put( "id", "all_samples" ) [ new_meta, row[1].flatten() ] } @@ -590,32 +616,21 @@ workflow CUTANDRUN { * SUBWORKFLOW: Construct group consensus peaks */ CONSENSUS_PEAKS_ALL ( - ch_peaks_bed_all, - params.skip_upset_plots + ch_peaks_bed_all ) - ch_software_versions = ch_software_versions.mix(CONSENSUS_PEAKS_ALL.out.versions) + ch_consensus_peaks = CONSENSUS_PEAKS_ALL.out.filtered_bed + ch_consensus_peaks_unfilt = CONSENSUS_PEAKS_ALL.out.merged_bed + ch_software_versions = ch_software_versions.mix(CONSENSUS_PEAKS_ALL.out.versions) // EXAMPLE CHANNEL STRUCT: [[META], BED] //CONSENSUS_PEAKS_ALL.out.bed | view - } else { /* - * CHANNEL: Group samples based on group + * CHANNEL: Group samples based on group name */ AWK_NAME_PEAK_BED.out.file .map { row -> [ row[0].group, row[1] ] } .groupTuple(by: [0]) - .map { row -> - new_meta = [:] - new_meta.put( "id", row[0] ) - [ new_meta, row[1].flatten() ] - } - .map { row -> - [ row[0], row[1], row[1].size() ] - } - .filter { row -> row[2] > 1 } - .map { row -> - [ row[0], row[1] ] - } + .map { row -> [ [id: row[0]], row[1].flatten() ] } .set { ch_peaks_bed_group } // EXAMPLE CHANNEL STRUCT: [[id: ], [BED1, BED2, BEDn...], count] //ch_peaks_bed_group | view @@ -625,122 +640,55 @@ workflow CUTANDRUN { * where there is more than 1 replicate in a group */ CONSENSUS_PEAKS ( - ch_peaks_bed_group, - params.skip_upset_plots + ch_peaks_bed_group ) - ch_software_versions = ch_software_versions.mix(CONSENSUS_PEAKS.out.versions) + ch_consensus_peaks = CONSENSUS_PEAKS.out.filtered_bed + ch_consensus_peaks_unfilt = CONSENSUS_PEAKS.out.merged_bed + ch_software_versions = ch_software_versions.mix(CONSENSUS_PEAKS.out.versions) // EXAMPLE CHANNEL STRUCT: [[META], BED] //CONSENSUS_PEAKS.out.bed | view } - - /* - * SUBWORKFLOW: Calculate fragment bed from bams - * - Filter for mapped reads - * - Convert to bed file - * - Keep the read pairs that are on the same chromosome and fragment length less than 1000bp - * - Only extract the fragment related columns using cut - */ - CALCULATE_FRAGMENTS ( - ch_samtools_bam - ) - ch_software_versions = ch_software_versions.mix(CALCULATE_FRAGMENTS.out.versions) - //EXAMPLE CHANNEL STRUCT: NO CHANGE - //CALCULATE_FRAGMENTS.out.bed | view - - /* - * MODULE: Bin the fragments into 500bp bins ready for downstream reporting - */ - AWK_FRAG_BIN( - CALCULATE_FRAGMENTS.out.bed - ) - ch_software_versions = ch_software_versions.mix(AWK_FRAG_BIN.out.versions) - //AWK_FRAG_BIN.out.file | view - - /* - * CHANNEL: Combine bam and bai files on id - */ - ch_samtools_bam.map { row -> [row[0].id, row ].flatten()} - .join ( ch_samtools_bai.map { row -> [row[0].id, row ].flatten()} ) - .map { row -> [row[1], row[2], row[4]] } - .set { ch_bam_bai } - // EXAMPLE CHANNEL STRUCT: [[META], BAM, BAI] - //ch_bam_bai | view - - /* - * MODULE: Calculate fragment lengths - */ - SAMTOOLS_CUSTOMVIEW ( - ch_bam_bai - ) - // ch_software_versions = ch_software_versions.mix(SAMTOOLS_CUSTOMVIEW.out.versions) - //SAMTOOLS_CUSTOMVIEW.out.tsv | view } + ch_dt_corrmatrix = Channel.empty() + ch_dt_pcadata = Channel.empty() + ch_dt_fpmatrix = Channel.empty() + ch_peakqc_frip_mqc = Channel.empty() + ch_peakqc_count_mqc = Channel.empty() + ch_peakqc_count_consensus_mqc = Channel.empty() + ch_peakqc_reprod_perc_mqc = Channel.empty() + ch_frag_len_hist_mqc = Channel.empty() if(params.run_reporting) { if(params.run_igv) { /* * 
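NB: the consensus-peak grouping in the hunk above follows the groupTuple
idiom: re-key each item by meta.group, gather per key, then rebuild a minimal
meta map. Sketch with hypothetical values:

    Channel.of( [ [group:'h3k4me3'], 'r1.bed' ], [ [group:'h3k4me3'], 'r2.bed' ] )
           .map { meta, bed -> [ meta.group, bed ] }
           .groupTuple(by: [0])
           .map { grp, beds -> [ [id: grp], beds.flatten() ] }
           .view()    // => [ [id:h3k4me3], [r1.bed, r2.bed] ]

Unlike the old code, the new mapping no longer drops groups with a single
replicate, so CONSENSUS_PEAKS now also runs on singleton groups.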
MODULE: Create igv session */ IGV_SESSION ( - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.gtf, - ch_peaks_bed.collect{it[1]}.ifEmpty([]), + PREPARE_GENOME.out.fasta.map {it[1]}, + PREPARE_GENOME.out.fasta_index.map {it[1]}, + PREPARE_GENOME.out.bed_index, + //PREPARE_GENOME.out.gtf.collect(), + ch_peaks_primary.collect{it[1]}.filter{ it -> it.size() > 1}.ifEmpty([]), + ch_peaks_secondary.collect{it[1]}.filter{ it -> it.size() > 1}.ifEmpty([]), ch_bigwig.collect{it[1]}.ifEmpty([]) ) //ch_software_versions = ch_software_versions.mix(IGV_SESSION.out.versions) } - if (params.run_deep_tools && params.run_peak_calling) { - /* - * MODULE: Extract max signal from peak beds - */ - AWK_EDIT_PEAK_BED ( - ch_peaks_bed - ) - ch_software_versions = ch_software_versions.mix(AWK_EDIT_PEAK_BED.out.versions) - //AWK_EDIT_PEAK_BED.out.file | view - - /* - * CHANNEL: Structure output for join on id - */ - AWK_EDIT_PEAK_BED.out.file - .map { row -> [row[0].id, row ].flatten()} - .set { ch_peaks_bed_id } - //ch_peaks_bed_id | view - + if (params.run_deeptools_heatmaps && params.run_peak_calling) { /* * CHANNEL: Remove IgG from bigwig channel */ - ch_bigwig - .filter { it[0].is_control == false } - .set { ch_bigwig_no_igg } + ch_bigwig.filter { it[0].is_control == false } + .set { ch_bigwig_no_igg } //ch_bigwig_no_igg | view - /* - * CHANNEL: Join beds and bigwigs on id - */ - ch_bigwig_no_igg - .map { row -> [row[0].id, row ].flatten()} - .join ( ch_peaks_bed_id ) - .set { ch_dt_peaks } - //ch_dt_peaks | view - - ch_dt_peaks - .map { row -> row[1,2] } - .set { ch_ordered_bigwig } - //ch_ordered_bigwig | view - - ch_dt_peaks - .map { row -> row[-1] } - .set { ch_ordered_peaks_max } - //ch_ordered_peaks_max | view - /* * MODULE: Compute DeepTools matrix used in heatmap plotting for Genes */ DEEPTOOLS_COMPUTEMATRIX_GENE ( ch_bigwig_no_igg, - PREPARE_GENOME.out.bed + PREPARE_GENOME.out.bed.collect() ) ch_software_versions = ch_software_versions.mix(DEEPTOOLS_COMPUTEMATRIX_GENE.out.versions) @@ -752,18 +700,40 @@ workflow CUTANDRUN { ) ch_software_versions = ch_software_versions.mix(DEEPTOOLS_PLOTHEATMAP_GENE.out.versions) - // Run if not empty file size > 1 byte - ch_ordered_peaks_max - .filter { it -> it.size() > 1} - .set { ch_ordered_peaks_max_notempty } - //ch_ordered_peaks_max_notempty | view + /* + * CHANNEL: Structure output for join on id + */ + ch_peaks_summits + .map { row -> [row[0].id, row ].flatten()} + .set { ch_peaks_summits_id } + //ch_peaks_summits_id | view + + /* + * CHANNEL: Join beds and bigwigs on id + */ + ch_bigwig_no_igg + .map { row -> [row[0].id, row ].flatten()} + .join ( ch_peaks_summits_id ) + .set { ch_dt_bigwig_summits } + //ch_dt_bigwig_summits | view + + ch_dt_bigwig_summits + .map { row -> row[1,2] } + .set { ch_ordered_bigwig } + //ch_ordered_bigwig | view + + ch_dt_bigwig_summits + .map { row -> row[-1] } + .filter { it -> it.size() > 1} + .set { ch_ordered_peaks_max } + //ch_ordered_peaks_max | view /* * MODULE: Compute DeepTools matrix used in heatmap plotting for Peaks */ DEEPTOOLS_COMPUTEMATRIX_PEAKS ( ch_ordered_bigwig, - ch_ordered_peaks_max_notempty + ch_ordered_peaks_max ) ch_software_versions = ch_software_versions.mix(DEEPTOOLS_COMPUTEMATRIX_PEAKS.out.versions) //EXAMPLE CHANNEL STRUCT: [[META], MATRIX] @@ -778,166 +748,125 @@ workflow CUTANDRUN { ch_software_versions = ch_software_versions.mix(DEEPTOOLS_PLOTHEATMAP_PEAKS.out.versions) } + if(params.run_deeptools_qc) { + /* + * SUBWORKFLOW: Run suite of deeptools QC on bam files + */ + DEEPTOOLS_QC ( + ch_samtools_bam, 
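// NB: the summit/bigwig pairing above uses the join-on-id idiom: lift meta.id
// out as a join key, `join` the two channels on it, then discard the key.
// Sketch with hypothetical items:
//   ch_bw = Channel.of( [ [id:'s1'], 's1.bigWig' ] ).map { meta, f -> [ meta.id, meta, f ] }
//   ch_pk = Channel.of( [ [id:'s1'], 's1.summits.bed' ] ).map { meta, f -> [ meta.id, meta, f ] }
//   ch_bw.join(ch_pk)
//        .map { id, meta, bw, meta2, pk -> [ meta, bw, pk ] }
//        .view()    // => [ [id:s1], s1.bigWig, s1.summits.bed ]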
ch_samtools_bai + ) + ch_dt_corrmatrix = DEEPTOOLS_QC.out.correlation_matrix + ch_dt_pcadata = DEEPTOOLS_QC.out.pca_data + ch_dt_fpmatrix = DEEPTOOLS_QC.out.fingerprint_matrix + ch_software_versions = ch_software_versions.mix(DEEPTOOLS_QC.out.versions) + } + /* - * CHANNEL: Join bams and beds on id + * CHANNEL: Filter bais for target only */ - ch_samtools_bam - .map { row -> [row[0].id, row ].flatten()} - .join ( ch_samtools_bai.map { row -> [row[0].id, row ].flatten()} ) - .join ( ch_peaks_bed.map { row -> [row[0].id, row ].flatten()} ) - .map { row -> [row[1], row[2], row[4], row[6]] } - .set { ch_bam_bai_bed } - // EXAMPLE CHANNEL STRUCT: [[META], BAM, BAI, BED] - //ch_bam_bai_bed | view - - ch_samtools_bam_ctrl = ch_samtools_bam - if(!params.skip_frip) { + ch_samtools_bai.filter { it -> it[0].is_control == false } + .set { ch_bai_target } + //ch_bai_target | view + + if (params.run_peak_qc && params.run_peak_calling) { + /* + * CHANNEL: Filter flagstat for target only + */ + ch_samtools_flagstat.filter { it -> it[0].is_control == false } + .set { ch_flagstat_target } + //ch_flagstat_target | view + /* - * MODULE: Calculate Frip scores for samples + * SUBWORKFLOW: Extract fragments from bam files for fragment-based FRiP score */ - CALCULATE_FRIP ( - ch_bam_bai_bed + EXTRACT_FRAGMENTS ( + ch_bam_target ) - ch_software_versions = ch_software_versions.mix(CALCULATE_FRIP.out.versions) /* - * SUBWORKFLOW: Annotate meta-data with frip stats + * SUBWORKFLOW: Run suite of peak QC on peaks */ - ANNOTATE_FRIP_META ( - ch_samtools_bam, - CALCULATE_FRIP.out.frips, - "", - "" + PEAK_QC( + ch_peaks_primary, + AWK_NAME_PEAK_BED.out.file, + ch_consensus_peaks, + ch_consensus_peaks_unfilt, + EXTRACT_FRAGMENTS.out.bed, + ch_flagstat_target, + params.min_frip_overlap, + ch_frip_score_header_multiqc, + ch_peak_counts_header_multiqc, + ch_peak_counts_consensus_header_multiqc, + ch_peak_reprod_header_multiqc ) - ch_samtools_bam = ANNOTATE_FRIP_META.out.output - //ch_samtools_bam | view + ch_peakqc_frip_mqc = PEAK_QC.out.primary_frip_mqc + ch_peakqc_count_mqc = PEAK_QC.out.primary_count_mqc + ch_peakqc_count_consensus_mqc = PEAK_QC.out.consensus_count_mqc + ch_peakqc_reprod_perc_mqc = PEAK_QC.out.reprod_perc_mqc + ch_software_versions = ch_software_versions.mix(PEAK_QC.out.versions) } - - /* - * MODULE: Trim unwanted columns for downstream reporting - */ - CUT_CALC_REPROD ( - AWK_NAME_PEAK_BED.out.file - ) - ch_software_versions = ch_software_versions.mix(CUT_CALC_REPROD.out.versions) + //ch_peakqc_reprod_perc_mqc | view /* - * CHANNEL: Group samples based on group - */ - CUT_CALC_REPROD.out.file - .map { row -> [ row[0].group, row[1] ] } - .groupTuple(by: [0]) - .map { row -> - new_meta = [:] - new_meta.put( "id", row[0] ) - [ new_meta, row[1].flatten() ] - } - .map { row -> - [ row[0], row[1], row[1].size() ] - } - .filter { row -> row[2] > 1 } - .map { row -> - [ row[0], row[1] ] - } - .set { ch_seacr_bed_group_2 } - - /* - * CHANNEL: Per group, create a channel per one against all combination + * CHANNEL: Combine bam and bai files on id */ - ch_seacr_bed_group_2 - .flatMap{ - row -> - new_output = [] - row[1].each{ file -> - files_copy = row[1].collect() - files_copy.remove(files_copy.indexOf(file)) - new_output.add([[id: file.name.split("\\.")[0]], file, files_copy]) - } - new_output - } - .set { ch_beds_intersect } - //EXAMPLE CHANNEL STRUCT: [[META], BED (-a), [BED...n] (-b)] - //ch_beds_intersect | view + ch_bam_target.map { row -> [row[0].id, row ].flatten()} + .join ( ch_bai_target.map { row -> 
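// NB: the FRiP score computed in PEAK_QC above is fragment-based: the
// fraction of sequenced fragments that overlap a called peak, with
// params.min_frip_overlap controlling the required overlap, i.e. conceptually
//   FRiP = fragments_overlapping_peaks / total_fragments
// A low FRiP on a target sample relative to IgG usually indicates weak
// enrichment, which is why it is surfaced as a MultiQC metric.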
[row[0].id, row ].flatten()} ) + .map { row -> [row[1], row[2], row[4]] } + .set { ch_bam_bai } + // EXAMPLE CHANNEL STRUCT: [[META], BAM, BAI] + //ch_bam_bai | view /* - * MODULE: Find intra-group overlap + * MODULE: Calculate fragment lengths */ - BEDTOOLS_INTERSECT ( - ch_beds_intersect, - "bed" + SAMTOOLS_CUSTOMVIEW ( + ch_bam_bai ) - ch_software_versions = ch_software_versions.mix(BEDTOOLS_INTERSECT.out.versions) - //EXAMPLE CHANNEL STRUCT: [[META], BED] - //BEDTOOLS_INTERSECT.out.intersect | view + ch_software_versions = ch_software_versions.mix(SAMTOOLS_CUSTOMVIEW.out.versions) + //SAMTOOLS_CUSTOMVIEW.out.tsv | view /* - * MODULE: Use overlap to calculate a peak repro % + * CHANNEL: Prepare data for report generation */ - CALCULATE_PEAK_REPROD ( - BEDTOOLS_INTERSECT.out.intersect - ) - ch_software_versions = ch_software_versions.mix(CALCULATE_PEAK_REPROD.out.versions) - //EXAMPLE CHANNEL STRUCT: [[META], CSV] - //CALCULATE_PEAK_REPROD.out.csv + // Make sure files are always in order for resume + ch_frag_len = SAMTOOLS_CUSTOMVIEW.out.tsv + .toSortedList { row -> row[0].id } + .map { + list -> + def output = [] + list.each{ v -> output.add(v[1]) } + output + } + //ch_frag_len | view /* - * SUBWORKFLOW: Annotate meta-data with peak stats + * MODULE: Calculate fragment length histogram for mqc */ - ANNOTATE_PEAK_REPRO_META ( - ch_samtools_bam, - CALCULATE_PEAK_REPROD.out.csv, - "", - "" + FRAG_LEN_HIST( + ch_frag_len, + ch_frag_len_header_multiqc ) - ch_samtools_bam = ANNOTATE_PEAK_REPRO_META.out.output - //ch_samtools_bam | view - //ANNOTATE_PEAK_REPRO_META.out.output | view + ch_frag_len_hist_mqc = FRAG_LEN_HIST.out.frag_len_mqc + ch_software_versions = ch_software_versions.mix(FRAG_LEN_HIST.out.versions) + } + //ch_frag_len_hist_mqc | view - /* - * MODULE: Export meta-data to csv file - */ - EXPORT_META ( - ch_samtools_bam.collect{it[0]}, - "meta_table" - ) + if (params.run_multiqc) { + workflow_summary = WorkflowCutandrun.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) /* - * MODULE: Export meta-data to csv file + * MODULE: Collect software versions used in pipeline */ - EXPORT_META_CTRL ( - ch_samtools_bam_ctrl.collect{it[0]}, - "meta_table_ctrl" + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_software_versions.unique().collectFile() ) /* - * MODULE: Generate python reporting using mixture of meta-data and direct file processing + * MODULE: Multiqc */ - GENERATE_REPORTS( - EXPORT_META.out.csv.collect().ifEmpty([]), // meta-data report stats - EXPORT_META_CTRL.out.csv, // meta-data report stats - SAMTOOLS_CUSTOMVIEW.out.tsv.collect{it[1]}, // raw fragments - AWK_FRAG_BIN.out.file.collect{it[1]}, // binned fragments - ch_peaks_bed.collect{it[1]}, // peak beds - ch_frag_len_header_multiqc // multiqc config header for fragment length distribution plot - ) - ch_frag_len_multiqc = GENERATE_REPORTS.out.frag_len_multiqc - ch_software_versions = ch_software_versions.mix(GENERATE_REPORTS.out.versions) - } - - /* - * MODULE: Collect software versions used in pipeline - */ - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_software_versions.unique().collectFile() - ) - - /* - * MODULE: Multiqc - */ - if (params.run_multiqc) { - workflow_summary = WorkflowCutandrun.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - MULTIQC ( ch_multiqc_config, ch_multiqc_custom_config.collect().ifEmpty([]), @@ -953,7 +882,15 @@ workflow CUTANDRUN { ch_samtools_flagstat.collect{it[1]}.ifEmpty([]), 
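// NB: every report input below follows the same guard: `collect{ it[1] }`
// strips the meta map and gathers all files into a single emission, while
// `ifEmpty([])` substitutes an empty list when that stage was skipped, so
// MULTIQC still runs with whatever evidence exists. Sketch:
//   ch_stats.collect{ it[1] }.ifEmpty([])   // [ [meta, file], ... ] -> [ file, ... ] or []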
ch_samtools_idxstats.collect{it[1]}.ifEmpty([]), ch_markduplicates_metrics.collect{it[1]}.ifEmpty([]), - ch_frag_len_multiqc.collect().ifEmpty([]) + ch_preseq_output.collect{it[1]}.ifEmpty([]), + ch_dt_corrmatrix.collect{it[1]}.ifEmpty([]), + ch_dt_pcadata.collect{it[1]}.ifEmpty([]), + ch_dt_fpmatrix.collect{it[1]}.ifEmpty([]), + ch_peakqc_count_mqc.collect{it[1]}.ifEmpty([]), + ch_peakqc_frip_mqc.collect{it[1]}.ifEmpty([]), + ch_peakqc_count_consensus_mqc.collect{it[1]}.ifEmpty([]), + ch_peakqc_reprod_perc_mqc.collect().ifEmpty([]), + ch_frag_len_hist_mqc.collect().ifEmpty([]) ) multiqc_report = MULTIQC.out.report.toList() } @@ -966,6 +903,9 @@ workflow CUTANDRUN { workflow.onComplete { NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) NfcoreTemplate.summary(workflow, params, log) + if (params.hook_url) { + NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) + } } ////////////////////////////////////////////////////
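NB: the new onComplete hook only fires when params.hook_url is set; enabling
the notification is a one-line addition to a run config (sketch, hypothetical
webhook URL):

    params {
        hook_url = 'https://example.webhook.office.com/webhookb2/xxxx'
    }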