From f409b162ca95db829fce946e5fc7f8605e52ebaf Mon Sep 17 00:00:00 2001 From: Ashwini Chhipa Date: Mon, 29 Jun 2020 12:23:49 +0000 Subject: [PATCH 1/3] Add ampliconstats process for all the files (Samtools development branch) --- README.md | 3 +++ conf/illumina.config | 2 ++ modules/illumina.nf | 22 ++++++++++++++++++++++ workflows/illuminaNcov.nf | 10 ++++++++++ 4 files changed, 37 insertions(+) diff --git a/README.md b/README.md index 99f92bae..e7a6f5b3 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ This Nextflow pipeline automates the ARTIC network [nCoV-2019 novel coronavirus You can also use cram file input by passing the --cram flag. You can also specify cram file output by passing the --outCram flag. +You can also run apliconstats (from samtools development branch) by passing the --ampliconstats flag. For production use at large scale, where you will run the workflow many times, you can avoid cloning the scheme repository, creating an ivar bed file and indexing the reference every time by supplying both --ivarBed /path/to/ivar-compatible.bed and --alignerRefPrefix /path/to/bwa-indexed/ref.fa. @@ -42,6 +43,8 @@ The repo contains a environment.yml files which automatically build the correct --cache /some/dir can be specified to have a fixed, shared location to store the conda build for use by multiple runs of the workflow. +> For samtools-ampliconstats support commited [here](https://github.com/ac55-sanger/ncov2019-artic-nf/commit/aa127d4e98b3ef3bfe887c789ce03cbf483c34ef), you first need to build the conda packages of htslib and samtools from their development repository using the steps at [conda_samtools_development](https://github.com/ac55-sanger/conda_samtools_development) and modifying the `environment.yml` file as instructed [here](https://github.com/ac55-sanger/conda_samtools_development#install-using-an-environment-file). + #### Executors By default, the pipeline just runs on the local machine. You can specify `-profile slurm` to use a SLURM cluster, or `-profile lsf` to use an LSF cluster. In either case you may need to also use one of the COG-UK institutional config profiles (phw or sanger), or provide queue names to use in your own config file. diff --git a/conf/illumina.config b/conf/illumina.config index bd82749e..85c55c5b 100644 --- a/conf/illumina.config +++ b/conf/illumina.config @@ -48,6 +48,8 @@ params { // iVar minimum mapQ to call variant (ivar variants: -q) ivarMinVariantQuality = 20 + // Run ampliconstats (samtools development branch) + ampliconstats = false } def makeFastqSearchPath ( illuminaSuffixes, fastq_exts ) { diff --git a/modules/illumina.nf b/modules/illumina.nf index b9ecb669..d05f1acd 100644 --- a/modules/illumina.nf +++ b/modules/illumina.nf @@ -102,6 +102,28 @@ process trimPrimerSequences { """ } +process makeAmpliconstats { + + publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "*.stats", mode: 'copy' + publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "*.png", mode: 'copy' + publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "*.gp", mode: 'copy' + + input: + file (ptrim_bams) + path(bedfile) + + output: + path "nCoV-2019.amp.stats", emit: ampstats + path "*.png", emit: amppng + path "*.gp", emit: ampgp + + script: + """ + samtools ampliconstats -@8 -d 1,20,100 ${bedfile} *.mapped.primertrimmed.sorted.bam > nCoV-2019.amp.stats + plot-ampliconstats -size 1200,900 nCoV-2019-ampliconstats nCoV-2019.amp.stats + """ +} + process callVariants { tag { sampleName } diff --git a/workflows/illuminaNcov.nf b/workflows/illuminaNcov.nf index 0a6b04ed..54c5b2fb 100644 --- a/workflows/illuminaNcov.nf +++ b/workflows/illuminaNcov.nf @@ -9,6 +9,7 @@ include {readTrimming} from '../modules/illumina.nf' include {indexReference} from '../modules/illumina.nf' include {readMapping} from '../modules/illumina.nf' include {trimPrimerSequences} from '../modules/illumina.nf' +include {makeAmpliconstats} from '../modules/illumina.nf' include {callVariants} from '../modules/illumina.nf' include {makeConsensus} from '../modules/illumina.nf' include {cramToFastq} from '../modules/illumina.nf' @@ -121,6 +122,15 @@ workflow sequenceAnalysis { } + // Generate ampliconstats only if --ampliconstats parameter is passed + if(params.ampliconstats) { + // Create a channel to collect only the mapped.primertrimmed.sorted.bam output files from trimPrimerSequences + + trimPrimerSequences.out.ptrim.map{sample, bam -> bam}.collect().set{ ch_ptrim_bam } + + makeAmpliconstats(ch_ptrim_bam, ch_bedFile) + } + emit: qc_pass = collateSamples.out } From 5cdbe0af127c039f5d0bb94cefbdb2b6dbd46fa8 Mon Sep 17 00:00:00 2001 From: Ashwini Chhipa <58439277+ac55-sanger@users.noreply.github.com> Date: Mon, 29 Jun 2020 14:50:47 +0100 Subject: [PATCH 2/3] Update README.md --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index e7a6f5b3..24c29957 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ This Nextflow pipeline automates the ARTIC network [nCoV-2019 novel coronavirus You can also use cram file input by passing the --cram flag. You can also specify cram file output by passing the --outCram flag. -You can also run apliconstats (from samtools development branch) by passing the --ampliconstats flag. +You can also run ampliconstats by passing the --ampliconstats flag. For production use at large scale, where you will run the workflow many times, you can avoid cloning the scheme repository, creating an ivar bed file and indexing the reference every time by supplying both --ivarBed /path/to/ivar-compatible.bed and --alignerRefPrefix /path/to/bwa-indexed/ref.fa. @@ -43,8 +43,6 @@ The repo contains a environment.yml files which automatically build the correct --cache /some/dir can be specified to have a fixed, shared location to store the conda build for use by multiple runs of the workflow. -> For samtools-ampliconstats support commited [here](https://github.com/ac55-sanger/ncov2019-artic-nf/commit/aa127d4e98b3ef3bfe887c789ce03cbf483c34ef), you first need to build the conda packages of htslib and samtools from their development repository using the steps at [conda_samtools_development](https://github.com/ac55-sanger/conda_samtools_development) and modifying the `environment.yml` file as instructed [here](https://github.com/ac55-sanger/conda_samtools_development#install-using-an-environment-file). - #### Executors By default, the pipeline just runs on the local machine. You can specify `-profile slurm` to use a SLURM cluster, or `-profile lsf` to use an LSF cluster. In either case you may need to also use one of the COG-UK institutional config profiles (phw or sanger), or provide queue names to use in your own config file. From a6c4eb817654465ecdaf330c9bca3030c9f152f3 Mon Sep 17 00:00:00 2001 From: Ashwini Chhipa <58439277+ac55-sanger@users.noreply.github.com> Date: Mon, 29 Jun 2020 15:16:06 +0100 Subject: [PATCH 3/3] Update illumina.config --- conf/illumina.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/illumina.config b/conf/illumina.config index 85c55c5b..7d81a841 100644 --- a/conf/illumina.config +++ b/conf/illumina.config @@ -48,7 +48,7 @@ params { // iVar minimum mapQ to call variant (ivar variants: -q) ivarMinVariantQuality = 20 - // Run ampliconstats (samtools development branch) + // Run samtools ampliconstats ampliconstats = false }