diff --git a/README.md b/README.md index 20c7656f..41318481 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ ## Introduction -**nfcore/atacseq** is a bioinformatics analysis pipeline used for ATAC-seq data. +**nf-core/atacseq** is a bioinformatics analysis pipeline used for ATAC-seq data. The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! diff --git a/assets/samplesheet_with_control.csv b/assets/samplesheet_with_control.csv index 0e0e1944..82ba791d 100644 --- a/assets/samplesheet_with_control.csv +++ b/assets/samplesheet_with_control.csv @@ -1,7 +1,9 @@ sample,fastq_1,fastq_2,replicate,control,control_replicate -SPT5_T0,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822153_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822153_2.fastq.gz,1,SPT5_INPUT,1 -SPT5_T0,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822154_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822154_2.fastq.gz,2,SPT5_INPUT,2 -SPT5_T15,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822157_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822157_2.fastq.gz,1,SPT5_INPUT,1 -SPT5_T15,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822158_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822158_2.fastq.gz,2,SPT5_INPUT,2 -SPT5_INPUT,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,1,, -SPT5_INPUT,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,2,, +INPUT_A,IEG577I1_S1_L001_R1_001.fastq.gz,IEG577I1_S1_L002_R2_001.fastq.gz,1,, +UNTREATED_A,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,1,INPUT_A,1 +UNTREATED_A,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,2,INPUT_A,1 +UNTREATED_A,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,3,INPUT_A,1 +TREATED_A,AEG588A4_S4_L003_R1_001.fastq.gz,,1,INPUT_A,1 +TREATED_A,AEG588A5_S5_L003_R1_001.fastq.gz,,2,INPUT_A,1 +TREATED_A,AEG588A6_S6_L003_R1_001.fastq.gz,,3,INPUT_A,1 +TREATED_A,AEG588A6_S6_L004_R1_001.fastq.gz,,3,INPUT_A,1 diff --git a/main.nf b/main.nf index 21172403..50005017 100644 --- a/main.nf +++ b/main.nf @@ -9,19 +9,6 @@ ---------------------------------------------------------------------------------------- */ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { ATACSEQ } from './workflows/atacseq' -include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' -include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_atacseq_pipeline' -include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_atacseq_pipeline' -include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_atacseq_pipeline' -include { getMacsGsize } from './subworkflows/local/utils_nfcore_atacseq_pipeline' - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GENOME PARAMETER VALUES @@ -41,6 +28,17 @@ params.blacklist = getGenomeAttribute('blacklist') params.mito_name = getGenomeAttribute('mito_name') params.macs_gsize = getMacsGsize(params) +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { ATACSEQ } from './workflows/atacseq' +include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_atacseq_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_atacseq_pipeline' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NAMED WORKFLOWS FOR PIPELINE @@ -143,6 +141,37 @@ workflow { ) } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Get attribute from genome config file e.g. fasta +// + +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +def getMacsGsize(params) { + def val = null + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey('macs_gsize')) { + if (params.genomes[ params.genome ][ 'macs_gsize' ].containsKey(params.read_length.toString())) { + val = params.genomes[ params.genome ][ 'macs_gsize' ][ params.read_length.toString() ] + } + } + } + return val +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/subworkflows/local/utils_nfcore_atacseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_atacseq_pipeline/main.nf index 34673ff5..be4f2a9e 100644 --- a/subworkflows/local/utils_nfcore_atacseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_atacseq_pipeline/main.nf @@ -63,6 +63,7 @@ workflow PIPELINE_INITIALISATION { // // Custom validation for pipeline parameters // + validateInputParameters() } @@ -147,33 +148,6 @@ def validateInputParameters() { } } -// -// Validate channels from input samplesheet -// -def validateInputSamplesheet(input) { - def (metas, fastqs) = input[1..2] - - // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - def endedness_ok = metas.collect { meta -> meta.single_end }.unique().size == 1 - if (!endedness_ok) { - error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") - } - - return [metas[0], fastqs] -} - -// -// Get attribute from genome config file e.g. fasta -// -def getGenomeAttribute(attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null -} - // // Exit pipeline if incorrect --genome key provided // @@ -187,21 +161,6 @@ def genomeExistsError() { error(error_string) } } -// -// Get macs genome size (macs_gsize) -// -def getMacsGsize(params) { - def val = null - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey('macs_gsize')) { - if (params.genomes[ params.genome ][ 'macs_gsize' ].containsKey(params.read_length.toString())) { - val = params.genomes[ params.genome ][ 'macs_gsize' ][ params.read_length.toString() ] - } - } - } - return val -} - // // Generate methods description for MultiQC //