diff --git a/bin/fastqc.sh b/bin/fastqc.sh deleted file mode 100755 index 93f38b6..0000000 --- a/bin/fastqc.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash -sample_id="$1" -reads="$2" - -mkdir fastqc_${sample_id}_logs -fastqc -o fastqc_${sample_id}_logs -f fastq -q ${reads} diff --git a/conda.yml b/conda.yml deleted file mode 100644 index 8ed4151..0000000 --- a/conda.yml +++ /dev/null @@ -1,10 +0,0 @@ -name: rnaseq-nf -channels: - - seqera - - bioconda - - conda-forge - - defaults -dependencies: - - salmon=1.10.2 - - fastqc=0.12.1 - - multiqc=1.15 diff --git a/docker/Docker.s3 b/docker/Docker.s3 deleted file mode 100644 index 910ad17..0000000 --- a/docker/Docker.s3 +++ /dev/null @@ -1,3 +0,0 @@ -FROM nextflow/rnaseq-nf:latest - -RUN conda install -y awscli \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile deleted file mode 100644 index f6147f8..0000000 --- a/docker/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM mambaorg/micromamba -MAINTAINER Paolo Di Tommaso - -RUN \ - micromamba install -y -n base -c defaults -c bioconda -c conda-forge \ - salmon=1.10.2 \ - fastqc=0.12.1 \ - multiqc=1.17 \ - python=3.11 \ - typing_extensions \ - importlib_metadata \ - procps-ng \ - && micromamba clean -a -y - -ENV PATH="$MAMBA_ROOT_PREFIX/bin:$PATH" -USER root diff --git a/docker/Makefile b/docker/Makefile deleted file mode 100644 index 8cc51da..0000000 --- a/docker/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -version ?= v1.2.1 - -all: build push - -build: - cp ../conda.yml . - docker build --output=type=docker --progress=plain --tag quay.io/nextflow/rnaseq-nf:${version} . 
- -push: - docker push quay.io/nextflow/rnaseq-nf:${version} diff --git a/main.nf b/main.nf index 5cc1280..3a4af3f 100755 --- a/main.nf +++ b/main.nf @@ -1,37 +1,116 @@ -#!/usr/bin/env nextflow +#!/usr/bin/env nextflow /* - * Proof of concept of a RNAseq pipeline implemented with Nextflow + * Pipeline parameters */ +// Input data +params.reads = "${workflow.projectDir}/data/ggal/ggal_gut_{1,2}.fq" + +// Reference file +params.transcriptome = "${workflow.projectDir}/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa" + +// Output directory +params.outdir = "results" /* - * Default pipeline parameters. They can be overriden on the command line eg. - * given `params.foo` specify on the run command line `--foo some_value`. + * Index reference transcriptome file */ +process INDEX { + tag "$transcriptome.simpleName" + container "community.wave.seqera.io/library/salmon:1.10.3--482593b6cd04c9b7" + conda "bioconda::salmon=1.10.3" -params.reads = "$baseDir/data/ggal/ggal_gut_{1,2}.fq" -params.transcriptome = "$baseDir/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa" -params.outdir = "results" -params.multiqc = "$baseDir/multiqc" - -log.info """\ - R N A S E Q - N F P I P E L I N E - =================================== - transcriptome: ${params.transcriptome} - reads : ${params.reads} - outdir : ${params.outdir} - """ - -// import modules -include { RNASEQ } from './modules/rnaseq' -include { MULTIQC } from './modules/multiqc' - -/* - * main script flow + input: + path transcriptome + + output: + path 'index' + + script: + """ + salmon index --threads $task.cpus -t $transcriptome -i index + """ +} + +/* + * Generate FastQC reports */ -workflow { - read_pairs_ch = channel.fromFilePairs( params.reads, checkIfExists: true ) - RNASEQ( params.transcriptome, read_pairs_ch ) - MULTIQC( RNASEQ.out, params.multiqc ) +process FASTQC { + tag "FASTQC on $sample_id" + publishDir params.outdir, mode:'copy' + container 
"community.wave.seqera.io/library/fastqc:0.12.1--5cfd0f3cb6760c42" + conda "bioconda::fastqc=0.12.1" + + input: + tuple val(sample_id), path(reads) + + output: + path "fastqc_${sample_id}_logs" + + script: + """ + mkdir fastqc_${sample_id}_logs + fastqc -o fastqc_${sample_id}_logs -f fastq -q ${reads} + """ +} + +/* + * Quantify reads + */ +process QUANT { + tag "$pair_id" + publishDir params.outdir, mode:'copy' + container "community.wave.seqera.io/library/salmon:1.10.3--482593b6cd04c9b7" + conda "bioconda::salmon=1.10.3" + + input: + path index + tuple val(pair_id), path(reads) + + output: + path pair_id + + script: + """ + salmon quant --threads $task.cpus --libType=U -i $index -1 ${reads[0]} -2 ${reads[1]} -o $pair_id + """ } + +/* + * Generate MultiQC report + */ +process MULTIQC { + publishDir params.outdir, mode:'copy' + container "community.wave.seqera.io/library/multiqc:1.24.1--789bc3917c8666da" + conda "bioconda::multiqc=1.24.1" + + input: + path '*' + + output: + path 'multiqc_report.html' + + script: + """ + multiqc . 
+ """ +} + +workflow { + + // Paired reference data + read_pairs_ch = channel.fromFilePairs( params.reads, checkIfExists: true ) + + // Index reference transcriptome file + INDEX(params.transcriptome) + + // Generate FastQC reports + FASTQC(read_pairs_ch) + + // Quantify reads + QUANT(INDEX.out, read_pairs_ch) + + // Generate MultiQC report + MULTIQC(QUANT.out.mix(FASTQC.out).collect()) +} \ No newline at end of file diff --git a/modules/fastqc/main.nf b/modules/fastqc/main.nf deleted file mode 100644 index 57c0477..0000000 --- a/modules/fastqc/main.nf +++ /dev/null @@ -1,18 +0,0 @@ -params.outdir = 'results' - -process FASTQC { - tag "FASTQC on $sample_id" - conda 'bioconda::fastqc=0.12.1' - publishDir params.outdir, mode:'copy' - - input: - tuple val(sample_id), path(reads) - - output: - path "fastqc_${sample_id}_logs", emit: logs - - script: - """ - fastqc.sh "$sample_id" "$reads" - """ -} diff --git a/modules/index/main.nf b/modules/index/main.nf deleted file mode 100644 index 72187c8..0000000 --- a/modules/index/main.nf +++ /dev/null @@ -1,16 +0,0 @@ - -process INDEX { - tag "$transcriptome.simpleName" - conda 'bioconda::salmon=1.10.2' - - input: - path transcriptome - - output: - path 'index' - - script: - """ - salmon index --threads $task.cpus -t $transcriptome -i index - """ -} diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf deleted file mode 100644 index 408d53b..0000000 --- a/modules/multiqc/main.nf +++ /dev/null @@ -1,20 +0,0 @@ -params.outdir = 'results' - -process MULTIQC { - conda 'bioconda::multiqc=1.17' - publishDir params.outdir, mode:'copy' - - input: - path '*' - path 'config' - - output: - path 'multiqc_report.html', emit: report - - script: - """ - cp $config/* . - echo "custom_logo: \$PWD/logo.png" >> multiqc_config.yaml - multiqc -o multiqc_report.html . 
- """ -} diff --git a/modules/quant/main.nf b/modules/quant/main.nf deleted file mode 100644 index a873ddb..0000000 --- a/modules/quant/main.nf +++ /dev/null @@ -1,17 +0,0 @@ - -process QUANT { - tag "$pair_id" - conda 'bioconda::salmon=1.10.2' - - input: - path index - tuple val(pair_id), path(reads) - - output: - path pair_id - - script: - """ - salmon quant --threads $task.cpus --libType=U -i $index -1 ${reads[0]} -2 ${reads[1]} -o $pair_id - """ -} diff --git a/modules/rnaseq.nf b/modules/rnaseq.nf deleted file mode 100644 index 2f607c1..0000000 --- a/modules/rnaseq.nf +++ /dev/null @@ -1,19 +0,0 @@ -params.outdir = 'results' - -include { INDEX } from './index' -include { QUANT } from './quant' -include { FASTQC } from './fastqc' - -workflow RNASEQ { - take: - transcriptome - read_pairs_ch - - main: - INDEX(transcriptome) - FASTQC(read_pairs_ch) - QUANT(INDEX.out, read_pairs_ch) - - emit: - QUANT.out | concat(FASTQC.out) | collect -} \ No newline at end of file diff --git a/multiqc/logo.png b/multiqc/logo.png deleted file mode 100755 index 08a19e4..0000000 Binary files a/multiqc/logo.png and /dev/null differ diff --git a/multiqc/multiqc_config.yaml b/multiqc/multiqc_config.yaml deleted file mode 100755 index 041fb0b..0000000 --- a/multiqc/multiqc_config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -title: "Nextflow RNA-seq demo" -subtitle: "A pipeline for quantification of genomic features from short read data" - -report_header_info: - - Contact E-mail: 'paolo.ditommaso@gmail.com' - - Application Type: 'RNA-seq' - - Project Type: 'Nextflow demo' - -custom_logo_url: 'https://www.nextflow.io' -custom_logo_title: 'Center for Genomic Regulation' - diff --git a/nextflow.config b/nextflow.config index e1297da..d3af3ea 100755 --- a/nextflow.config +++ b/nextflow.config @@ -1,136 +1 @@ -/* - * Copyright (c) 2013-2023, Seqera Labs. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. 
If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * This Source Code Form is "Incompatible With Secondary Licenses", as - * defined by the Mozilla Public License, v. 2.0. - * - */ - -manifest { - description = 'Proof of concept of a RNA-seq pipeline implemented with Nextflow' - author = 'Paolo Di Tommaso' - nextflowVersion = '>=23.10.0' -} - -/* - * default params - */ - -params.outdir = "results" -params.reads = "${projectDir}/data/ggal/ggal_gut_{1,2}.fq" -params.transcriptome = "${projectDir}/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa" -params.multiqc = "${projectDir}/multiqc" - -/* - * defines execution profiles for different environments - */ - -profiles { - standard { - process.container = 'quay.io/nextflow/rnaseq-nf:v1.2.1' - } - - 'all-reads' { - params.reads = "${projectDir}/data/ggal/ggal_*_{1,2}.fq" - } - - /* - * NOTE: requires nextflow 24.03.0-edge or later - */ - wave { - wave.enabled = true - wave.strategy = 'conda' - wave.freeze = true - } - - docker { - process.container = 'quay.io/nextflow/rnaseq-nf:v1.2.1' - docker.enabled = true - } - - singularity { - process.container = 'quay.io/nextflow/rnaseq-nf:v1.2.1' - singularity.enabled = true - singularity.autoMounts = true - } - - conda { - conda.enabled = true - conda.channels = 'seqera,conda-forge,bioconda,defaults' - } - - slurm { - process.container = 'quay.io/nextflow/rnaseq-nf:v1.2.1' - process.executor = 'slurm' - singularity.enabled = true - } - - batch { - params.reads = 's3://rnaseq-nf/data/ggal/lung_{1,2}.fq' - params.transcriptome = 's3://rnaseq-nf/data/ggal/transcript.fa' - process.container = 'quay.io/nextflow/rnaseq-nf:v1.2.1' - process.executor = 'awsbatch' - process.queue = 'nextflow-ci' - workDir = 's3://nextflow-ci/work' - aws.region = 'eu-west-1' - aws.batch.cliPath = '/home/ec2-user/miniconda/bin/aws' - } - - 's3-data' { - process.container = 'quay.io/nextflow/rnaseq-nf:v1.2.1' - params.reads = 
's3://rnaseq-nf/data/ggal/lung_{1,2}.fq' - params.transcriptome = 's3://rnaseq-nf/data/ggal/transcript.fa' - } - - gls { - params.transcriptome = 'gs://rnaseq-nf/data/ggal/transcript.fa' - params.reads = 'gs://rnaseq-nf/data/ggal/gut_{1,2}.fq' - params.multiqc = 'gs://rnaseq-nf/multiqc' - process.executor = 'google-lifesciences' - process.container = 'quay.io/nextflow/rnaseq-nf:v1.2.1' - workDir = 'gs://rnaseq-nf/scratch' // <- replace with your own bucket! - google.region = 'europe-west2' - } - - gcb { - params.transcriptome = 'gs://rnaseq-nf/data/ggal/transcript.fa' - params.reads = 'gs://rnaseq-nf/data/ggal/gut_{1,2}.fq' - params.multiqc = 'gs://rnaseq-nf/multiqc' - process.executor = 'google-batch' - process.container = 'quay.io/nextflow/rnaseq-nf:v1.2.1' - workDir = 'gs://rnaseq-nf/scratch' // <- replace with your own bucket! - google.region = 'europe-west2' - } - - 'gs-data' { - process.container = 'quay.io/nextflow/rnaseq-nf:v1.2.1' - params.transcriptome = 'gs://rnaseq-nf/data/ggal/transcript.fa' - params.reads = 'gs://rnaseq-nf/data/ggal/gut_{1,2}.fq' - } - - azb { - process.container = 'quay.io/nextflow/rnaseq-nf:v1.2.1' - workDir = 'az://nf-scratch/work' - process.executor = 'azurebatch' - process.queue = 'nextflow-ci' // replace with your own Azure pool name - - azure { - batch { - location = 'westeurope' - accountName = "$AZURE_BATCH_ACCOUNT_NAME" - accountKey = "$AZURE_BATCH_ACCOUNT_KEY" - autoPoolMode = true - deletePoolsOnCompletion = true - } - - storage { - accountName = "$AZURE_STORAGE_ACCOUNT_NAME" - accountKey = "$AZURE_STORAGE_ACCOUNT_KEY" - } - } - } -} +docker.enabled = true diff --git a/nextflow_schema.json b/nextflow_schema.json index 823417a..7ca8950 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,25 +15,17 @@ "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open", - "default": "results" + "fa_icon": "fas fa-folder-open" }, "reads": { "type": "string", "description": "The input read-pair files", - "fa_icon": "fas fa-folder-open", - "default": "${projectDir}/data/ggal/ggal_gut_{1,2}.fq" + "fa_icon": "fas fa-folder-open" }, "transcriptome": { "type": "string", "description": "The input transcriptome file", - "fa_icon": "fas fa-folder-open", - "default": "${projectDir}/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa" - }, - "multiqc": { - "type": "string", - "fa_icon": "fas fa-folder-open", - "default": "${projectDir}/multiqc" + "fa_icon": "fas fa-folder-open" } } }