From 075de66b3f60507b4b16c171e9975f2feda7d138 Mon Sep 17 00:00:00 2001 From: urbanl Date: Thu, 25 Jun 2020 14:58:55 -0400 Subject: [PATCH 1/9] add filterScore and sjdOverhangMin --- nextflow.config | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/nextflow.config b/nextflow.config index 60e4bcc5..3d74dd1a 100755 --- a/nextflow.config +++ b/nextflow.config @@ -5,23 +5,31 @@ manifest { } params { - // Main arguments + // Input files reads = false - singleEnd = false - star_index = false - gtf = false - assembly_name = false bams = false + rmats_pairs = false + download_from = false + key_file = false - // Reads + // Main arguments: + gtf = false + assembly_name = false + star_index = false + singleEnd = false stranded = 'first-strand' - rmats_pairs = false - adapter = false readlength = false - overhang = false - mismatch = 2 + + // Trimmomatic: minlen = 20 slidingwindow = true + adapter = false + + // Star + overhang = false + mismatch = 2 + filterScore = 0.66 + sjdOverhangMin = 8 // rMATS statoff = false @@ -32,8 +40,6 @@ params { // Other test = false - download_from = false - key_file = false skiprMATS = false skipMultiQC = false outdir = 'results' From 5b1f0490b0a9be808d9218920a7d82037a2130c3 Mon Sep 17 00:00:00 2001 From: urbanl Date: Thu, 25 Jun 2020 15:08:15 -0400 Subject: [PATCH 2/9] update help message to new format --- main.nf | 82 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 21 deletions(-) diff --git a/main.nf b/main.nf index 54928aa7..1c0c35e2 100755 --- a/main.nf +++ b/main.nf @@ -20,43 +20,83 @@ def helpMessage() { The typical command for running the pipeline is as follows: nextflow run main.nf --reads my_reads.csv --gtf genome.gtf --star_index star_dir -profile base,sumner + Input files: + --reads Path to reads.csv file, which specifies the sample_id and path to FASTQ files for each read or read pair (path). 
+ This file is used if starting at beginning of pipeline. + (default: no reads.csv) + --bams Path to bams.csv file which specifies sample_id and path to BAM and BAM.bai files (path) + This file is used if starting pipeline at Stringtie. + (default: no bams.csv) + --rmats_pairs Path to rmats_pairs.txt file containing b1 (and b2) samples names (path) + (default: no rmats_pairs specified) + --download_from Database to download FASTQ/BAMs from (available = 'TCGA', 'GTEX' or 'SRA', false) (string) + (default: false) + --key_file For downloading reads, use TCGA authentication token (TCGA) or dbGAP repository key (GTEx, path) + (default: false) + Main arguments: - --reads Path to input data CSV file specifying the reads sample_id and path to FASTQ files (path) - --bams Path to input data CSV file specifying the bams sample_id and path to BAM files (path) - --gtf Path to GTF file (path) + --gtf Path to reference GTF file (path) + (default: no gtf specified) + --assembly_name Genome assembly name (available = 'GRCh38' or 'GRCm38', string) + (default: false) --star_index Path to STAR index (path) + (default: no index specified) + --singleEnd Specifies that the input is single-end reads (bool) + (default: false) + --stranded Specifies that the input is stranded ('first-strand', 'second-strand', false (aka unstranded)) + (default: 'first-strand') + --readlength Read length - Note that all reads will be cropped to this length(int) + (default: no read length specified) -profile Configuration profile to use. Can use multiple (comma separated, string) Available: base, docker, sumner, test and more. 
- Reads: - --rmats_pairs Path to file containing b1 & b2 samples names space seperated, one row for each rMATS comparison (path) - --singleEnd Specifies that the input is single-end reads (bool) - --stranded Specifies that the input is stranded (bool) - --adapter Path to adapter file (path) - --readlength Read length (int) - --overhang Overhang (default = readlength - 1, int) - --mismatch Mismatch (default = 2, int) - --minlen Drop the read if it is below a specified length (default = readlength, int) + Trimmomatic: + --minlen Drop the read if it is below a specified length (int) + Default parameters turn on --variable-readlength + To crop all reads and turn off, set minlen = readlength (NOTE: this will turn off soft clipping) + (default: 20) --slidingwindow Perform a sliding window trimming approach (bool) - - rMATS: + (default: true) + --adapter Path to adapter file (path) + (default: TruSeq3 for either PE or SE, see singleEnd parameter) + + Star: + --mismatch Number of allowed mismatches per read (SE) or combined read (PE) (int) + SE ex. read length of 50, allow 2 mismatches per 50 bp + PE ex. read length of 50, allow 2 mismatches per 100 bp + (default: 2) + --overhang Overhang (int) + (default: readlength - 1) + + rMATS: --statoff Skip the statistical analysis (bool) + If using only b1 as input, this must be turned on. + (default: false) --paired_stats Use the paired stats model (bool) - --novelSS Enable detection of novel splice sites (unannotated splice sites, bool) - --mil Minimum Intron Length. Only impacts --novelSS behavior (default = 50, int) - --mel Maximum Exon Length. Only impacts --novelSS behavior (default = 500, int) + (default: false) + --novelSS Enable detection of unannotated splice sites (bool) + (default: false) + --mil Minimum Intron Length. Only impacts --novelSS behavior (int) + (default: 50) + --mel Maximum Exon Length. 
Only impacts --novelSS behavior (int) + (default: 500) Other: - --assembly_name Genome assembly name (available = 'GRCh38' or 'GRCm38', string) - --test For running QC, trimming and STAR only (bool) - --download_from Database to download FASTQ/BAMs from (available = 'TCGA', 'GTEX' or 'SRA', string) - --key_file For downloading reads, use TCGA authentication token (TCGA) or dbGAP repository key (GTEx, path) + --test For running trim test (bool) + (default: false) --max_cpus Maximum number of CPUs (int) + (default: ?) --max_memory Maximum memory (memory unit) + (default: 80) --max_time Maximum time (time unit) + (default: ?) --skiprMATS Skip rMATS (bool) + (default: false) --skipMultiQC Skip MultiQC (bool) + (default: false) --outdir The output directory where the results will be saved (string) + (default: directory where you submit the job) + See here for more info: https://github.com/TheJacksonLaboratory/splicing-pipelines-nf/blob/master/docs/usage.md """.stripIndent() From 110ab5c09cd4a51ea341839baace0eb639cff3de Mon Sep 17 00:00:00 2001 From: urbanl Date: Thu, 25 Jun 2020 15:15:29 -0400 Subject: [PATCH 3/9] update help message --- main.nf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/main.nf b/main.nf index 1c0c35e2..e66a0877 100755 --- a/main.nf +++ b/main.nf @@ -67,6 +67,10 @@ def helpMessage() { (default: 2) --overhang Overhang (int) (default: readlength - 1) + -- filterScore Controls --outFilterScoreMinOverLread and outFilterMatchNminOverLread + (default: 0.66) + --sjdOverhangMin Controls --alignSJoverhangMin (int) + (default: 8) rMATS: --statoff Skip the statistical analysis (bool) From 1761f20bd128e2a6186e6b4938686d0ee15b5159 Mon Sep 17 00:00:00 2001 From: urbanl Date: Thu, 25 Jun 2020 15:17:20 -0400 Subject: [PATCH 4/9] update usage.md --- docs/usage.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index 33166f45..728ab05f 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -131,6 +131,11 @@ Star: (default: 2) 
--overhang Overhang (int) (default: readlength - 1) + --filterScore Controls --outFilterScoreMinOverLread and outFilterMatchNminOverLread + (default: 0.66) + --sjdOverhangMin Controls --alignSJoverhangMin (int) + (default: 8) + rMATS: --statoff Skip the statistical analysis (bool) From c4303f0a97a4a77bc5cb74af7c7802e749fe1567 Mon Sep 17 00:00:00 2001 From: urbanl Date: Thu, 25 Jun 2020 15:19:51 -0400 Subject: [PATCH 5/9] update log --- main.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index e66a0877..a2f58f70 100755 --- a/main.nf +++ b/main.nf @@ -67,7 +67,7 @@ def helpMessage() { (default: 2) --overhang Overhang (int) (default: readlength - 1) - -- filterScore Controls --outFilterScoreMinOverLread and outFilterMatchNminOverLread + --filterScore Controls --outFilterScoreMinOverLread and outFilterMatchNminOverLread (default: 0.66) --sjdOverhangMin Controls --alignSJoverhangMin (int) (default: 8) @@ -147,6 +147,8 @@ log.info "rMATS novel splice sites : ${params.novelSS}" log.info "rMATS Minimum Intron Length : ${params.mil}" log.info "rMATS Maximum Exon Length : ${params.mel}" log.info "Mismatch : ${params.mismatch}" +log.info "filterScore : ${params.filterScore}" +log.info "sjdOverhangMin : ${params.sjdOverhangMin}" log.info "Test : ${params.test}" log.info "Download from : ${params.download_from ? params.download_from : 'FASTQs directly provided'}" log.info "Key file : ${params.key_file ? 
params.key_file : 'Not provided'}" From 36d18e6acf0ff5fd7679e093e6c3a32f5151a0d6 Mon Sep 17 00:00:00 2001 From: urbanl Date: Thu, 25 Jun 2020 15:27:07 -0400 Subject: [PATCH 6/9] update star with filterScore and sjdOverhangMin --- main.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index a2f58f70..40b91dce 100755 --- a/main.nf +++ b/main.nf @@ -449,7 +449,9 @@ if (!params.bams){ --readFilesCommand zcat \ --sjdbGTFfile $gtf \ --sjdbOverhang $overhang \ - --alignSJoverhangMin 8 \ + --alignSJoverhangMin $params.sjdOverhangMin \ + --outFilterScoreMinOverLread $params.filterScore \ + --outFilterMatchNminOverLread $params.filterScore \ --outFilterMismatchNmax $params.mismatch \ --outFilterMultimapNmax 20 \ --alignMatesGapMax 1000000 \ From 73714880e70736f95551df944f3842f7c2f04e62 Mon Sep 17 00:00:00 2001 From: PhilPalmer Date: Fri, 26 Jun 2020 14:05:41 +0100 Subject: [PATCH 7/9] Updated log indenting --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 40b91dce..0e693100 100755 --- a/main.nf +++ b/main.nf @@ -147,8 +147,8 @@ log.info "rMATS novel splice sites : ${params.novelSS}" log.info "rMATS Minimum Intron Length : ${params.mil}" log.info "rMATS Maximum Exon Length : ${params.mel}" log.info "Mismatch : ${params.mismatch}" -log.info "filterScore : ${params.filterScore}" -log.info "sjdOverhangMin : ${params.sjdOverhangMin}" +log.info "filterScore : ${params.filterScore}" +log.info "sjdOverhangMin : ${params.sjdOverhangMin}" log.info "Test : ${params.test}" log.info "Download from : ${params.download_from ? params.download_from : 'FASTQs directly provided'}" log.info "Key file : ${params.key_file ? 
params.key_file : 'Not provided'}" From c5abd5984793eb4203047099e145c318c71d84d6 Mon Sep 17 00:00:00 2001 From: PhilPalmer Date: Fri, 26 Jun 2020 14:12:28 +0100 Subject: [PATCH 8/9] Updated new parameters indentation in config --- nextflow.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 3d74dd1a..53c791d4 100755 --- a/nextflow.config +++ b/nextflow.config @@ -28,8 +28,8 @@ params { // Star overhang = false mismatch = 2 - filterScore = 0.66 - sjdOverhangMin = 8 + filterScore = 0.66 + sjdOverhangMin = 8 // rMATS statoff = false From f23ce485f6c2da3ba086af750c000e29b7dde411 Mon Sep 17 00:00:00 2001 From: PhilPalmer Date: Fri, 26 Jun 2020 14:18:26 +0100 Subject: [PATCH 9/9] Updated indentation of all parameters in config --- nextflow.config | 58 ++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/nextflow.config b/nextflow.config index 53c791d4..1f6045ce 100755 --- a/nextflow.config +++ b/nextflow.config @@ -6,45 +6,45 @@ manifest { params { // Input files - reads = false - bams = false - rmats_pairs = false - download_from = false - key_file = false + reads = false + bams = false + rmats_pairs = false + download_from = false + key_file = false // Main arguments: - gtf = false - assembly_name = false - star_index = false - singleEnd = false - stranded = 'first-strand' - readlength = false + gtf = false + assembly_name = false + star_index = false + singleEnd = false + stranded = 'first-strand' + readlength = false // Trimmomatic: - minlen = 20 - slidingwindow = true - adapter = false + minlen = 20 + slidingwindow = true + adapter = false // Star - overhang = false - mismatch = 2 - filterScore = 0.66 - sjdOverhangMin = 8 + overhang = false + mismatch = 2 + filterScore = 0.66 + sjdOverhangMin = 8 // rMATS - statoff = false - paired_stats = false - novelSS = false - mil = 50 - mel = 500 + statoff = false + paired_stats = false + novelSS = false 
+ mil = 50 + mel = 500 // Other - test = false - skiprMATS = false - skipMultiQC = false - outdir = 'results' - multiqc_config = "$baseDir/examples/assets/multiqc_config.yaml" - help = false + test = false + skiprMATS = false + skipMultiQC = false + outdir = 'results' + multiqc_config = "$baseDir/examples/assets/multiqc_config.yaml" + help = false } process {