diff --git a/docs/usage.md b/docs/usage.md index 33166f45..728ab05f 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -131,6 +131,11 @@ Star: (default: 2) --overhang Overhang (int) (default: readlength - 1) + --filterScore Controls --outFilterScoreMinOverLread and --outFilterMatchNminOverLread (float) + (default: 0.66) + --sjdOverhangMin Controls --alignSJoverhangMin (int) + (default: 8) + rMATS: --statoff Skip the statistical analysis (bool) diff --git a/main.nf b/main.nf index 54928aa7..0e693100 100755 --- a/main.nf +++ b/main.nf @@ -20,43 +20,87 @@ def helpMessage() { The typical command for running the pipeline is as follows: nextflow run main.nf --reads my_reads.csv --gtf genome.gtf --star_index star_dir -profile base,sumner + Input files: + --reads Path to reads.csv file, which specifies the sample_id and path to FASTQ files for each read or read pair (path). + This file is used if starting at beginning of pipeline. + (default: no reads.csv) + --bams Path to bams.csv file which specifies sample_id and path to BAM and BAM.bai files (path) + This file is used if starting pipeline at Stringtie. 
+ (default: no bams.csv) + --rmats_pairs Path to rmats_pairs.txt file containing b1 (and b2) sample names (path) + (default: no rmats_pairs specified) + --download_from Database to download FASTQ/BAMs from (available = 'TCGA', 'GTEX' or 'SRA', false) (string) + (default: false) + --key_file For downloading reads, use TCGA authentication token (TCGA) or dbGAP repository key (GTEx, path) + (default: false) + Main arguments: - --reads Path to input data CSV file specifying the reads sample_id and path to FASTQ files (path) - --bams Path to input data CSV file specifying the bams sample_id and path to BAM files (path) - --gtf Path to GTF file (path) + --gtf Path to reference GTF file (path) + (default: no gtf specified) + --assembly_name Genome assembly name (available = 'GRCh38' or 'GRCm38', string) + (default: false) --star_index Path to STAR index (path) + (default: no index specified) + --singleEnd Specifies that the input is single-end reads (bool) + (default: false) + --stranded Specifies that the input is stranded ('first-strand', 'second-strand', false (aka unstranded)) + (default: 'first-strand') + --readlength Read length - Note that all reads will be cropped to this length (int) + (default: no read length specified) -profile Configuration profile to use. Can use multiple (comma separated, string) Available: base, docker, sumner, test and more. 
- Reads: - --rmats_pairs Path to file containing b1 & b2 samples names space seperated, one row for each rMATS comparison (path) - --singleEnd Specifies that the input is single-end reads (bool) - --stranded Specifies that the input is stranded (bool) - --adapter Path to adapter file (path) - --readlength Read length (int) - --overhang Overhang (default = readlength - 1, int) - --mismatch Mismatch (default = 2, int) - --minlen Drop the read if it is below a specified length (default = readlength, int) + Trimmomatic: + --minlen Drop the read if it is below a specified length (int) + Default parameters turn on --variable-readlength + To crop all reads and turn off, set minlen = readlength (NOTE: this will turn off soft clipping) + (default: 20) --slidingwindow Perform a sliding window trimming approach (bool) - - rMATS: + (default: true) + --adapter Path to adapter file (path) + (default: TruSeq3 for either PE or SE, see singleEnd parameter) + + Star: + --mismatch Number of allowed mismatches per read (SE) or combined read (PE) (int) + SE ex. read length of 50, allow 2 mismatches per 50 bp + PE ex. read length of 50, allow 2 mismatches per 100 bp + (default: 2) + --overhang Overhang (int) + (default: readlength - 1) + --filterScore Controls --outFilterScoreMinOverLread and --outFilterMatchNminOverLread (float) + (default: 0.66) + --sjdOverhangMin Controls --alignSJoverhangMin (int) + (default: 8) + + rMATS: --statoff Skip the statistical analysis (bool) + If using only b1 as input, this must be turned on. + (default: false) --paired_stats Use the paired stats model (bool) - --novelSS Enable detection of novel splice sites (unannotated splice sites, bool) - --mil Minimum Intron Length. Only impacts --novelSS behavior (default = 50, int) - --mel Maximum Exon Length. Only impacts --novelSS behavior (default = 500, int) + (default: false) + --novelSS Enable detection of unannotated splice sites (bool) + (default: false) + --mil Minimum Intron Length. 
Only impacts --novelSS behavior (int) + (default: 50) + --mel Maximum Exon Length. Only impacts --novelSS behavior (int) + (default: 500) Other: - --assembly_name Genome assembly name (available = 'GRCh38' or 'GRCm38', string) - --test For running QC, trimming and STAR only (bool) - --download_from Database to download FASTQ/BAMs from (available = 'TCGA', 'GTEX' or 'SRA', string) - --key_file For downloading reads, use TCGA authentication token (TCGA) or dbGAP repository key (GTEx, path) + --test For running trim test (bool) + (default: false) --max_cpus Maximum number of CPUs (int) + (default: ?) --max_memory Maximum memory (memory unit) + (default: 80) --max_time Maximum time (time unit) + (default: ?) --skiprMATS Skip rMATS (bool) + (default: false) --skipMultiQC Skip MultiQC (bool) + (default: false) --outdir The output directory where the results will be saved (string) + (default: directory where you submit the job) + See here for more info: https://github.com/TheJacksonLaboratory/splicing-pipelines-nf/blob/master/docs/usage.md """.stripIndent() @@ -103,6 +147,8 @@ log.info "rMATS novel splice sites : ${params.novelSS}" log.info "rMATS Minimum Intron Length : ${params.mil}" log.info "rMATS Maximum Exon Length : ${params.mel}" log.info "Mismatch : ${params.mismatch}" +log.info "filterScore : ${params.filterScore}" +log.info "sjdOverhangMin : ${params.sjdOverhangMin}" log.info "Test : ${params.test}" log.info "Download from : ${params.download_from ? params.download_from : 'FASTQs directly provided'}" log.info "Key file : ${params.key_file ? 
params.key_file : 'Not provided'}" @@ -403,7 +449,9 @@ if (!params.bams){ --readFilesCommand zcat \ --sjdbGTFfile $gtf \ --sjdbOverhang $overhang \ - --alignSJoverhangMin 8 \ + --alignSJoverhangMin $params.sjdOverhangMin \ + --outFilterScoreMinOverLread $params.filterScore \ + --outFilterMatchNminOverLread $params.filterScore \ --outFilterMismatchNmax $params.mismatch \ --outFilterMultimapNmax 20 \ --alignMatesGapMax 1000000 \ diff --git a/nextflow.config b/nextflow.config index 60e4bcc5..1f6045ce 100755 --- a/nextflow.config +++ b/nextflow.config @@ -5,40 +5,46 @@ manifest { } params { - // Main arguments - reads = false - singleEnd = false - star_index = false - gtf = false - assembly_name = false - bams = false + // Input files + reads = false + bams = false + rmats_pairs = false + download_from = false + key_file = false - // Reads - stranded = 'first-strand' - rmats_pairs = false - adapter = false - readlength = false - overhang = false - mismatch = 2 - minlen = 20 - slidingwindow = true + // Main arguments: + gtf = false + assembly_name = false + star_index = false + singleEnd = false + stranded = 'first-strand' + readlength = false + + // Trimmomatic: + minlen = 20 + slidingwindow = true + adapter = false + + // Star + overhang = false + mismatch = 2 + filterScore = 0.66 + sjdOverhangMin = 8 // rMATS - statoff = false - paired_stats = false - novelSS = false - mil = 50 - mel = 500 + statoff = false + paired_stats = false + novelSS = false + mil = 50 + mel = 500 // Other - test = false - download_from = false - key_file = false - skiprMATS = false - skipMultiQC = false - outdir = 'results' - multiqc_config = "$baseDir/examples/assets/multiqc_config.yaml" - help = false + test = false + skiprMATS = false + skipMultiQC = false + outdir = 'results' + multiqc_config = "$baseDir/examples/assets/multiqc_config.yaml" + help = false } process {