TheJacksonLaboratory · lmurba · Jun 26, 2020 · Jun 25, 2020 · Jun 25, 2020 · Jun 25, 2020
diff --git a/docs/usage.md b/docs/usage.md
@@ -131,6 +131,11 @@ Star:
                                 (default: 2)
   --overhang                    Overhang (int)
                                 (default: readlength - 1)
+  --filterScore                 Controls --outFilterScoreMinOverLread and --outFilterMatchNminOverLread (float)
+                                (default: 0.66)
+  --sjdOverhangMin              Controls --alignSJoverhangMin (int)
+                                (default: 8)
+
 
 rMATS:                              
   --statoff                     Skip the statistical analysis (bool)

diff --git a/main.nf b/main.nf
@@ -20,43 +20,87 @@ def helpMessage() {
     The typical command for running the pipeline is as follows:
     nextflow run main.nf --reads my_reads.csv --gtf genome.gtf --star_index star_dir -profile base,sumner
 
+    Input files:
+      --reads                       Path to reads.csv file, which specifies the sample_id and path to FASTQ files for each read or read pair (path).
+                                    This file is used if starting at beginning of pipeline. 
+                                    (default: no reads.csv)
+      --bams                        Path to bams.csv file which specifies sample_id and path to BAM and BAM.bai files (path)
+                                    This file is used if starting pipeline at Stringtie.
+                                    (default: no bams.csv)
+      --rmats_pairs                 Path to rmats_pairs.txt file containing b1 (and b2) sample names (path)
+                                    (default: no rmats_pairs specified) 
+      --download_from               Database to download FASTQ/BAMs from (available = 'TCGA', 'GTEX' or 'SRA', false) (string)
+                                    (default: false)
+      --key_file                    For downloading reads, use TCGA authentication token (TCGA) or dbGAP repository key (GTEx, path)
+                                    (default: false)  
+
     Main arguments:
-      --reads                       Path to input data CSV file specifying the reads sample_id and path to FASTQ files (path)
-      --bams                        Path to input data CSV file specifying the bams sample_id and path to BAM files (path)
-      --gtf                         Path to GTF file (path)
+      --gtf                         Path to reference GTF file (path)
+                                    (default: no gtf specified) 
+      --assembly_name               Genome assembly name (available = 'GRCh38' or 'GRCm38', string)
+                                    (default: false)
       --star_index                  Path to STAR index (path)
+                                    (default: no index specified)
+      --singleEnd                   Specifies that the input is single-end reads (bool)
+                                    (default: false)
+      --stranded                    Specifies that the input is stranded ('first-strand', 'second-strand', false (aka unstranded))
+                                    (default: 'first-strand')
+      --readlength                  Read length. Note that all reads will be cropped to this length (int)
+                                    (default: no read length specified)
       -profile                      Configuration profile to use. Can use multiple (comma separated, string)
                                     Available: base, docker, sumner, test and more.
 
-    Reads:
-      --rmats_pairs                 Path to file containing b1 & b2 samples names space seperated, one row for each rMATS comparison (path)
-      --singleEnd                   Specifies that the input is single-end reads (bool)
-      --stranded                    Specifies that the input is stranded (bool)
-      --adapter                     Path to adapter file (path)
-      --readlength                  Read length (int)
-      --overhang                    Overhang (default = readlength - 1, int)
-      --mismatch                    Mismatch (default = 2, int)
-      --minlen                      Drop the read if it is below a specified length (default = readlength, int)
+    Trimmomatic: 
+      --minlen                      Drop the read if it is below a specified length (int)
+                                    Default parameters turn on --variable-readlength
+                                    To crop all reads and turn this off, set minlen = readlength (NOTE: this will turn off soft clipping)
+                                    (default: 20)
       --slidingwindow               Perform a sliding window trimming approach (bool)
-
-    rMATS:
+                                    (default: true)
+      --adapter                     Path to adapter file (path)  
+                                    (default: TruSeq3 for either PE or SE, see singleEnd parameter)
+
+    Star:                    
+      --mismatch                    Number of allowed mismatches per read (SE) or combined read (PE) (int)
+                                    SE ex. read length of 50, allow 2 mismatches per 50 bp
+                                    PE ex. read length of 50, allow 2 mismatches per 100 bp 
+                                    (default: 2)
+      --overhang                    Overhang (int)
+                                    (default: readlength - 1)
+      --filterScore                 Controls --outFilterScoreMinOverLread and --outFilterMatchNminOverLread (float)
+                                    (default: 0.66)
+      --sjdOverhangMin              Controls --alignSJoverhangMin (int)
+                                    (default: 8)
+
+    rMATS:                              
       --statoff                     Skip the statistical analysis (bool)
+                                    If using only b1 as input, this must be turned on.
+                                    (default: false)
       --paired_stats                Use the paired stats model (bool)
-      --novelSS                     Enable detection of novel splice sites (unannotated splice sites, bool)
-      --mil                         Minimum Intron Length. Only impacts --novelSS behavior (default = 50, int)
-      --mel                         Maximum Exon Length. Only impacts --novelSS behavior (default = 500, int)
+                                    (default: false)
+      --novelSS                     Enable detection of unannotated splice sites (bool)
+                                    (default: false)
+      --mil                         Minimum Intron Length. Only impacts --novelSS behavior (int)
+                                    (default: 50)
+      --mel                         Maximum Exon Length. Only impacts --novelSS behavior (int)
+                                    (default: 500)
 
     Other:
-      --assembly_name               Genome assembly name (available = 'GRCh38' or 'GRCm38', string)
-      --test                        For running QC, trimming and STAR only (bool)
-      --download_from               Database to download FASTQ/BAMs from (available = 'TCGA', 'GTEX' or 'SRA', string)
-      --key_file                    For downloading reads, use TCGA authentication token (TCGA) or dbGAP repository key (GTEx, path)
+      --test                        For running trim test (bool)
+                                    (default: false)
       --max_cpus                    Maximum number of CPUs (int)
+                                    (default: ?)  
       --max_memory                  Maximum memory (memory unit)
+                                    (default: 80)
       --max_time                    Maximum time (time unit)
+                                    (default: ?)
       --skiprMATS                   Skip rMATS (bool)
+                                    (default: false)
       --skipMultiQC                 Skip MultiQC (bool)
+                                    (default: false)
       --outdir                      The output directory where the results will be saved (string)
+                                    (default: 'results', relative to the directory where you submit the job)
+
 
     See here for more info: https://github.com/TheJacksonLaboratory/splicing-pipelines-nf/blob/master/docs/usage.md
     """.stripIndent()
@@ -103,6 +147,8 @@ log.info "rMATS novel splice sites    : ${params.novelSS}"
 log.info "rMATS Minimum Intron Length : ${params.mil}"
 log.info "rMATS Maximum Exon Length   : ${params.mel}"
 log.info "Mismatch                    : ${params.mismatch}"
+log.info "filterScore                 : ${params.filterScore}"
+log.info "sjdOverhangMin              : ${params.sjdOverhangMin}"
 log.info "Test                        : ${params.test}"
 log.info "Download from               : ${params.download_from ? params.download_from : 'FASTQs directly provided'}"
 log.info "Key file                    : ${params.key_file ? params.key_file : 'Not provided'}"
@@ -403,7 +449,9 @@ if (!params.bams){
       --readFilesCommand zcat \
       --sjdbGTFfile $gtf \
       --sjdbOverhang $overhang \
-      --alignSJoverhangMin 8 \
+      --alignSJoverhangMin $params.sjdOverhangMin \
+      --outFilterScoreMinOverLread $params.filterScore \
+      --outFilterMatchNminOverLread $params.filterScore \
       --outFilterMismatchNmax $params.mismatch \
       --outFilterMultimapNmax 20 \
       --alignMatesGapMax 1000000 \

diff --git a/nextflow.config b/nextflow.config
@@ -5,40 +5,46 @@ manifest {
 }
 
 params {
-    // Main arguments
-    reads                    = false
-    singleEnd                = false
-    star_index               = false
-    gtf                      = false
-    assembly_name            = false
-    bams                     = false
+    // Input files 
+    reads          = false
+    bams           = false
+    rmats_pairs    = false
+    download_from  = false
+    key_file       = false
 
-    // Reads
-    stranded                 = 'first-strand'
-    rmats_pairs              = false
-    adapter                  = false
-    readlength               = false
-    overhang                 = false
-    mismatch                 = 2
-    minlen                   = 20
-    slidingwindow            = true
+    // Main arguments: 
+    gtf            = false
+    assembly_name  = false
+    star_index     = false
+    singleEnd      = false
+    stranded       = 'first-strand'
+    readlength     = false
+
+    // Trimmomatic: 
+    minlen         = 20
+    slidingwindow  = true
+    adapter        = false
+
+    // Star
+    overhang       = false
+    mismatch       = 2
+    filterScore    = 0.66
+    sjdOverhangMin = 8
 
     // rMATS
-    statoff                  = false
-    paired_stats             = false
-    novelSS                  = false
-    mil                      = 50
-    mel                      = 500
+    statoff        = false
+    paired_stats   = false
+    novelSS        = false
+    mil            = 50
+    mel            = 500
 
     // Other
-    test                     = false
-    download_from            = false
-    key_file                 = false
-    skiprMATS                = false
-    skipMultiQC              = false
-    outdir                   = 'results'
-    multiqc_config           = "$baseDir/examples/assets/multiqc_config.yaml" 
-    help                     = false
+    test           = false
+    skiprMATS      = false
+    skipMultiQC    = false
+    outdir         = 'results'
+    multiqc_config = "$baseDir/examples/assets/multiqc_config.yaml" 
+    help           = false
 }
 
 process {