diff --git a/conf/modules.config b/conf/modules.config
index 61cf9ba1..45d56500 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -373,7 +373,7 @@ process {
     withName: 'SNPEFF' {
         ext.args = '-nodownload -canon -v'
         if(!params.snpeff_cache){
-            container = { params.genome ? "nfcore/snpeff:5.0.${params.genome}" : "nfcore/snpeff:5.0.${params.snpeff_db.split('\\.')[0]}" }
+            container = { params.genome ? "nfcore/snpeff:5.1.${params.genome}" : "nfcore/snpeff:5.1.${params.snpeff_db.split('\\.')[0]}" }
         }
         publishDir = [
             mode: params.publish_dir_mode,
diff --git a/modules/local/filterbed/main.nf b/modules/local/filterbed/main.nf
new file mode 100644
index 00000000..6bb7c2b3
--- /dev/null
+++ b/modules/local/filterbed/main.nf
@@ -0,0 +1,59 @@
+process FILTERBEDFILE {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/python:3.8.3' :
+        'quay.io/biocontainers/python:3.8.3' }"
+
+    input:
+    tuple val(meta), path(bed)
+    path dict_file
+
+    output:
+    tuple val(meta), path('filtered.bed'), emit: filtered_bed
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    python - <<'PYCODE'
+    # NOTE: filtering logic reconstructed from the module inputs/outputs (assumption):
+    # keep only BED records whose contig is listed as an @SQ SN: entry in the
+    # sequence dictionary, so BedToIntervalList does not fail on extra chromosomes.
+    contigs = set()
+    with open("${dict_file}") as dict_fh:
+        for line in dict_fh:
+            if not line.startswith("@SQ"):
+                continue
+            for field in line.rstrip("\\n").split("\\t"):
+                if field.startswith("SN:"):
+                    contigs.add(field[3:])
+
+    with open("${bed}") as bed_fh, open("filtered.bed", "w") as out_fh:
+        for line in bed_fh:
+            if line.split("\\t")[0] in contigs:
+                out_fh.write(line)
+    PYCODE
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+    END_VERSIONS
+    """
+}
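To make the intent of `FILTERBEDFILE` concrete, here is a small self-contained illustration of the same idea: contig names are read from the `@SQ ... SN:` entries of the sequence dictionary, and BED records on any other contig are dropped. The dict and BED contents below are invented purely for the example.

```python
# Toy illustration of the FILTERBEDFILE idea; the dict/BED contents are invented.
dict_text = "@HD\tVN:1.6\n@SQ\tSN:chr1\tLN:248956422\n@SQ\tSN:chr2\tLN:242193529\n"
bed_text = "chr1\t100\t200\texon1\nchr2\t300\t400\texon2\nchrUn_KI270442v1\t10\t20\textra\n"

# Collect contig names from the @SQ SN: fields of the sequence dictionary.
contigs = {
    field[3:]
    for line in dict_text.splitlines()
    if line.startswith("@SQ")
    for field in line.split("\t")
    if field.startswith("SN:")
}

# Keep only BED records whose contig is present in the dictionary.
filtered = [line for line in bed_text.splitlines() if line.split("\t")[0] in contigs]
print("\n".join(filtered))  # the chrUn_KI270442v1 record is dropped
```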
diff --git a/modules/nf-core/modules/snpeff/build.sh b/modules/nf-core/modules/snpeff/build.sh
index 2fccf9a8..d16fd939 100644
--- a/modules/nf-core/modules/snpeff/build.sh
+++ b/modules/nf-core/modules/snpeff/build.sh
@@ -17,9 +17,11 @@ build_push() {
     docker push nfcore/snpeff:${SNPEFF_TAG}.${GENOME}
 }

-
-build_push "GRCh37" "75" "5.0"
-build_push "GRCh38" "99" "5.0"
-build_push "GRCm38" "99" "5.0"
-build_push "CanFam3.1" "99" "5.0"
-build_push "WBcel235" "99" "5.0"
+build_push "CanFam3.1" "99" "5.1"
+build_push "GRCh37" "87" "5.1"
+build_push "GRCh38" "105" "5.1"
+build_push "GRCm38" "99" "5.1"
+build_push "GRCm39" "105" "5.1"
+build_push "R64-1-1" "105" "5.1"
+build_push "UMD3.1" "75" "5.1"
+build_push "WBcel235" "105" "5.1"
diff --git a/modules/nf-core/modules/snpeff/environment.yml b/modules/nf-core/modules/snpeff/environment.yml
index ad0523fb..e0527690 100644
--- a/modules/nf-core/modules/snpeff/environment.yml
+++ b/modules/nf-core/modules/snpeff/environment.yml
@@ -1,10 +1,10 @@
 # You can use this file to create a conda environment for this module:
 #   conda env create -f environment.yml
-name: nf-core-snpeff-5.0
+name: nf-core-snpeff-5.1
 channels:
   - conda-forge
   - bioconda
   - defaults

 dependencies:
-  - bioconda::snpeff=5.0
+  - bioconda::snpeff=5.1
diff --git a/modules/nf-core/modules/snpeff/main.nf b/modules/nf-core/modules/snpeff/main.nf
index 1b4d5f43..ba02fbe9 100644
--- a/modules/nf-core/modules/snpeff/main.nf
+++ b/modules/nf-core/modules/snpeff/main.nf
@@ -2,10 +2,11 @@ process SNPEFF {
     tag "$meta.id"
     label 'process_medium'

-    conda (params.enable_conda ? "bioconda::snpeff=5.0" : null)
+    conda (params.enable_conda ? "bioconda::snpeff=5.1" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/snpeff:5.0--hdfd78af_1' :
-        'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }"
+        'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_2' :
+        'quay.io/biocontainers/snpeff:5.1--hdfd78af_2' }"
+

     input:
     tuple val(meta), path(vcf)
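The default `snpeff:5.1--hdfd78af_2` image above is overridden in `conf/modules.config` (first hunk of this patch) when no snpEff cache is supplied: the tag is built from `params.genome` if set, otherwise from the part of `params.snpeff_db` before the first dot. A minimal sketch of that selection logic; the example parameter values are illustrative, not pipeline defaults.

```python
from typing import Optional

# Sketch of the snpEff container selection from conf/modules.config above.
# The example values passed below are illustrative, not pipeline defaults.
def snpeff_container(genome: Optional[str], snpeff_db: str) -> str:
    if genome:
        return f"nfcore/snpeff:5.1.{genome}"
    # Mirrors Groovy's snpeff_db.split('\\.')[0]: keep the text before the first dot.
    return f"nfcore/snpeff:5.1.{snpeff_db.split('.')[0]}"

print(snpeff_container("GRCh38", "GRCh38.105"))  # nfcore/snpeff:5.1.GRCh38
print(snpeff_container(None, "WBcel235.105"))    # nfcore/snpeff:5.1.WBcel235
```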
"--sjdbGTFfile $gtf" : '' + if (args_list.contains('--genomeSAindexNbases')) { + """ + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + $include_gtf \\ + --runThreadN $task.cpus \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + samtools faidx $fasta + NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai` + + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + $include_gtf \\ + --runThreadN $task.cpus \\ + --genomeSAindexNbases \$NUM_BASES \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } + + stub: + if (gtf) { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/genomeParameters.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } +} diff --git a/subworkflows/nf-core/genomegenerate/meta.yaml b/subworkflows/nf-core/genomegenerate/meta.yaml new file mode 100644 index 00000000..1061e1b8 --- /dev/null +++ b/subworkflows/nf-core/genomegenerate/meta.yaml @@ -0,0 +1,53 @@ +name: star_genomegenerate +description: Create index for STAR +keywords: + - index + - fasta + - genome + - reference +tools: + - star: + description: | + STAR is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/alexdobin/STAR + manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf + doi: 10.1093/bioinformatics/bts635 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
diff --git a/subworkflows/nf-core/genomegenerate/meta.yaml b/subworkflows/nf-core/genomegenerate/meta.yaml
new file mode 100644
index 00000000..1061e1b8
--- /dev/null
+++ b/subworkflows/nf-core/genomegenerate/meta.yaml
@@ -0,0 +1,53 @@
+name: star_genomegenerate
+description: Create index for STAR
+keywords:
+  - index
+  - fasta
+  - genome
+  - reference
+tools:
+  - star:
+      description: |
+        STAR is a software package for mapping DNA sequences against
+        a large reference genome, such as the human genome.
+      homepage: https://github.com/alexdobin/STAR
+      manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf
+      doi: 10.1093/bioinformatics/bts635
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fasta:
+      type: file
+      description: Fasta file of the reference genome
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test' ]
+  - gtf:
+      type: file
+      description: GTF file of the reference genome
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - index:
+      type: directory
+      description: Folder containing the star index files
+      pattern: "star"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@kevinmenden"
+  - "@drpatelh"
+maintainers:
+  - "@kevinmenden"
+  - "@drpatelh"
diff --git a/workflows/rnavar.nf b/workflows/rnavar.nf
index 34688263..ec4a2504 100644
--- a/workflows/rnavar.nf
+++ b/workflows/rnavar.nf
@@ -47,9 +47,11 @@ ch_rnavar_logo = Channel.fromPath(file("$projectDir/assets/nf-core-rna
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

-include { INPUT_CHECK } from '../subworkflows/local/input_check' // Validate the input samplesheet.csv and prepare input channels
-include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' // Build the genome index and other reference files
-include { ANNOTATE } from '../subworkflows/local/annotate' // Annotate variants using snpEff or VEP or both
+include { INPUT_CHECK    } from '../subworkflows/local/input_check'    // Validate the input samplesheet.csv and prepare input channels
+include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' // Build the genome index and other reference files
+include { ANNOTATE       } from '../subworkflows/local/annotate'       // Annotate variants using snpEff or VEP or both
+include { FILTERBEDFILE  } from '../modules/local/filterbed/main'      // Filter a BED file based on the available genome.dict file to prevent errors for extra chromosomes
+

 /*
 ========================================================================================
@@ -78,6 +80,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custo
 */

 include { ALIGN_STAR } from '../subworkflows/nf-core/align_star' // Align reads to genome and sort and index the alignment file
+include { STAR_GENOMEGENERATE } from '../subworkflows/nf-core/genomegenerate' // Generate genome index for STAR
 include { MARKDUPLICATES } from '../subworkflows/nf-core/markduplicates' // Mark duplicates in the BAM file
 include { SPLITNCIGAR } from '../subworkflows/nf-core/splitncigar' // Splits reads that contain Ns in their cigar string
 include { RECALIBRATE } from '../subworkflows/nf-core/recalibrate' // Estimate and correct systematic bias
@@ -176,12 +179,22 @@ workflow RNAVAR {
     ch_reports  = ch_reports.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
     ch_versions = ch_versions.mix(FASTQC.out.versions.first())

+    //
+    // PROCESS: Filter BED file before BedToIntervalList
+    //
+    ch_filtered_genome_bed = Channel.empty()
+    FILTERBEDFILE (
+        ch_genome_bed,          // channel with the exome/target BED file
+        PREPARE_GENOME.out.dict // channel with the genome.dict file
+    )
+    ch_filtered_genome_bed = FILTERBEDFILE.out.filtered_bed
+
     //
     // MODULE: Prepare the interval list from the GTF file using GATK4 BedToIntervalList
     //
     ch_interval_list = Channel.empty()
     GATK4_BEDTOINTERVALLIST(
-        ch_genome_bed,
+        ch_filtered_genome_bed,
         PREPARE_GENOME.out.dict
     )
     ch_interval_list = GATK4_BEDTOINTERVALLIST.out.interval_list
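As background for this change: Picard/GATK `BedToIntervalList` writes the sequence dictionary as the interval list header and converts BED's 0-based, half-open records into 1-based, inclusive intervals, which is why records on contigs missing from `genome.dict` make it fail. A small sketch of that coordinate convention only (not GATK's implementation):

```python
# Sketch of the BED (0-based, half-open) to interval_list (1-based, inclusive)
# coordinate convention used by BedToIntervalList; not GATK's implementation.
def bed_to_interval(chrom: str, start: int, end: int, name: str = ".") -> str:
    return f"{chrom}\t{start + 1}\t{end}\t+\t{name}"

print(bed_to_interval("chr1", 100, 200, "exon1"))  # chr1  101  200  +  exon1
```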
@@ -212,6 +225,15 @@
     ch_aligner_clustering_multiqc = Channel.empty()

     if (params.aligner == 'star') {
+
+        // Generate the STAR index
+        // STAR_GENOMEGENERATE (
+        //     PREPARE_GENOME.out.fasta,
+        //     PREPARE_GENOME.out.gtf,
+
+        // )
+        // ch_star_index = STAR_GENOMEGENERATE.out.index
+
         ALIGN_STAR (
             ch_cat_fastq,
             PREPARE_GENOME.out.star_index,