Adding a filtering step to handle unplaced chromosomal sequences. V2 #114

Open · wants to merge 18 commits into base: dev
2 changes: 1 addition & 1 deletion conf/modules.config
@@ -373,7 +373,7 @@ process {
withName: 'SNPEFF' {
ext.args = '-nodownload -canon -v'
if(!params.snpeff_cache){
container = { params.genome ? "nfcore/snpeff:5.0.${params.genome}" : "nfcore/snpeff:5.0.${params.snpeff_db.split('\\.')[0]}" }
container = { params.genome ? "nfcore/snpeff:5.1.${params.genome}" : "nfcore/snpeff:5.1.${params.snpeff_db.split('\\.')[0]}"}
}
publishDir = [
mode: params.publish_dir_mode,
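The only functional change above is the bump of the snpEff container from 5.0 to 5.1; the tag is derived either from `params.genome` or from the prefix of `params.snpeff_db`. Purely as an illustration of that selection logic (the pipeline does this in the Groovy closure shown above, not in Python):

```python
# Illustrative sketch of how the snpEff container tag is resolved (not pipeline code).
def snpeff_container(genome=None, snpeff_db=""):
    """Mirrors the Groovy closure in conf/modules.config."""
    if genome:
        return "nfcore/snpeff:5.1.{}".format(genome)
    # snpeff_db values look like 'GRCh38.105'; only the part before the first dot is used
    return "nfcore/snpeff:5.1.{}".format(snpeff_db.split(".")[0])

print(snpeff_container(genome="GRCh38"))            # nfcore/snpeff:5.1.GRCh38
print(snpeff_container(snpeff_db="WBcel235.105"))   # nfcore/snpeff:5.1.WBcel235
```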
59 changes: 59 additions & 0 deletions modules/local/filterbed/main.nf
@@ -0,0 +1,59 @@
process FILTERBEDFILE {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/python:3.8.3' :
        'quay.io/biocontainers/python:3.8.3' }"

    input:
    tuple val(meta), path(bed)
    path dict_file

    output:
    tuple val(meta), path('filtered.bed'), emit: filtered_bed

    when:
    task.ext.when == null || task.ext.when

    script:
    """
python - <<PYCODE
import os

def load_sequences_from_dict(dict_file):
    # Collect the sequence names (SN: fields of @SQ lines) declared in the dictionary
    sequences = set()
    with open(dict_file, 'r') as file:
        for line in file:
            if line.startswith('@SQ'):
                parts = line.split('\t')
                for part in parts:
                    if part.startswith('SN:'):
                        # split only on the first ':' so contig names containing ':' are kept intact
                        sequences.add(part.split(':', 1)[1])
    return sequences

def filter_bed_file(bed_file, sequences, output_file):
    # Keep only BED records whose contig is present in the sequence dictionary
    with open(bed_file, 'r') as file, open(output_file, 'w') as out:
        for line in file:
            sequence = line.split('\t')[0]
            if sequence in sequences:
                out.write(line)

def main(bed_file, dict_file, output_file):
    sequences = load_sequences_from_dict(dict_file)
    filter_bed_file(bed_file, sequences, output_file)
    print(f"Output file {output_file} created in {os.getcwd()}")

if __name__ == "__main__":
    main("${bed}", "${dict_file}", "filtered.bed")
PYCODE

cat <<-END_VERSIONS > versions.yml
"${task.process}":
    python: \$(python --version | sed 's/Python //g')
END_VERSIONS
    """
}
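To make the module's behaviour concrete, here is a small, self-contained sketch (hypothetical in-memory data, not part of the module) showing how BED records on contigs absent from the sequence dictionary, such as unplaced scaffolds, are dropped:

```python
# Hypothetical example of the FILTERBEDFILE logic on in-memory data.
# The sequence dictionary (SAM header) declares only chr1 and chr2:
dict_lines = [
    "@HD\tVN:1.6",
    "@SQ\tSN:chr1\tLN:248956422",
    "@SQ\tSN:chr2\tLN:242193529",
]
# The BED file also references an unplaced scaffold:
bed_lines = [
    "chr1\t100\t200\texon_a",
    "chrUn_KI270302v1\t0\t50\texon_b",   # not in the dictionary, filtered out
    "chr2\t300\t400\texon_c",
]

sequences = set()
for line in dict_lines:
    if line.startswith("@SQ"):
        for part in line.split("\t"):
            if part.startswith("SN:"):
                sequences.add(part.split(":", 1)[1])

filtered = [line for line in bed_lines if line.split("\t")[0] in sequences]
print(filtered)  # only the chr1 and chr2 records remain
```

Removing such records before the BED file reaches GATK4 BedToIntervalList is what prevents that step from failing on contigs it cannot find in the reference dictionary.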
14 changes: 8 additions & 6 deletions modules/nf-core/modules/snpeff/build.sh


4 changes: 2 additions & 2 deletions modules/nf-core/modules/snpeff/environment.yml


7 changes: 4 additions & 3 deletions modules/nf-core/modules/snpeff/main.nf


8 changes: 4 additions & 4 deletions subworkflows/local/prepare_genome.nf
@@ -128,10 +128,10 @@ workflow PREPARE_GENOME {
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
}

//if((!ch_star_index) || getIndexVersion(ch_star_index) != '2.7.4a'){
// ch_star_index = STAR_GENOMEGENERATE(ch_fasta,ch_gtf).index
// ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
//}
if((!ch_star_index) || getIndexVersion(ch_star_index) != '2.7.4a'){
ch_star_index = STAR_GENOMEGENERATE(ch_fasta,ch_gtf).index
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
}
}


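The block re-enabled above regenerates the STAR index whenever none was supplied or its version does not match 2.7.4a. The getIndexVersion helper is not part of this diff; purely as a hedged sketch, assuming the index directory contains STAR's genomeParameters.txt with a versionGenome entry, such a check could look like:

```python
# Hypothetical sketch only: getIndexVersion is not shown in this PR, and the exact
# layout of genomeParameters.txt can differ between STAR releases.
from pathlib import Path

def get_index_version(star_index_dir):
    params_file = Path(star_index_dir) / "genomeParameters.txt"
    if not params_file.exists():
        return None
    for line in params_file.read_text().splitlines():
        fields = line.split()
        # assumed line format: 'versionGenome    2.7.4a'
        if len(fields) >= 2 and fields[0] == "versionGenome":
            return fields[1]
    return None

# Rebuild the index when the version check fails, e.g.:
# if get_index_version("results/star_index") != "2.7.4a":
#     ...  # run STAR --runMode genomeGenerate
```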
11 changes: 11 additions & 0 deletions subworkflows/nf-core/genomegenerate/environment.yaml


119 changes: 119 additions & 0 deletions subworkflows/nf-core/genomegenerate/main.nf


53 changes: 53 additions & 0 deletions subworkflows/nf-core/genomegenerate/meta.yaml


30 changes: 26 additions & 4 deletions workflows/rnavar.nf
@@ -47,9 +47,11 @@ ch_rnavar_logo = Channel.fromPath(file("$projectDir/assets/nf-core-rna
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { INPUT_CHECK } from '../subworkflows/local/input_check' // Validate the input samplesheet.csv and prepare input channels
include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' // Build the genome index and other reference files
include { ANNOTATE } from '../subworkflows/local/annotate' // Annotate variants using snpEff or VEP or both
include { INPUT_CHECK } from '../subworkflows/local/input_check' // Validate the input samplesheet.csv and prepare input channels
include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' // Build the genome index and other reference files
include { ANNOTATE } from '../subworkflows/local/annotate' // Annotate variants using snpEff or VEP or both
include { FILTERBEDFILE } from '../modules/local/filterbed/main' // Filter the BED file against genome.dict so contigs missing from the reference do not cause downstream errors


/*
========================================================================================
@@ -78,6 +80,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custo
*/

include { ALIGN_STAR } from '../subworkflows/nf-core/align_star' // Align reads to genome and sort and index the alignment file
include { STAR_GENOMEGENERATE } from '../subworkflows/nf-core/genomegenerate' // Generate genome index for STAR
include { MARKDUPLICATES } from '../subworkflows/nf-core/markduplicates' // Mark duplicates in the BAM file
include { SPLITNCIGAR } from '../subworkflows/nf-core/splitncigar' // Splits reads that contain Ns in their cigar string
include { RECALIBRATE } from '../subworkflows/nf-core/recalibrate' // Estimate and correct systematic bias
@@ -176,12 +179,22 @@ workflow RNAVAR {
ch_reports = ch_reports.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
ch_versions = ch_versions.mix(FASTQC.out.versions.first())

//
// PROCESS: Filter BED file before BedToIntervalList
//
ch_filtered_genome_bed = Channel.empty()
FILTERBEDFILE (
ch_genome_bed, // channel with the target/exome BED file
PREPARE_GENOME.out.dict // channel with the sequence dictionary (genome.dict)
)
ch_filtered_genome_bed = FILTERBEDFILE.out.filtered_bed

//
// MODULE: Prepare the interval list from the GTF file using GATK4 BedToIntervalList
//
ch_interval_list = Channel.empty()
GATK4_BEDTOINTERVALLIST(
ch_genome_bed,
ch_filtered_genome_bed,
PREPARE_GENOME.out.dict
)
ch_interval_list = GATK4_BEDTOINTERVALLIST.out.interval_list
@@ -212,6 +225,15 @@ workflow RNAVAR {
ch_aligner_clustering_multiqc = Channel.empty()

if (params.aligner == 'star') {

// Generate the STAR index
// STAR_GENOMEGENERATE (
// PREPARE_GENOME.out.fasta,
// PREPARE_GENOME.out.gtf,

// )
// ch_star_index = STAR_GENOMEGENERATE.out.index

ALIGN_STAR (
ch_cat_fastq,
PREPARE_GENOME.out.star_index,