Adding a filtering step to handle unplaced chromosomal sequences. V2 #114

Open · wants to merge 18 commits into base: dev
2 changes: 1 addition & 1 deletion conf/modules.config
@@ -373,7 +373,7 @@ process {
withName: 'SNPEFF' {
ext.args = '-nodownload -canon -v'
if(!params.snpeff_cache){
container = { params.genome ? "nfcore/snpeff:5.0.${params.genome}" : "nfcore/snpeff:5.0.${params.snpeff_db.split('\\.')[0]}" }
container = { params.genome ? "nfcore/snpeff:5.1.${params.genome}" : "nfcore/snpeff:5.1.${params.snpeff_db.split('\\.')[0]}"}
}
publishDir = [
mode: params.publish_dir_mode,
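The only functional change above is the bump of the snpEff container from 5.0 to 5.1; the tag is derived either from `params.genome` or from the prefix of `params.snpeff_db`. Purely as an illustration of that selection logic (the pipeline does this in the Groovy closure shown above, not in Python):

```python
# Illustrative sketch of how the snpEff container tag is resolved (not pipeline code).
def snpeff_container(genome=None, snpeff_db=""):
    """Mirrors the Groovy closure in conf/modules.config."""
    if genome:
        return "nfcore/snpeff:5.1.{}".format(genome)
    # snpeff_db values look like 'GRCh38.105'; only the part before the first dot is used
    return "nfcore/snpeff:5.1.{}".format(snpeff_db.split(".")[0])

print(snpeff_container(genome="GRCh38"))            # nfcore/snpeff:5.1.GRCh38
print(snpeff_container(snpeff_db="WBcel235.105"))   # nfcore/snpeff:5.1.WBcel235
```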
59 changes: 59 additions & 0 deletions modules/local/filterbed/main.nf
@@ -0,0 +1,59 @@
process FILTERBEDFILE {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/python:3.8.3' :
        'quay.io/biocontainers/python:3.8.3' }"

    input:
    tuple val(meta), path(bed)
    path dict_file

    output:
    tuple val(meta), path('filtered.bed'), emit: filtered_bed

    when:
    task.ext.when == null || task.ext.when

    script:
    """
python - <<PYCODE
import os

def load_sequences_from_dict(dict_file):
    # Collect the sequence names (SN: fields of @SQ lines) declared in the dictionary
    sequences = set()
    with open(dict_file, 'r') as file:
        for line in file:
            if line.startswith('@SQ'):
                parts = line.split('\t')
                for part in parts:
                    if part.startswith('SN:'):
                        # split only on the first ':' so contig names containing ':' are kept intact
                        sequences.add(part.split(':', 1)[1])
    return sequences

def filter_bed_file(bed_file, sequences, output_file):
    # Keep only BED records whose contig is present in the sequence dictionary
    with open(bed_file, 'r') as file, open(output_file, 'w') as out:
        for line in file:
            sequence = line.split('\t')[0]
            if sequence in sequences:
                out.write(line)

def main(bed_file, dict_file, output_file):
    sequences = load_sequences_from_dict(dict_file)
    filter_bed_file(bed_file, sequences, output_file)
    print(f"Output file {output_file} created in {os.getcwd()}")

if __name__ == "__main__":
    main("${bed}", "${dict_file}", "filtered.bed")
PYCODE

cat <<-END_VERSIONS > versions.yml
"${task.process}":
    python: \$(python --version | sed 's/Python //g')
END_VERSIONS
    """
}
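To make the module's behaviour concrete, here is a small, self-contained sketch (hypothetical in-memory data, not part of the module) showing how BED records on contigs absent from the sequence dictionary, such as unplaced scaffolds, are dropped:

```python
# Hypothetical example of the FILTERBEDFILE logic on in-memory data.
# The sequence dictionary (SAM header) declares only chr1 and chr2:
dict_lines = [
    "@HD\tVN:1.6",
    "@SQ\tSN:chr1\tLN:248956422",
    "@SQ\tSN:chr2\tLN:242193529",
]
# The BED file also references an unplaced scaffold:
bed_lines = [
    "chr1\t100\t200\texon_a",
    "chrUn_KI270302v1\t0\t50\texon_b",   # not in the dictionary, filtered out
    "chr2\t300\t400\texon_c",
]

sequences = set()
for line in dict_lines:
    if line.startswith("@SQ"):
        for part in line.split("\t"):
            if part.startswith("SN:"):
                sequences.add(part.split(":", 1)[1])

filtered = [line for line in bed_lines if line.split("\t")[0] in sequences]
print(filtered)  # only the chr1 and chr2 records remain
```

Removing such records before the BED file reaches GATK4 BedToIntervalList is what prevents that step from failing on contigs it cannot find in the reference dictionary.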
14 changes: 8 additions & 6 deletions modules/nf-core/modules/snpeff/build.sh


4 changes: 2 additions & 2 deletions modules/nf-core/modules/snpeff/environment.yml


7 changes: 4 additions & 3 deletions modules/nf-core/modules/snpeff/main.nf


8 changes: 4 additions & 4 deletions subworkflows/local/prepare_genome.nf
@@ -128,10 +128,10 @@ workflow PREPARE_GENOME {
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
}

//if((!ch_star_index) || getIndexVersion(ch_star_index) != '2.7.4a'){
// ch_star_index = STAR_GENOMEGENERATE(ch_fasta,ch_gtf).index
// ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
//}
if((!ch_star_index) || getIndexVersion(ch_star_index) != '2.7.4a'){
ch_star_index = STAR_GENOMEGENERATE(ch_fasta,ch_gtf).index
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
}
}


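The block re-enabled above regenerates the STAR index whenever none was supplied or its version does not match 2.7.4a. The getIndexVersion helper is not part of this diff; purely as a hedged sketch, assuming the index directory contains STAR's genomeParameters.txt with a versionGenome entry, such a check could look like:

```python
# Hypothetical sketch only: getIndexVersion is not shown in this PR, and the exact
# layout of genomeParameters.txt can differ between STAR releases.
from pathlib import Path

def get_index_version(star_index_dir):
    params_file = Path(star_index_dir) / "genomeParameters.txt"
    if not params_file.exists():
        return None
    for line in params_file.read_text().splitlines():
        fields = line.split()
        # assumed line format: 'versionGenome    2.7.4a'
        if len(fields) >= 2 and fields[0] == "versionGenome":
            return fields[1]
    return None

# Rebuild the index when the version check fails, e.g.:
# if get_index_version("results/star_index") != "2.7.4a":
#     ...  # run STAR --runMode genomeGenerate
```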
11 changes: 11 additions & 0 deletions subworkflows/nf-core/genomegenerate/environment.yaml


119 changes: 119 additions & 0 deletions subworkflows/nf-core/genomegenerate/main.nf


53 changes: 53 additions & 0 deletions subworkflows/nf-core/genomegenerate/meta.yaml


30 changes: 26 additions & 4 deletions workflows/rnavar.nf
@@ -47,9 +47,11 @@ ch_rnavar_logo = Channel.fromPath(file("$projectDir/assets/nf-core-rna
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { INPUT_CHECK } from '../subworkflows/local/input_check' // Validate the input samplesheet.csv and prepare input channels
include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' // Build the genome index and other reference files
include { ANNOTATE } from '../subworkflows/local/annotate' // Annotate variants using snpEff or VEP or both
include { INPUT_CHECK } from '../subworkflows/local/input_check' // Validate the input samplesheet.csv and prepare input channels
include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' // Build the genome index and other reference files
include { ANNOTATE } from '../subworkflows/local/annotate' // Annotate variants using snpEff or VEP or both
include { FILTERBEDFILE } from '../modules/local/filterbed/main' // Filter the BED file against genome.dict so contigs missing from the reference do not cause downstream errors


/*
========================================================================================
@@ -78,6 +80,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custo
*/

include { ALIGN_STAR } from '../subworkflows/nf-core/align_star' // Align reads to genome and sort and index the alignment file
include { STAR_GENOMEGENERATE } from '../subworkflows/nf-core/genomegenerate' // Generate genome index for STAR
include { MARKDUPLICATES } from '../subworkflows/nf-core/markduplicates' // Mark duplicates in the BAM file
include { SPLITNCIGAR } from '../subworkflows/nf-core/splitncigar' // Splits reads that contain Ns in their cigar string
include { RECALIBRATE } from '../subworkflows/nf-core/recalibrate' // Estimate and correct systematic bias
@@ -176,12 +179,22 @@ workflow RNAVAR {
ch_reports = ch_reports.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
ch_versions = ch_versions.mix(FASTQC.out.versions.first())

//
// PROCESS: Filter BED file before BedToIntervalList
//
ch_filtered_genome_bed = Channel.empty()
FILTERBEDFILE (
ch_genome_bed, // channel with the target/exome BED file
PREPARE_GENOME.out.dict // channel with the sequence dictionary (genome.dict)
)
ch_filtered_genome_bed = FILTERBEDFILE.out.filtered_bed

//
// MODULE: Prepare the interval list from the GTF file using GATK4 BedToIntervalList
//
ch_interval_list = Channel.empty()
GATK4_BEDTOINTERVALLIST(
ch_genome_bed,
ch_filtered_genome_bed,
PREPARE_GENOME.out.dict
)
ch_interval_list = GATK4_BEDTOINTERVALLIST.out.interval_list
@@ -212,6 +225,15 @@ workflow RNAVAR {
ch_aligner_clustering_multiqc = Channel.empty()

if (params.aligner == 'star') {

// Generate the STAR index
// STAR_GENOMEGENERATE (
// PREPARE_GENOME.out.fasta,
// PREPARE_GENOME.out.gtf,

// )
// ch_star_index = STAR_GENOMEGENERATE.out.index

ALIGN_STAR (
ch_cat_fastq,
PREPARE_GENOME.out.star_index,