Skip to content

Commit

Permalink
Merge pull request #17 from TRON-Bioinformatics/split-multimapper
Browse files Browse the repository at this point in the history
Adapt SplitNCigarReads command to also split multimappers
  • Loading branch information
priesgo committed Jun 13, 2024
2 parents a9c44b2 + df964bd commit bff8119
Show file tree
Hide file tree
Showing 14 changed files with 27 additions and 19 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ Optional input:
* --skip_metrics: optionally skip metrics (default: false)
* --output: the folder where to publish output (default: ./output)
* --platform: the platform to be added to the BAM header. Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA)
* --split_cigarn: split reads that contain Ns in their CIGAR string (e.g. reads spanning splicing events in RNA-seq data) using GATK's SplitNCigarReads
* --split_cigarn_args: additional arguments passed to SplitNCigarReads (default: none)
Computational resources:
* --prepare_bam_cpus: (default: 3)
Expand Down
4 changes: 3 additions & 1 deletion modules/02_mark_duplicates.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ params.split_reads_cpus = 2
params.split_reads_memory = "4g"
params.remove_duplicates = true
params.output = 'output'
params.split_cigarn_args = ""


process MARK_DUPLICATES {
Expand Down Expand Up @@ -76,7 +77,8 @@ process SPLIT_CIGAR_N_READS {
--input ${bam} \
--output ${name}.split_cigarn.bam \
--create-output-bam-index true \
--reference ${reference}
--reference ${reference} \
${params.split_cigarn_args}
cp ${name}.split_cigarn.bai ${name}.split_cigarn.bam.bai
Expand Down
18 changes: 11 additions & 7 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
profiles {
conda { params.enable_conda = true }
debug { process.beforeScript = 'echo $HOSTNAME' }
test {
params.input_files = "test_data/test_input.txt"
params.reference = "$baseDir/test_data/ucsc.hg19.minimal.fasta"
ci {
params.prepare_bam_cpus = 1
params.prepare_bam_memory = "3g"
params.mark_duplicates_cpus = 1
Expand All @@ -23,15 +21,19 @@ profiles {
params.metrics_memory = "3g"
params.index_cpus = 1
params.index_memory = "3g"
params.known_indels1 = "$baseDir/test_data/1000G_phase1.indels.hg19.sites.minimal.vcf"
params.known_indels2 = "$baseDir/test_data/Mills_and_1000G_gold_standard.indels.hg19.sites.sorted.minimal.vcf"
params.intervals = "$baseDir/test_data/minimal_intervals.bed"
params.dbsnp = "$baseDir/test_data/dbsnp_138.hg19.minimal.vcf"
timeline.enabled = false
report.enabled = false
trace.enabled = false
dag.enabled = false
}
test {
params.input_files = "test_data/test_input.txt"
params.reference = "$baseDir/test_data/ucsc.hg19.minimal.fasta"
params.known_indels1 = "$baseDir/test_data/1000G_phase1.indels.hg19.sites.minimal.vcf"
params.known_indels2 = "$baseDir/test_data/Mills_and_1000G_gold_standard.indels.hg19.sites.sorted.minimal.vcf"
params.intervals = "$baseDir/test_data/minimal_intervals.bed"
params.dbsnp = "$baseDir/test_data/dbsnp_138.hg19.minimal.vcf"
}
}

// Export this variable to prevent local Python libraries from conflicting with those in the container
Expand Down Expand Up @@ -91,6 +93,8 @@ Optional input:
* --skip_metrics: optionally skip metrics (default: false)
* --output: the folder where to publish output (default: ./output)
* --platform: the platform to be added to the BAM header. Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA)
* --split_cigarn: split reads that contain Ns in their CIGAR string (e.g. reads spanning splicing events in RNA-seq data) using GATK's SplitNCigarReads
* --split_cigarn_args: additional arguments passed to SplitNCigarReads (default: none)
Computational resources:
* --prepare_bam_cpus: (default: 3)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_01.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

source tests/assert.sh
output=output/test1
nextflow main.nf -profile test,conda --output $output
nextflow main.nf -profile test,conda,ci --output $output

test -s $output/sample1/sample1.preprocessed.bam || { echo "Missing output BAM file!"; exit 1; }
test -s $output/sample1/sample1.preprocessed.bai || { echo "Missing output BAI file!"; exit 1; }
Expand Down
2 changes: 1 addition & 1 deletion tests/test_02.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

source tests/assert.sh
output=output/test2
nextflow main.nf -profile test,conda --output $output --skip_bqsr
nextflow main.nf -profile test,conda,ci --output $output --skip_bqsr

test -s $output/sample1/sample1.preprocessed.bam || { echo "Missing output BAM file!"; exit 1; }
test -s $output/sample1/sample1.preprocessed.bai || { echo "Missing output BAI file!"; exit 1; }
Expand Down
2 changes: 1 addition & 1 deletion tests/test_03.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

source tests/assert.sh
output=output/test3
nextflow main.nf -profile test,conda --output $output --skip_realignment
nextflow main.nf -profile test,conda,ci --output $output --skip_realignment

test -s $output/sample1/sample1.preprocessed.bam || { echo "Missing output BAM file!"; exit 1; }
test -s $output/sample1/sample1.preprocessed.bai || { echo "Missing output BAI file!"; exit 1; }
Expand Down
2 changes: 1 addition & 1 deletion tests/test_04.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

source tests/assert.sh
output=output/test4
nextflow main.nf -profile test,conda --output $output --skip_deduplication
nextflow main.nf -profile test,conda,ci --output $output --skip_deduplication

test -s $output/sample1/sample1.preprocessed.bam || { echo "Missing output BAM file!"; exit 1; }
test -s $output/sample1/sample1.preprocessed.bai || { echo "Missing output BAI file!"; exit 1; }
Expand Down
2 changes: 1 addition & 1 deletion tests/test_05.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

source tests/assert.sh
output=output/test5
nextflow main.nf -profile test,conda --output $output --skip_deduplication --skip_bqsr --skip_metrics --known_indels1 false --known_indels2 false
nextflow main.nf -profile test,conda,ci --output $output --skip_deduplication --skip_bqsr --skip_metrics --known_indels1 false --known_indels2 false

test -s $output/sample1/sample1.preprocessed.bam || { echo "Missing output BAM file!"; exit 1; }
test -s $output/sample1/sample1.preprocessed.bai || { echo "Missing output BAI file!"; exit 1; }
Expand Down
2 changes: 1 addition & 1 deletion tests/test_06.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

source tests/assert.sh
output=output/test6
nextflow main.nf -profile test,conda --output $output --intervals false --skip_deduplication --skip_bqsr --skip_realignment
nextflow main.nf -profile test,conda,ci --output $output --intervals false --skip_deduplication --skip_bqsr --skip_realignment

test -s $output/sample1/sample1.preprocessed.bam || { echo "Missing output BAM file!"; exit 1; }
test -s $output/sample1/sample1.preprocessed.bai || { echo "Missing output BAI file!"; exit 1; }
Expand Down
2 changes: 1 addition & 1 deletion tests/test_07.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

source tests/assert.sh
output=output/test7
nextflow main.nf -profile test,conda --output $output --skip_bqsr --skip_realignment
nextflow main.nf -profile test,conda,ci --output $output --skip_bqsr --skip_realignment

test -s $output/sample1/sample1.preprocessed.bam || { echo "Missing output BAM file!"; exit 1; }
test -s $output/sample1/sample1.preprocessed.bai || { echo "Missing output BAI file!"; exit 1; }
Expand Down
2 changes: 1 addition & 1 deletion tests/test_08.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

source tests/assert.sh
output=output/test8
nextflow main.nf -profile test,conda --output $output --collect_hs_metrics_min_base_quality 10 \
nextflow main.nf -profile test,conda,ci --output $output --collect_hs_metrics_min_base_quality 10 \
--collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false --skip_bqsr --skip_realignment

test -s $output/sample1/sample1.preprocessed.bam || { echo "Missing BAM file!"; exit 1; }
Expand Down
2 changes: 1 addition & 1 deletion tests/test_09.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

source tests/assert.sh
output=output/test9
nextflow main.nf -profile test,conda --output $output --skip_deduplication --skip_bqsr --skip_realignment \
nextflow main.nf -profile test,conda,ci --output $output --skip_deduplication --skip_bqsr --skip_realignment \
--input_files false --input_bam test_data/TESTX_S1_L001.bam

test -s $output/TESTX_S1_L001/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 9 output file!"; exit 1; }
Expand Down
2 changes: 1 addition & 1 deletion tests/test_10.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

source tests/assert.sh
output=output/test10
nextflow main.nf -profile test,conda --output $output --skip_realignment --split_cigarn
nextflow main.nf -profile test,conda,ci --output $output --skip_realignment --split_cigarn

test -s $output/sample1/sample1.preprocessed.bam || { echo "Missing output BAM file!"; exit 1; }
test -s $output/sample1/sample1.preprocessed.bai || { echo "Missing output BAI file!"; exit 1; }
Expand Down
2 changes: 1 addition & 1 deletion tests/test_11.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

source tests/assert.sh
output=output/test11
nextflow main.nf -profile test,conda --output $output --reference `pwd`/test_data/ucsc.hg19.minimal.without_indices.fasta
nextflow main.nf -profile test,conda,ci --output $output --reference `pwd`/test_data/ucsc.hg19.minimal.without_indices.fasta

test -s `pwd`/test_data/ucsc.hg19.minimal.without_indices.fasta.fai || { echo "Missing output FAI index!"; exit 1; }
test -s `pwd`/test_data/ucsc.hg19.minimal.without_indices.dict || { echo "Missing output dict index!"; exit 1; }

0 comments on commit bff8119

Please sign in to comment.