Skip to content

Commit

Permalink
Decreasing disk stress and processing time of the contig step by replacing temporary BAM files with pipes.
Browse files Browse the repository at this point in the history
The drawback is that it is harder to control the maximum number of cores used.

The piped version of RGCRG-ANTISENSE took ~9 min on 2.4G mouse reads, compared to ~25 min for the original version. Timing and testing were done without the Python script and without running through Nextflow. The resulting *.bedgraph files were identical.
  • Loading branch information
karl616 committed Nov 13, 2017
1 parent c1da3e4 commit 54ed419
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 34 deletions.
19 changes: 10 additions & 9 deletions templates/contig/RGCRG-ANTISENSE
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
#!/bin/bash -eu
samtools view -h -@ ${cpus} ${bam} | \
awk 'BEGIN {OFS="\\t"} {if (\$1!~/^@/) {\$2=xor(\$2,0x10)}; print}' | \
samtools view -@ ${cpus} -Sb - > tmp.bam

bamtools filter -tag NH:1 -in tmp.bam -out tmp_unique.bam && rm tmp.bam
genomeCoverageBed -strand + -split -bg -ibam tmp_unique.bam > ${prefix}.plusRaw.bedgraph
genomeCoverageBed -strand - -split -bg -ibam tmp_unique.bam > ${prefix}.minusRaw.bedgraph
rm tmp_unique.bam
# Stream the strand-flipped, uniquely mapped reads to two coverage jobs
# without writing intermediate BAM files.
#
# The plus-strand job runs in the background and reads from a named pipe.
# Its exit status is collected with wait further down, so the script neither
# continues with a half-written plus-strand bedgraph nor ignores a failure
# of that job (a process substitution gives neither guarantee).
mkfifo plusStrand.fifo
genomeCoverageBed -strand + -split -bg -ibam - \
  < plusStrand.fifo \
  > ${prefix}.plusRaw.bedgraph &

# awk toggles the reverse-strand flag (0x10) of every alignment record and
# passes header lines through unchanged; bamtools then keeps only reads
# tagged NH:1, as the file-based version of this template did. The bedgraph
# names are the ones handed to contigsNew.py.
samtools view -h -@ ${cpus} ${bam} \
  | awk 'BEGIN {OFS="\\t"} {if (\$1!~/^@/) {\$2=xor(\$2,0x10)}; print}' \
  | samtools view -Sbu - \
  | bamtools filter -tag NH:1 \
  | tee plusStrand.fifo \
  | genomeCoverageBed -strand - -split -bg -ibam - \
  > ${prefix}.minusRaw.bedgraph

# Block until the background plus-strand job has finished writing; a
# non-zero status aborts the script because of the -e shell option.
wait \$!
rm plusStrand.fifo

contigsNew.py --chrFile ${genomeFai} \
--fileP ${prefix}.plusRaw.bedgraph \
--fileM ${prefix}.minusRaw.bedgraph \
--sortOut \
> ${prefix}.bed
> ${prefix}.bed
23 changes: 14 additions & 9 deletions templates/contig/RGCRG-MATE1_SENSE
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
#!/bin/bash -eu
samtools view -h -@ ${cpus} ${bam} | \
awk 'BEGIN {OFS="\\t"} {if (\$1!~/^@/ && and(\$2,128)>0) {\$2=xor(\$2,0x10)}; print}' | \
samtools view -@ ${cpus} -Sb - > tmp.bam

bamtools filter -tag NH:1 -in tmp.bam -out tmp_unique.bam && rm tmp.bam
genomeCoverageBed -strand + -split -bg -ibam tmp_unique.bam > ${prefix}.plusRaw.bedgraph
genomeCoverageBed -strand - -split -bg -ibam tmp_unique.bam > ${prefix}.minusRaw.bedgraph
rm tmp_unique.bam
# Stream the uniquely mapped reads to two coverage jobs without writing
# intermediate BAM files.
#
# The plus-strand job runs in the background and reads from a named pipe.
# Its exit status is collected with wait further down, so the script neither
# continues with a half-written plus-strand bedgraph nor ignores a failure
# of that job (a process substitution gives neither guarantee).
mkfifo plusStrand.fifo
genomeCoverageBed -strand + -split -bg -ibam - \
  < plusStrand.fifo \
  > ${prefix}.plusRaw.bedgraph &

# awk toggles the reverse-strand flag (0x10) only on alignment records whose
# FLAG has bit 128 set (second read of a pair in the SAM specification), so
# both mates end up reported on the strand of mate 1. Header lines and all
# other records pass through unchanged. bamtools keeps reads tagged NH:1.
samtools view -h -@ ${cpus} ${bam} \
  | awk '
      BEGIN {OFS="\\t"}
      {if (\$1!~/^@/ && and(\$2,128)>0) {\$2=xor(\$2,0x10)}; print}
    ' \
  | samtools view -Sbu - \
  | bamtools filter -tag NH:1 \
  | tee plusStrand.fifo \
  | genomeCoverageBed -strand - -split -bg -ibam - \
  > ${prefix}.minusRaw.bedgraph

# Block until the background plus-strand job has finished writing; a
# non-zero status aborts the script because of the -e shell option.
wait \$!
rm plusStrand.fifo

contigsNew.py --chrFile ${genomeFai} \
--fileP ${prefix}.plusRaw.bedgraph \
--fileM ${prefix}.minusRaw.bedgraph \
--sortOut \
> ${prefix}.bed
> ${prefix}.bed
21 changes: 13 additions & 8 deletions templates/contig/RGCRG-MATE2_SENSE
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
#!/bin/bash -eu
samtools view -h -@ ${cpus} ${bam} | \
awk 'BEGIN {OFS="\\t"} {if (\$1!~/^@/ && and(\$2,64)>0) {\$2=xor(\$2,0x10)}; print}' | \
samtools view -@ ${cpus} -Sb - > tmp.bam

bamtools filter -tag NH:1 -in tmp.bam -out tmp_unique.bam && rm tmp.bam
genomeCoverageBed -strand + -split -bg -ibam tmp_unique.bam > ${prefix}.plusRaw.bedgraph
genomeCoverageBed -strand - -split -bg -ibam tmp_unique.bam > ${prefix}.minusRaw.bedgraph
rm tmp_unique.bam
# Stream the uniquely mapped reads to two coverage jobs without writing
# intermediate BAM files.
#
# The plus-strand job runs in the background and reads from a named pipe.
# Its exit status is collected with wait further down, so the script neither
# continues with a half-written plus-strand bedgraph nor ignores a failure
# of that job (a process substitution gives neither guarantee).
mkfifo plusStrand.fifo
genomeCoverageBed -strand + -split -bg -ibam - \
  < plusStrand.fifo \
  > ${prefix}.plusRaw.bedgraph &

# awk toggles the reverse-strand flag (0x10) only on alignment records whose
# FLAG has bit 64 set (first read of a pair in the SAM specification), so
# both mates end up reported on the strand of mate 2. Header lines and all
# other records pass through unchanged. The second samtools call names
# stdin explicitly with "-", like the sibling templates, because samtools
# releases that require an input operand only print usage without it.
samtools view -h -@ ${cpus} ${bam} \
  | awk '
      BEGIN {OFS="\\t"}
      {if (\$1!~/^@/ && and(\$2,64)>0) {\$2=xor(\$2,0x10)}; print}
    ' \
  | samtools view -Sbu - \
  | bamtools filter -tag NH:1 \
  | tee plusStrand.fifo \
  | genomeCoverageBed -strand - -split -bg -ibam - \
  > ${prefix}.minusRaw.bedgraph

# Block until the background plus-strand job has finished writing; a
# non-zero status aborts the script because of the -e shell option.
wait \$!
rm plusStrand.fifo

contigsNew.py --chrFile ${genomeFai} \
--fileP ${prefix}.plusRaw.bedgraph \
Expand Down
8 changes: 5 additions & 3 deletions templates/contig/RGCRG-NONE
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/bin/bash -eu
bamtools filter -tag NH:1 -in ${bam} -out tmp_unique.bam
bamToBed -i tmp_unique.bam | sort -T. -k1,1 -k2,2n | mergeBed > ${prefix}.bed
rm tmp_unique.bam
# Keep only reads tagged NH:1, convert them to BED intervals, sort by
# chromosome name and numeric start (sort temp files go to the current
# directory), and let mergeBed write the final contig file. Everything is
# streamed, so no temporary BAM file is written.
bamtools filter -tag NH:1 -in ${bam} |
  bamToBed -i - |
  sort -T . -k1,1 -k2,2n |
  mergeBed > ${prefix}.bed
13 changes: 8 additions & 5 deletions templates/contig/RGCRG-SENSE
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
#!/bin/bash -eu
bamtools filter -tag NH:1 -in ${bam} -out tmp_unique.bam
genomeCoverageBed -strand + -split -bg -ibam tmp_unique.bam > ${prefix}.plusRaw.bedgraph
genomeCoverageBed -strand - -split -bg -ibam tmp_unique.bam > ${prefix}.minusRaw.bedgraph
rm tmp_unique.bam
# Stream the uniquely mapped reads (tag NH:1) to two coverage jobs without
# writing an intermediate BAM file.
#
# The plus-strand job runs in the background and reads from a named pipe.
# Its exit status is collected with wait further down, so the script neither
# continues with a half-written plus-strand bedgraph nor ignores a failure
# of that job (a process substitution gives neither guarantee).
mkfifo plusStrand.fifo
genomeCoverageBed -strand + -split -bg -ibam - \
  < plusStrand.fifo \
  > ${prefix}.plusRaw.bedgraph &

# tee duplicates the filtered BAM stream: one copy goes into the named pipe
# for the plus strand, the other continues down the pipeline for the minus
# strand.
bamtools filter -tag NH:1 -in ${bam} \
  | tee plusStrand.fifo \
  | genomeCoverageBed -strand - -split -bg -ibam - \
  > ${prefix}.minusRaw.bedgraph

# Block until the background plus-strand job has finished writing; a
# non-zero status aborts the script because of the -e shell option.
wait \$!
rm plusStrand.fifo

contigsNew.py --chrFile ${genomeFai} \
--fileP ${prefix}.plusRaw.bedgraph \
--fileM ${prefix}.minusRaw.bedgraph \
--sortOut \
> ${prefix}.bed
> ${prefix}.bed

0 comments on commit 54ed419

Please sign in to comment.