diff --git a/workflow/rules/findcircrna.smk b/workflow/rules/findcircrna.smk index d3be18b..8a832f7 100644 --- a/workflow/rules/findcircrna.smk +++ b/workflow/rules/findcircrna.smk @@ -1354,185 +1354,6 @@ bash {output.merge_bash_script} """ -# if RUN_DCC and RUN_MAPSPLICE and RUN_NCLSCAN: -# rule merge_per_sample: -# input: -# unpack(get_per_sample_files_to_merge) -# output: -# merged_counts=join(WORKDIR,"results","{sample}","{sample}.circRNA_counts.txt.gz"), -# params: -# script=join(SCRIPTS_DIR,"_merge_per_sample_counts_table.py"), -# samplename="{sample}", -# peorse=get_peorse, -# reffa=REF_FA, -# minreadcount=config['minreadcount'] # this filter is redundant as inputs are already pre-filtered. -# envmodules: -# TOOLS["python37"]["version"] -# shell:""" -# set -exo pipefail -# outdir=$(dirname {output.merged_counts}) - -# parameters=" --circExplorer {input.circExplorer}" -# parameters="$parameters --ciri {input.CIRI}" -# parameters="$parameters --dcc {input.DCC}" -# parameters="$parameters --mapsplice {input.MapSplice}" -# if [[ "{params.peorse}" == "PE" ]]; then # NCLscan is run only if the sample is PE -# parameters="$parameters --nclscan {input.NCLscan}" -# fi -# parameters="$parameters --min_read_count_reqd {params.minreadcount}" -# parameters="$parameters --reffa {params.reffa}" -# parameters="$parameters --samplename {params.samplename} -o {output.merged_counts}" - -# echo "python {params.script} $parameters" -# python {params.script} $parameters -# """ -# elif RUN_DCC and not RUN_MAPSPLICE and not RUN_NCLSCAN: -# rule merge_per_sample: -# input: -# unpack(get_per_sample_files_to_merge) -# output: -# merged_counts=join(WORKDIR,"results","{sample}","{sample}.circRNA_counts.txt.gz"), -# params: -# script=join(SCRIPTS_DIR,"_merge_per_sample_counts_table.py"), -# samplename="{sample}", -# peorse=get_peorse, -# reffa=REF_FA, -# minreadcount=config['minreadcount'] # this filter is redundant as inputs are already pre-filtered. -# envmodules: -# TOOLS["python37"]["version"] -# shell:""" -# set -exo pipefail -# outdir=$(dirname {output.merged_counts}) - -# parameters=" --circExplorer {input.circExplorer}" -# parameters="$parameters --ciri {input.CIRI}" -# parameters="$parameters --dcc {input.DCC}" -# parameters="$parameters --min_read_count_reqd {params.minreadcount}" -# parameters="$parameters --reffa {params.reffa}" -# parameters="$parameters --samplename {params.samplename} -o {output.merged_counts}" - -# echo "python {params.script} $parameters" -# python {params.script} $parameters -# """ -# elif RUN_DCC and RUN_MAPSPLICE and not RUN_NCLSCAN: -# rule merge_per_sample: -# input: -# unpack(get_per_sample_files_to_merge) -# output: -# merged_counts=join(WORKDIR,"results","{sample}","{sample}.circRNA_counts.txt.gz"), -# params: -# script=join(SCRIPTS_DIR,"_merge_per_sample_counts_table.py"), -# samplename="{sample}", -# peorse=get_peorse, -# reffa=REF_FA, -# minreadcount=config['minreadcount'] # this filter is redundant as inputs are already pre-filtered. -# envmodules: -# TOOLS["python37"]["version"] -# shell:""" -# set -exo pipefail -# outdir=$(dirname {output.merged_counts}) - -# parameters=" --circExplorer {input.circExplorer}" -# parameters="$parameters --ciri {input.CIRI}" -# parameters="$parameters --dcc {input.DCC}" -# parameters="$parameters --mapsplice {input.MapSplice}" -# parameters="$parameters --min_read_count_reqd {params.minreadcount}" -# parameters="$parameters --reffa {params.reffa}" -# parameters="$parameters --samplename {params.samplename} -o {output.merged_counts}" - -# echo "python {params.script} $parameters" -# python {params.script} $parameters -# """ -# elif not RUN_DCC and RUN_MAPSPLICE and not RUN_NCLSCAN: -# rule merge_per_sample: -# input: -# unpack(get_per_sample_files_to_merge) -# output: -# merged_counts=join(WORKDIR,"results","{sample}","{sample}.circRNA_counts.txt.gz"), -# params: -# script=join(SCRIPTS_DIR,"_merge_per_sample_counts_table.py"), -# samplename="{sample}", -# peorse=get_peorse, -# reffa=REF_FA, -# minreadcount=config['minreadcount'] # this filter is redundant as inputs are already pre-filtered. -# envmodules: -# TOOLS["python37"]["version"] -# shell:""" -# set -exo pipefail -# outdir=$(dirname {output.merged_counts}) - -# parameters=" --circExplorer {input.circExplorer}" -# parameters="$parameters --ciri {input.CIRI}" -# parameters="$parameters --mapsplice {input.MapSplice}" -# parameters="$parameters --min_read_count_reqd {params.minreadcount}" -# parameters="$parameters --reffa {params.reffa}" -# parameters="$parameters --samplename {params.samplename} -o {output.merged_counts}" - -# echo "python {params.script} $parameters" -# python {params.script} $parameters -# """ -# elif not RUN_DCC and not RUN_MAPSPLICE and RUN_NCLSCAN: -# rule merge_per_sample: -# input: -# unpack(get_per_sample_files_to_merge) -# output: -# merged_counts=join(WORKDIR,"results","{sample}","{sample}.circRNA_counts.txt.gz"), -# params: -# script=join(SCRIPTS_DIR,"_merge_per_sample_counts_table.py"), -# samplename="{sample}", -# peorse=get_peorse, -# reffa=REF_FA, -# minreadcount=config['minreadcount'] # this filter is redundant as inputs are already pre-filtered. -# envmodules: -# TOOLS["python37"]["version"] -# shell:""" -# set -exo pipefail -# outdir=$(dirname {output.merged_counts}) - -# parameters=" --circExplorer {input.circExplorer}" -# parameters="$parameters --ciri {input.CIRI}" -# if [[ "{params.peorse}" == "PE" ]]; then # NCLscan is run only if the sample is PE -# parameters="$parameters --nclscan {input.NCLscan}" -# fi -# parameters="$parameters --min_read_count_reqd {params.minreadcount}" -# parameters="$parameters --reffa {params.reffa}" -# parameters="$parameters --samplename {params.samplename} -o {output.merged_counts}" - -# echo "python {params.script} $parameters" -# python {params.script} $parameters -# """ -# else: # DCC, MapSplice and NCLScan are all off! -# rule merge_per_sample: -# input: -# unpack(get_per_sample_files_to_merge) -# output: -# merged_counts=join(WORKDIR,"results","{sample}","{sample}.circRNA_counts.txt.gz"), -# params: -# script=join(SCRIPTS_DIR,"_merge_per_sample_counts_table.py"), -# samplename="{sample}", -# peorse=get_peorse, -# reffa=REF_FA, -# minreadcount=config['minreadcount'] # this filter is redundant as inputs are already pre-filtered. -# envmodules: -# TOOLS["python37"]["version"] -# shell:""" -# set -exo pipefail -# outdir=$(dirname {output.merged_counts}) - -# parameters=" --circExplorer {input.circExplorer}" -# parameters="$parameters --ciri {input.CIRI}" -# parameters="$parameters --min_read_count_reqd {params.minreadcount}" -# parameters="$parameters --reffa {params.reffa}" -# parameters="$parameters --samplename {params.samplename} -o {output.merged_counts}" - -# echo "python {params.script} $parameters" -# python {params.script} $parameters -# """ - - -# localrules: -# create_master_counts_file, - # rule create_master_counts_file: # merge all per-sample counts tables into a single giant counts matrix and annotate it with known circRNA databases diff --git a/workflow/rules/post_align_processing.smk b/workflow/rules/post_align_processing.smk index faea673..21b0bc5 100644 --- a/workflow/rules/post_align_processing.smk +++ b/workflow/rules/post_align_processing.smk @@ -1,92 +1,3 @@ -# rule split_splice_reads_BAM_create_BW: -# input: -# bam=rules.create_spliced_reads_bam.output.bam -# output: -# bam=join(WORKDIR,"results","{sample}","STAR2p","{sample}.spliced_reads."+HOST+".bam"), -# bw=join(WORKDIR,"results","{sample}","STAR2p","{sample}.spliced_reads."+HOST+".bw") -# params: -# sample="{sample}", -# workdir=WORKDIR, -# memG=getmemG("split_splice_reads_BAM_create_BW"), -# outdir=join(WORKDIR,"results","{sample}","STAR2p"), -# regions=REF_REGIONS, -# randomstr=str(uuid.uuid4()) -# threads: getthreads("split_splice_reads_BAM_create_BW") -# envmodules: TOOLS["samtools"]["version"],TOOLS["bedtools"]["version"],TOOLS["ucsc"]["version"],TOOLS["sambamba"]["version"] -# shell:""" -# if [ -d /lscratch/${{SLURM_JOB_ID}} ];then -# TMPDIR="/lscratch/${{SLURM_JOB_ID}}" -# else -# TMPDIR="/dev/shm/{params.randomstr}" -# mkdir -p $TMPDIR -# fi -# cd {params.outdir} -# bam_basename="$(basename {input.bam})" -# while read a b;do -# bam="${{bam_basename%.*}}.${{a}}.bam" -# samtools view {input.bam} $b -b > ${{TMPDIR}}/${{bam%.*}}.tmp.bam -# sambamba sort --memory-limit={params.memG} --tmpdir=${{TMPDIR}} --nthreads={threads} --out=$bam ${{TMPDIR}}/${{bam%.*}}.tmp.bam -# bw="${{bam%.*}}.bw" -# bdg="${{bam%.*}}.bdg" -# sizes="${{bam%.*}}.sizes" -# bedtools genomecov -bga -split -ibam $bam > $bdg -# bedSort $bdg $bdg -# if [ "$(wc -l $bdg|awk '{{print $1}}')" != "0" ];then -# samtools view -H $bam|grep ^@SQ|cut -f2,3|sed "s/SN://g"|sed "s/LN://g" > $sizes -# bedGraphToBigWig $bdg $sizes $bw -# else -# touch $bw -# fi -# rm -f $bdg $sizes -# done < {params.regions} -# cd $TMPDIR && rm -f * -# """ - -# rule split_linear_reads_BAM_create_BW: -# # This rule is identical to split_splice_reads_BAM_create_BW, "spliced_reads" is replaced by "linear_reads" -# input: -# bam=rules.create_linear_reads_bam.output.bam -# output: -# bam=join(WORKDIR,"results","{sample}","STAR2p","{sample}.linear_reads."+HOST+".bam"), -# bw=join(WORKDIR,"results","{sample}","STAR2p","{sample}.linear_reads."+HOST+".bw") -# params: -# sample="{sample}", -# workdir=WORKDIR, -# memG=getmemG("split_linear_reads_BAM_create_BW"), -# outdir=join(WORKDIR,"results","{sample}","STAR2p"), -# regions=REF_REGIONS, -# randomstr=str(uuid.uuid4()) -# threads: getthreads("split_linear_reads_BAM_create_BW") -# envmodules: TOOLS["samtools"]["version"],TOOLS["bedtools"]["version"],TOOLS["ucsc"]["version"],TOOLS["sambamba"]["version"] -# shell:""" -# if [ -d /lscratch/${{SLURM_JOB_ID}} ];then -# TMPDIR="/lscratch/${{SLURM_JOB_ID}}" -# else -# TMPDIR="/dev/shm/{params.randomstr}" -# mkdir -p $TMPDIR -# fi -# cd {params.outdir} -# bam_basename="$(basename {input.bam})" -# while read a b;do -# bam="${{bam_basename%.*}}.${{a}}.bam" -# samtools view {input.bam} $b -b > ${{TMPDIR}}/${{bam%.*}}.tmp.bam -# sambamba sort --memory-limit={params.memG} --tmpdir=${{TMPDIR}} --nthreads={threads} --out=$bam ${{TMPDIR}}/${{bam%.*}}.tmp.bam -# bw="${{bam%.*}}.bw" -# bdg="${{bam%.*}}.bdg" -# sizes="${{bam%.*}}.sizes" -# bedtools genomecov -bga -split -ibam $bam > $bdg -# bedSort $bdg $bdg -# if [ "$(wc -l $bdg|awk '{{print $1}}')" != "0" ];then -# samtools view -H $bam|grep ^@SQ|cut -f2,3|sed "s/SN://g"|sed "s/LN://g" > $sizes -# bedGraphToBigWig $bdg $sizes $bw -# else -# touch $bw -# fi -# rm -f $bdg $sizes -# done < {params.regions} -# cd $TMPDIR && rm -f * -# """ - localrules: merge_genecounts, diff --git a/workflow/rules/post_findcircrna_processing.smk b/workflow/rules/post_findcircrna_processing.smk index 84a0387..908fd9e 100644 --- a/workflow/rules/post_findcircrna_processing.smk +++ b/workflow/rules/post_findcircrna_processing.smk @@ -529,224 +529,3 @@ for bam in $(ls {params.samplename}.*.HQ_only.BSJ.bam);do fi done """ - - - -# localrules: venn -# rule venn: -# input: -# circexplorerout=rules.circExplorer.output.annotations, -# ciriout=rules.ciri.output.cirioutfiltered -# output: -# png=join(WORKDIR,"results","{sample}","{sample}.venn_mqc.png"), -# cirionly=join(WORKDIR,"results","{sample}","{sample}.cirionly.lst"), -# circexploreronly=join(WORKDIR,"results","{sample}","{sample}.circexploreronly.lst"), -# common=join(WORKDIR,"results","{sample}","{sample}.common.lst") -# params: -# script1=join(SCRIPTS_DIR,"venn.R"), -# sample="{sample}", -# outdir=join(WORKDIR,"results","{sample}") -# container: "docker://nciccbr/ccbr_venn:latest" -# threads: getthreads("venn") -# shell:""" -# set -exo pipefail -# cut -f1 {input.ciriout}|grep -v circRNA_ID > /dev/shm/{params.sample}.ciri.lst -# cut -f1-3 {input.circexplorerout}|awk -F"\\t" '{{print $1\":\"$2+1\"|\"$3}}' > /dev/shm/{params.sample}.circExplorer.lst -# if [[ "$(cat /dev/shm/{params.sample}.ciri.lst | wc -l)" != "0" ]];then -# if [[ "$(cat /dev/shm/{params.sample}.circExplorer.lst | wc -l)" != "0" ]];then -# 2set_venn.R \\ -# -l /dev/shm/{params.sample}.ciri.lst \\ -# -r /dev/shm/{params.sample}.circExplorer.lst \\ -# -p {output.png} \\ -# -m {output.cirionly} \\ -# -s {output.circexploreronly} \\ -# -c1 "CIRI2" \\ -# -c2 "CircExplorer2" \\ -# -c {output.common} \\ -# -t {params.sample} -# else -# for o in {output} -# do -# touch $o -# done -# fi -# fi -# rm -f /dev/shm{params.sample}* -# """ - - -# rule add_strand_to_circExplorer: -# input: -# backsplicedjunctions=rules.circExplorer.output.backsplicedjunctions, -# bed=rules.split_BAM_create_BW.output.bed -# output: -# backsplicedjunctions=join(WORKDIR,"results","{sample}","circExplorer","{sample}.back_spliced_junction.bed"), -# params: -# sample="{sample}", -# workdir=WORKDIR, -# pythonscript=join(SCRIPTS_DIR,"copy_strand_info.py"), -# threads: 2 -# # envmodules: -# shell:""" -# cp {input.backsplicedjunctions} /dev/shm -# bsj_basename="$(basename {input.backsplicedjunctions})" -# python {params.pythonscript} --from {input.bed} --to /dev/shm/${{bsj_basename}} --output {output.backsplicedjunctions} -# """ - -# rule recall_valid_BSJ_split_BAM_by_strand_create_BW: -# input: -# bam=rules.create_circExplorer_BSJ_bam.output.BSJbam, -# output: -# bam=join(WORKDIR,"results","{sample}","STAR2p","BSJs","{sample}.BSJ."+HOST+".bam"), -# plusbam=join(WORKDIR,"results","{sample}","STAR2p","BSJs","{sample}.BSJ."+HOST+".plus.bam"), -# minusbam=join(WORKDIR,"results","{sample}","STAR2p","BSJs","{sample}.BSJ."+HOST+".minus.bam"), -# bed=join(WORKDIR,"results","{sample}","STAR2p","BSJs","{sample}.BSJ."+HOST+".bed"), -# bw=join(WORKDIR,"results","{sample}","STAR2p","BSJs","{sample}.BSJ."+HOST+".bw"), -# plusbw=join(WORKDIR,"results","{sample}","STAR2p","BSJs","{sample}.BSJ."+HOST+".plus.bw"), -# minusbw=join(WORKDIR,"results","{sample}","STAR2p","BSJs","{sample}.BSJ."+HOST+".minus.bw"), -# validBSJbed=join(WORKDIR,"results","{sample}","customBSJs","{sample}.valid_STAR_BSJ.bed") -# params: -# sample="{sample}", -# workdir=WORKDIR, -# memG=getmemG("recall_valid_BSJ_split_BAM_by_strand_create_BW"), -# outdir=join(WORKDIR,"results","{sample}","STAR2p","BSJs"), -# pythonscript=join(SCRIPTS_DIR,"validate_BSJ_reads_and_split_BSJ_bam_by_strand.py"), -# regions=REF_REGIONS, -# randomstr=str(uuid.uuid4()) -# threads: getthreads("recall_valid_BSJ_split_BAM_by_strand_create_BW") -# envmodules: TOOLS["samtools"]["version"],TOOLS["bedtools"]["version"],TOOLS["ucsc"]["version"],TOOLS["sambamba"]["version"],TOOLS["python37"]["version"] -# shell:""" -# set -exo pipefail -# if [ -d /lscratch/${{SLURM_JOB_ID}} ];then -# TMPDIR="/lscratch/${{SLURM_JOB_ID}}" -# else -# TMPDIR="/dev/shm/{params.randomstr}" -# mkdir -p $TMPDIR -# fi -# cd {params.outdir} -# bam_basename="$(basename {input.bam})" -# while read a b;do -# bam="${{bam_basename%.*}}.${{a}}.bam" -# plusbam="${{bam_basename%.*}}.${{a}}.plus.bam" -# minusbam="${{bam_basename%.*}}.${{a}}.minus.bam" -# bed="${{bam_basename%.*}}.${{a}}.bed" -# samtools view {input.bam} $b -b > ${{TMPDIR}}/${{bam%.*}}.tmp.bam -# sambamba sort --memory-limit={params.memG} --tmpdir=${{TMPDIR}} --nthreads={threads} --out=$bam ${{TMPDIR}}/${{bam%.*}}.tmp.bam -# bw="${{bam%.*}}.bw" -# bdg="${{bam%.*}}.bdg" -# plusbw="${{bam%.*}}.plus.bw" -# plusbdg="${{bam%.*}}.plus.bdg" -# minusbw="${{bam%.*}}.minus.bw" -# minusbdg="${{bam%.*}}.minus.bdg" -# sizes="${{bam%.*}}.sizes" -# # create strand specific bams -# python {params.pythonscript} \ -# -i $bam \ -# -p $plusbam \ -# -m $minusbam \ -# -b $bed -# bedSort $bed $bed -# # create bedgraphs -# bedtools genomecov -bga -split -ibam $bam > $bdg -# bedSort $bdg $bdg -# bedtools genomecov -bga -split -ibam $plusbam > $plusbdg -# bedSort $plusbdg $plusbdg -# bedtools genomecov -bga -split -ibam $minusbam > $minusbdg -# bedSort $minusbdg $minusbdg -# # create bigwigs -# if [ "$(wc -l $bdg|awk '{{print $1}}')" != "0" ];then -# samtools view -H $bam|grep ^@SQ|cut -f2,3|sed "s/SN://g"|sed "s/LN://g" > $sizes -# bedGraphToBigWig $bdg $sizes $bw -# if [ "$(wc -l $plusbdg|awk '{{print $1}}')" != "0" ];then -# bedGraphToBigWig $plusbdg $sizes $plusbw -# else -# touch $plusbw -# fi -# if [ "$(wc -l $minusbdg|awk '{{print $1}}')" != "0" ];then -# bedGraphToBigWig $minusbdg $sizes $minusbw -# else -# touch $minusbw -# fi -# else -# touch $bw -# fi -# rm -f $bdg $sizes - -# done < {params.regions} -# cat {params.sample}.BSJ.*.bed |cut -f1-6 > ${{TMPDIR}}/{sample}.tmp.bed -# bedSort ${{TMPDIR}}/{sample}.tmp.bed ${{TMPDIR}}/{sample}.tmp.bed -# awk -F"\\t" -v OFS="\\t" '{{$4="S"NR;print}}' ${{TMPDIR}}/{sample}.tmp.bed > {output.validBSJbed} -# cd $TMPDIR && rm -f * -# """ - -# rule add_novel_ciri_BSJs_to_customBSJ: -# input: -# ciriout=rules.ciri.output.ciriout, -# hg38bed=rules.recall_valid_BSJ_split_BAM_by_strand_create_BW.output.bed, -# validBSJbed=rules.recall_valid_BSJ_split_BAM_by_strand_create_BW.output.validBSJbed, -# output: -# novelBSJ=join(WORKDIR,"results","{sample}","customBSJs","{sample}.novel_CIRI_BSJ.bed"), -# cirireadids=temp(join(WORKDIR,"results","{sample}","customBSJs","{sample}.ciri.readids")), -# combinedBSJ=join(WORKDIR,"results","{sample}","customBSJs","{sample}.BSJ.bed") -# params: -# sample="{sample}", -# outdir=join(WORKDIR,"results","{sample}","customBSJs"), -# list_compare_script=join(SCRIPTS_DIR,"_compare_lists.py"), -# randomstr=str(uuid.uuid4()) -# envmodules: TOOLS["ucsc"]["version"],TOOLS["python37"]["version"] -# shell:""" -# set -exo pipefail -# if [ -d /lscratch/${{SLURM_JOB_ID}} ];then -# TMPDIR="/lscratch/${{SLURM_JOB_ID}}" -# else -# TMPDIR="/dev/shm/{params.randomstr}" -# mkdir -p $TMPDIR -# fi -# cd {params.outdir} -# grep -v junction_reads_ID {input.ciriout}|awk -F"\\t" '{{print substr($NF,1,length($NF)-1)}}'|tr ',' '\\n'|sort|uniq > {params.sample}.ciri.readids -# stardir=$(dirname {input.hg38bed}) -# cat ${{stardir}}/{params.sample}.BSJ.*.bed|awk -F"\\t" '{{print $NF}}'|tr ',' '\\n'|awk -F"##" '{{print $1}}'|sort|uniq > {params.sample}.circExplorer.readids -# python {params.list_compare_script} {params.sample}.ciri.readids {params.sample}.circExplorer.readids 1 -# mv a_only.lst {params.sample}.ciri.novel.readids && rm -f a_* b_* -# head -n1 {input.ciriout} > {params.sample}.ciri.novel.out -# while read a;do grep $a {input.ciriout};done < {params.sample}.ciri.novel.readids |sort|uniq >> {params.sample}.ciri.novel.out -# grep -v circRNA_ID {params.sample}.ciri.novel.out | awk -F"\\t" -v OFS="\\t" '{{print $2,$3-1,$4,$2":"$3-1"-"$4,$5,$11}}' > ${{TMPDIR}}/{params.sample}.novel_CIRI_BSJ.tmp.bed -# awk -F"\\t" '{{print $4}}' ${{TMPDIR}}/{params.sample}.novel_CIRI_BSJ.tmp.bed > ${{TMPDIR}}/{params.sample}.novel_CIRI_BSJ.tmp.lst -# awk -F"\\t" '{{print $1":"$2"-"$3}}' {input.validBSJbed} > ${{TMPDIR}}/{params.sample}.circExplorer.BSJ.lst -# python {params.list_compare_script} ${{TMPDIR}}/{params.sample}.novel_CIRI_BSJ.tmp.lst ${{TMPDIR}}/{params.sample}.circExplorer.BSJ.lst 1 -# mv a_only.lst {params.sample}.ciri.novel.BSJ.lst && rm -f a_* b_* -# while read a;do grep $a ${{TMPDIR}}/{params.sample}.novel_CIRI_BSJ.tmp.bed;done < {params.sample}.ciri.novel.BSJ.lst > ${{TMPDIR}}/{params.sample}.ciri.novel.BSJ.bed -# bedSort ${{TMPDIR}}/{params.sample}.ciri.novel.BSJ.bed ${{TMPDIR}}/{params.sample}.ciri.novel.BSJ.bed -# awk -F"\\t" -v OFS="\\t" '{{$4="C"NR;print}}' ${{TMPDIR}}/{params.sample}.ciri.novel.BSJ.bed > {output.novelBSJ} -# cat {input.validBSJbed} {output.novelBSJ} > {output.combinedBSJ} -# bedSort {output.combinedBSJ} {output.combinedBSJ} -# rm -f {params.sample}.circExplorer.readids {params.sample}.ciri.novel.* -# cd $TMPDIR && rm -f * -# """ -# rule filter_ciri_bam_for_BSJs: -# input: -# bam=rules.ciri.output.ciribam, -# ciriout=rules.ciri.output.cirioutfiltered, -# readids=rules.add_novel_ciri_BSJs_to_customBSJ.output.cirireadids -# output: -# bam=join(WORKDIR,"results","{sample}","ciri","{sample}.bwa.BSJ.bam") -# params: -# sample="{sample}", -# memG=getmemG("filter_ciri_bam_for_BSJs"), -# script=join(SCRIPTS_DIR,"filter_bam_by_readids.py"), -# randomstr=str(uuid.uuid4()) -# threads: getthreads("filter_ciri_bam_for_BSJs") -# envmodules: TOOLS["python37"]["version"],TOOLS["sambamba"]["version"] -# shell:""" -# set -exo pipefail -# if [ -d /lscratch/${{SLURM_JOB_ID}} ];then -# TMPDIR="/lscratch/${{SLURM_JOB_ID}}" -# else -# TMPDIR="/dev/shm/{params.randomstr}" -# mkdir -p $TMPDIR -# fi -# sambamba sort --memory-limit={params.memG} --tmpdir=${{TMPDIR}} --nthreads={threads} --out=${{TMPDIR}}/{params.sample}.ciri.sorted.bam {input.bam} -# python {params.script} --inputBAM ${{TMPDIR}}/{params.sample}.ciri.sorted.bam --outputBAM ${{TMPDIR}}/{params.sample}.tmp.bam --readids {input.readids} -# sambamba sort --memory-limit={params.memG} --tmpdir=${{TMPDIR}} --nthreads={threads} --out={output.bam} ${{TMPDIR}}/{params.sample}.tmp.bam -# cd $TMPDIR && rm -f * -# """