diff --git a/create_interval_listV4.pl b/create_interval_listV4.pl index 314455f..48258aa 100755 --- a/create_interval_listV4.pl +++ b/create_interval_listV4.pl @@ -76,14 +76,14 @@ my $strandexon = ""; if ($Strand =~ /TRUE|True|true/){ - $strandexon = $exonarray[5]; - } - elsif ($Strand =~ /FALSE|False|false/){ + $strandexon = $exonarray[4]; + } + elsif ($Strand =~ /FALSE|False|false/){ $strandexon = "+"; - } - else{ + } + else{ usage() and exit(1) - } + } $chrom =~ s/chr//mg;#Remove chr, Un_, _random from chromosome name print EXONS_OUT "$chrom\t$start\t$end\t$strandexon"."\t$target\n";#Print chromosome and other information to output file @@ -103,13 +103,13 @@ if ($Strand =~ /TRUE|True|true/){ $strandbait = $baitarray[5]; - } - elsif ($Strand =~ /FALSE|False|false/){ - $strandbait = "+"; - } - else{ + } + elsif ($Strand =~ /FALSE|False|false/){ + $strandbait = "+"; + } + else{ usage() and exit(1) - } + } $chrbait =~ s/chr//mg;#Remove chr from chromosome name print BAITS_OUT "$chrbait\t$startbait\t$endbait\t$strandbait"."\t$targetbait\n";#Print output to file diff --git a/create_per_base_intervals.pl b/create_per_base_bed.pl similarity index 97% rename from create_per_base_intervals.pl rename to create_per_base_bed.pl index 54e4625..d1c0f95 100755 --- a/create_per_base_intervals.pl +++ b/create_per_base_bed.pl @@ -50,7 +50,7 @@ my $region = ($stop-$start); #Iterate over region and create bins open (OUTPUT, ">>", "$outputfolder/$output.per_base.bed" ) or die $!; - for (my $i=($start-1); $i<$stop; $i=($i+$binSize)){ + for (my $i=($start+1); $i<=$stop; $i=($i+$binSize)){ print OUTPUT "$chr\t" . $i . "\t" . ($i+1) . "\t$strand\t$gene\n"; } close(OUTPUT); diff --git a/make_compute5_bedfiles_calculon.sh b/make_compute5_bedfiles_calculon.sh index 725efef..3cbabab 100755 --- a/make_compute5_bedfiles_calculon.sh +++ b/make_compute5_bedfiles_calculon.sh @@ -221,13 +221,21 @@ then fi module load ngs-utils +module load BEDTools +bedtools merge -i ${baits}.bed -c 4,5 -o distinct > ${baits}.merged.bed + +if [ ! -f ${baits}.genesOnly ] +then + awk '{print $5}' ${baits}.merged.bed > ${baits}.genesOnly +fi + if [ $COVPERBASE == "true" ] then if [ ! -f ${baits}.uniq.per_base.bed ] then echo "starting to create_per_base_intervals, this may take a while" - create_per_base_intervals.pl -input ${baits}.bed -output ${NAME} -outputfolder $TMP + create_per_base_bed.pl -input ${baits}.bed -output ${NAME} -outputfolder $TMP sort -V -k1 -k2 -k3 ${TMP}/${NAME}.per_base.bed | uniq -u > ${baits}.uniq.per_base.bed rm ${TMP}/${NAME}.per_base.bed @@ -236,18 +244,15 @@ then else echo "${baits}.uniq.per_base.bed already exists, skipped!" fi - if [ ! -f ${baits}.genesOnly ] - then - awk '{print $5}' ${baits}.bed > ${baits}.genesOnly - fi + #make interval_list coverage per base cat ${phiXRef} > ${baits}.uniq.per_base.interval_list cat ${baits}.uniq.per_base.bed >> ${baits}.uniq.per_base.interval_list echo "${baits}.uniq.per_base.interval_list created" awk '{ if ($0 !~ /^@/){ - minus=($2 + 1); - print $1"\t"minus"\t"$3"\t"$4"\t"$5 + minus=($3 -1) + print $1"\t"$2"\t"minus"\t"$4"\t"$5 } else{ print $0