Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
sujaikumar committed Apr 18, 2011
1 parent d2ef60e commit f9b84cd
Show file tree
Hide file tree
Showing 3 changed files with 211 additions and 0 deletions.
24 changes: 24 additions & 0 deletions bigmem_blat.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env perl

use strict;
use warnings;
use Getopt::Long qw(:config pass_through no_ignore_case);

my $outfile = pop @ARGV;
my $qryfile = pop @ARGV;
my $reffile = pop @ARGV;

my $num_cores = 4;
my $num_lines = 10000;

GetOptions (
"cpus|cores:i" => \$num_cores,
"lines:i" => \$num_lines,
);

my $tmpdir = "$outfile\_" . int(rand(10000));
mkdir $tmpdir or die $!;
my $parallel_command = "";
system ("fastaqual_multiline_to_singleline.pl $qryfile | split -l $num_lines -a 4 - $tmpdir/split_") and die $!;
system ("parallel -j $num_cores blat @ARGV $reffile {} {}.psl ::: $tmpdir/split_*") and die $!;
system ("cat $tmpdir/*.psl >$outfile; rm -rf $tmpdir") and die $!;
154 changes: 154 additions & 0 deletions fastaqual_select.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#!/usr/bin/perl

use strict;
use warnings;
use Getopt::Long qw(:config pass_through no_ignore_case);

my ($fastafile,$sort,$numfasta,$length,$prefix,$regexp,$headerfile,$includefile,$excludefile,$delimiter,$case) = ("-","S","","","","","","",""," ","");
GetOptions (
"fastafile:s" => \$fastafile,
"sort:s" => \$sort,
"numfasta:i" => \$numfasta,
"length:i" => \$length,
"prefix:s" => \$prefix,
"regexp:s" => \$regexp,
"includefile:s" => \$includefile,
"excludefile:s" => \$excludefile,
"delimiter:s" => \$delimiter,
"case:s" => \$case,
);

$case = uc(substr($case,0,1)) if $case;

my (%include_headers, %exclude_headers);
if ($includefile)
{
open FILE,"<$includefile" or die "Couldn't open includes file $includefile\n";
while (<FILE>)
{
chomp;
if (/^>?(\S+)\s(\d+)\s(\d+)\b/ or /^>?(\S+)_(\d+)_(\d+)\b/)
{
push @{$include_headers{$1}}, ($2 <= $3) ? [$2,$3] : [$3,$2];
} elsif (/^>?(\S+)/)
{
$include_headers{$1}=1
}
}
close FILE;
}
if ($excludefile)
{
open FILE,"<$excludefile" or die "Couldn't open excludes file $excludefile\n";
while (<FILE>)
{
if (/^>?(\S+)/)
{
$exclude_headers{$1}=1
}
}
close FILE;
}

my $seqs = &fastafile2hash($fastafile,"N",$sort);
my @sortkeys;

if (uc($sort) eq "R") {
@sortkeys = sort {$$seqs{$a}{order} <=> $$seqs{$b}{order}} keys %{$seqs};
} elsif (uc($sort) eq "A") {
@sortkeys = sort keys %{$seqs};
} else {
@sortkeys = sort {length($$seqs{$b}{seq}) <=> length($$seqs{$a}{seq})} keys %{$seqs};
}

my $num_printed = 0;
for my $chrontig (@sortkeys)
{
last if $numfasta and $numfasta == $num_printed;
if ($$seqs{$chrontig}{seq}=~ /\d+/) {
# it's a qual file
my @qual = split(/\s+/,$$seqs{$chrontig}{seq});
next if $length and scalar @qual < $length;
}
else {
next if $length and length($$seqs{$chrontig}{seq}) < $length;
}
next if $excludefile and exists $exclude_headers{$chrontig};
next if $includefile and not exists $include_headers{$chrontig};
my $toprint;
if (%include_headers and $include_headers{$chrontig} =~ /array/i)
{
for my $interval (@{$include_headers{$chrontig}})
{
$toprint = ">$prefix$chrontig";
my ($start, $stop) = @{$interval};
$toprint .= "$delimiter$start$delimiter$stop";
$toprint .= " $$seqs{$chrontig}{desc}" if $$seqs{$chrontig}{desc};
$toprint .= "\n" . substr($$seqs{$chrontig}{seq}, $start - 1, $stop - $start + 1) . "\n";
next if $regexp and $toprint !~ /$regexp/;
print $toprint;
$num_printed++;
}
}
else
{
$toprint = ">$prefix$chrontig";
$toprint .= " $$seqs{$chrontig}{desc}" if $$seqs{$chrontig}{desc};
$$seqs{$chrontig}{seq} = lc($$seqs{$chrontig}{seq}) if $case eq "L";
$$seqs{$chrontig}{seq} = uc($$seqs{$chrontig}{seq}) if $case eq "U";
$$seqs{$chrontig}{seq} =~ tr/[A-Z][a-z]/[a-z][A-Z]/ if $case eq "I";
$toprint .= "\n$$seqs{$chrontig}{seq}\n";
next if $regexp and $toprint !~ /$regexp/;
print $toprint;
$num_printed++;
}
}

#############################################################################

sub fastafile2hash
{
my $fastafile = shift @_;
my $changecase = "N";
my $order = "S"; # S = same as input, or R = random
$changecase = substr(uc(shift @_),0,1) if @_;
$order = substr(uc(shift @_),0,1) if @_;
my %sequences;
my $fh = &read_fh($fastafile);
my $seqid;
my $seq_counter;
while (<$fh>)
{
if (/^>(\S+)(.*)/) {
$seqid = $1;
$sequences{$seqid}{desc} = $2;
$sequences{$seqid}{order} = $order eq "S" ? $seq_counter++ : rand;
}
else {
if (/\d/) {
chomp($sequences{$seqid}{seq} .= " $_"); # add space to sep qual values
$sequences{$seqid}{seq} =~ s/^\s+//;
$sequences{$seqid}{seq} =~ s/\s+$//;
next;
}
chomp($sequences{$seqid}{seq} .= lc($_)) if $changecase eq "L";
chomp($sequences{$seqid}{seq} .= uc($_)) if $changecase eq "U";
chomp($sequences{$seqid}{seq} .= $_ ) if $changecase eq "N";
}
}
return \%sequences;
}

#############################################################################

sub read_fh {
my $filename = shift @_;
my $filehandle;
if ($filename =~ /gz$/) {
open $filehandle, "gunzip -dc $filename |" or die $!;
}
else {
open $filehandle, "<$filename" or die $!;
}
return $filehandle;
}
33 changes: 33 additions & 0 deletions seq_st_en_merge_overlapping.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/perl

use strict;
use warnings;

# takes as input file with:
# chrontigname_st_en or chrontigname\tst\ten, and gives back chrontig with merged intervals:

my %maskstrings;

while (<>) {
next unless /^>?\s*(\S+)[\t_ ](\d+)[\t_ ](\d+)\s*$/;

my ($chrontig, $st, $en) = ($1, $2, $3);
($st, $en) = ($en, $st) if $st > $en;

if (exists $maskstrings{$chrontig}) {
$maskstrings{$chrontig} .= "0" x ($en - length($maskstrings{$chrontig}))
}
else {
$maskstrings{$chrontig} .= "0" x $en
}
substr($maskstrings{$chrontig}, $st - 1, $en - $st + 1, "1" x ($en - $st + 1));
}

for my $id (keys %maskstrings)
{
my $mask = $maskstrings{$id};
while ($mask =~ /(1+)/g)
{
print $id . "\t" . (pos($mask) - length($1) + 1) . "\t" . pos($mask) . "\n";
}
}

0 comments on commit f9b84cd

Please sign in to comment.