-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmask_polyA.pl
executable file
·42 lines (28 loc) · 997 Bytes
/
mask_polyA.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/perl -w
## replace every run of n or more A's/a's (default n = 10) with the same number of N's
use strict;
use Getopt::Long;
use Bio::SeqIO;
## Command-line state:
##   $in   - path of the FASTA file to read          (--in  / -i, required)
##   $out  - path of the FASTA file to write         (--out / -o, required)
##   $n    - minimum length of an A-run to be masked (-n, default 10)
##   $help - print the usage message and exit        (--help / -h)
my ($in, $out, $n, $help);
$n = 10;
my $usage = "Usage: mask_polyA.pl --in input.fa --out output.fa -n number of As (default: 10)\n";

GetOptions(
    'i|in=s'  => \$in,
    'o|out=s' => \$out,
    'n=i'     => \$n,
    'h|help'  => \$help,
) || die $usage;

die $usage if $help || !$in || !$out;

## Refuse run lengths below 1.  With n = 0 the pattern below becomes
## [Aa]{0,}, which matches the empty string at every position, and a
## negative n does not form a valid {n,} quantifier at all, so neither
## value can produce a meaningful mask.
die "mask_polyA.pl: -n must be a positive integer (got $n)\n$usage" if $n < 1;

## Pattern for one maskable run: n or more consecutive A's, either case.
## The capture group is kept so the pattern can also be used with split().
my $regexp = "([Aa]{$n,})";

## FASTA reader and writer; the leading ">" asks Bio::SeqIO to open $out for writing.
my $input  = Bio::SeqIO->new(-file => $in,      '-format' => 'fasta');
my $output = Bio::SeqIO->new(-file => "> $out", '-format' => 'fasta');
## Main loop: read one record at a time from $input, mask its poly-A runs,
## and write the modified record to $output.
while ( my $seq = $input->next_seq() ) {
    my $s = $seq->seq();

    ## Replace every run matched by $regexp with an equally long run of N's
    ## in a single left-to-right pass.  The extra parentheses make $1 the
    ## whole matched run regardless of any groups inside $regexp.
    ## A substitution is used instead of a "while (m//g)" loop that splices
    ## the string by hand: assigning to $s inside such a loop resets pos($s),
    ## so each iteration would rescan the sequence from its start (and would
    ## never finish if the pattern could match the empty string).
    $s =~ s/($regexp)/'N' x length($1)/ge;

    $seq->seq($s);
    $output->write_seq($seq);
}