-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathall2many.pl
executable file
·58 lines (47 loc) · 1.19 KB
/
all2many.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/local/bin/perl
#
# Usage information updated January 21, 2005 - JDW
# Argument guard: the script needs exactly two arguments (the input FASTA
# file and the minimum contig size).  With any other argument count, print
# the usage text to standard output and stop with exit status 0.
if (@ARGV != 2) {
    my $usage = <<"EOH";
Usage: $0 input.file min_size_contig
Separates a set of FASTA-format sequences in the file named
as the first argument into individual files, each of which is
at least as long as the second argument. Sequences shorter
than the second argument are ignored. The name of each new
file is the name of the contig.
Examples:
$0 fasta.screen.contigs 1000
creates individual files from the input file fasta.screen.contigs
for all contigs at least 1000 bases long.
$0 another.fasta.file 1
creates individual files from the input file another.fasta.file
for all contigs.
EOH
    print $usage;
    exit 0;
}
# dump_seq($name, $seq)
#
# Writes one FASTA record to its own file in the current directory.
#
#   $name - header line of the record, without its trailing newline
#           (normally starts with '>').
#   $seq  - sequence text; written verbatim after the header line.
#
# The output file is named after the first whitespace-delimited word of the
# header, with the first '>' removed and ".fa" appended.  An existing file of
# that name is overwritten.  Dies with the file name and OS error if the file
# cannot be created or completely written.
sub dump_seq {
    my ($name, $seq) = @_;

    # Build the file stem in a lexical so nothing leaks into package scope.
    my $stem = $name;
    $stem =~ s/>//;              # drop the first '>' of the header
    $stem =~ s/^(\S*).*$/$1/;    # keep only the first word
    my $path = "$stem.fa";

    # Three-argument open: characters such as '>' or '&' that remain in the
    # stem are treated as part of the file name, never as an open mode.
    open my $out, '>', $path or die "Cannot create $path: $!";
    print {$out} "$name \n" or die "Cannot write to $path: $!";
    print {$out} $seq       or die "Cannot write to $path: $!";

    # Buffered write errors (e.g. a full disk) only surface at close.
    close $out or die "Cannot close $path: $!";
    return;
}
# Main pass: read the input FASTA file ($ARGV[0]) line by line and hand every
# record whose sequence length is at least $ARGV[1] to dump_seq().
#
# A line starts a new record when it begins with '>' or contains the text
# "Contig".  Every other line is appended to the current record's sequence.
my $min_len = $ARGV[1];
my ($name, $seq, $len);    # $name stays undefined until the first header is seen

open my $big, '<', $ARGV[0] or die "Cannot open $ARGV[0]: $!";
while (my $line = <$big>) {
    if ($line =~ /Contig|^>/) {
        # A new header closes the previous record.  Skip the write when no
        # header has been seen yet, so no nameless ".fa" file is produced.
        dump_seq($name, $seq) if defined $name && $len >= $min_len;
        chomp $line;
        $name = $line;
        $seq  = '';
        $len  = 0;
    }
    else {
        $seq .= $line;
        # Count bases only: strip the line terminator (LF or CRLF, or none on
        # an unterminated final line) from a copy before measuring.
        (my $bases = $line) =~ s/\r?\n\z//;
        $len += length $bases;
    }
}

# The last record has no following header to trigger its write; apply the
# same minimum-length rule to it as to every other record.
dump_seq($name, $seq) if defined $name && $len >= $min_len;
close $big;