-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathgenbank2fasta.pl
57 lines (50 loc) · 1.15 KB
/
genbank2fasta.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#############################################################
# This is perl code that tries to make fasta files quickly! #
#############################################################
use strict;
use warnings;

# Convert a GenBank flat file to FASTA, written to STDOUT.
#
# Usage: genbank2fasta.pl <genbank file>
#
# A file whose name ends in ".gz" is decompressed on the fly via "gunzip -c";
# any other file is read directly.
#
# For each record (terminated by a "//" line) one FASTA entry is printed:
#     >ID [DEFINITION]
#     SEQUENCE            (all on a single line)
# where ID is the second whitespace-separated field of the LOCUS line,
# DEFINITION is the (possibly multi-line) DEFINITION text with runs of
# whitespace collapsed to single spaces, and SEQUENCE is everything between
# ORIGIN and "//" with digits and whitespace removed.
#
# Records with a sequence but no definition are printed with "[NONE]" and
# reported on STDERR; records with a sequence but no ID are reported on STDERR
# and skipped.
my $file = shift || die "Genbank file to open";

my $in;
if ($file =~ /\.gz\z/) {
    # List-form pipe open: the file name is passed to gunzip as a single
    # argument and never goes through a shell.
    open($in, '-|', 'gunzip', '-c', $file) || die "$! $file";
}
else {
    open($in, '<', $file) || die "$! $file";
}

my $indna  = 0;    # true while between an ORIGIN line and the closing "//"
my $indefn = 0;    # true while consuming DEFINITION continuation lines
my $defn;
my $id;
my $seq;

while (my $line = <$in>) {
    if ($indefn) {
        # Continuation lines of a DEFINITION start with a space.
        if (index($line, " ") == 0) {
            chomp $line;
            $defn .= $line;
            next;
        }
        # First non-continuation line: the definition is complete. Do NOT
        # skip this line; let it be handled by the checks below.
        $indefn = 0;
    }

    if (index($line, "LOCUS") == 0) {
        my @fields = split /\s+/, $line;
        $id = $fields[1];
        next;
    }

    if (index($line, "DEFINITION") == 0) {
        chomp $line;
        $line =~ s/DEFINITION\s+//;
        $defn   = $line;
        $indefn = 1;
        next;
    }

    if (index($line, "ORIGIN") == 0) {
        $indna = 1;
        next;
    }

    if (index($line, "//") == 0) {
        # End of record: emit whatever was collected, then reset state.
        $indna = 0;
        $defn =~ s/\s+/ /g if defined $defn;
        if ($id && $defn && $seq) {
            print ">$id [$defn]\n$seq\n";
        }
        elsif ($id && $seq) {
            print STDERR "NO DEFN FOR $id\n";
            print ">$id [NONE]\n$seq\n";
        }
        elsif ($seq) {
            print STDERR "NO ID for $seq in $file\n";
        }
        ($id, $defn, $seq) = (undef, undef, undef);
        next;
    }

    next unless ($indna);

    # Sequence line: drop the position numbers and all whitespace
    # (including the trailing newline), keep only the residues.
    $line =~ s/[\d\s]+//g;
    $seq .= $line;
}

# For a pipe, close() also reaps gunzip and fails if it exited non-zero,
# which is the only way a corrupt/truncated archive is noticed.
close($in)
    || warn "problem closing $file: " . ($! || "exit status $?") . "\n";