Skip to content

Commit 806aeed

Browse files
committed Jun 2, 2021
Fixed regex for non-sprot databases
1 parent 497dcdd commit 806aeed

File tree

2 files changed

+43
-13
lines changed

2 files changed

+43
-13
lines changed
 

‎.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
.DS_Store

‎parse_hmmtbl.pl

+42-13
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
#!/usr/bin/perl
22
## Pombert Lab, 2018
33
my $name = 'parse_hmmtbl.pl';
4-
my $version = '0.2b';
5-
my $updated = '2021-04-06';
4+
my $version = '0.3';
5+
my $updated = '2021-06-02';
66

77
use strict; use warnings; use Getopt::Long qw (GetOptions);
88

@@ -32,23 +32,52 @@
3232

3333
## Print table header
3434
open OUT, ">", "$table" or die "Can't create file $table: $!\n";
35-
print OUT "Query\tTarget\tE-value\tProduct\tGenus\tSpecies\tOS descriptor\n";
35+
my $hflag = 0;
3636

3737
## Parsing files
3838
while (my $file = shift@tbl){
3939
open IN, "<", "$file" or die "Can't read file $file: $!\n";
4040
while (my $line = <IN>){
4141
chomp $line;
42-
if ($line =~ /^#/){ next;} ## Skipping comments
43-
## File format is space separated, must use a regex...
44-
elsif ($line =~ /^(\S+)\s+-\s+(\S+)\s+-\s+(\S+e-\d+)(?:\s+\S+){6}\s+(?:\d+\s+){7}(.*)\sOS=(.*?)(?:OX|GN|PE|SV)=/){
45-
my $target = $1;
46-
my $query = $2;
47-
my $evalue = $3;
48-
my $product = $4;
49-
my $OS = $5;
50-
my ($species,$genus) = $OS =~ /^((\S+)\s+\S+)/;
51-
print OUT "$query\t$target\t$evalue\t$product\t$genus\t$species\t$OS\n";
42+
if ($line =~ /^#/){ next; } ## Skipping comments
43+
else {
44+
## File format is space separated, must use a regex...
45+
my @columns = split (/\s+/, $line);
46+
47+
my $target = $columns[0];
48+
my $taccession = $columns[1];
49+
my $query = $columns[2];
50+
my $qaccession = $columns[3];
51+
my $fevalue = $columns[4];
52+
my $fscore = $columns[5];
53+
my $fbias = $columns[6];
54+
my $devalue = $columns[7];
55+
my $dscore = $columns[8];
56+
my $dbias = $columns[9];
57+
## 10-17 domain number estimation
58+
59+
my $description;
60+
for my $num (18..$#columns){
61+
$description .= "$columns[$num] ";
62+
}
63+
64+
if ($description =~ /(.*)\sOS=(.*?)(?:OX|GN|PE|SV)=/){
65+
my $product = $1;
66+
my $OS = $2;
67+
my ($species,$genus) = $OS =~ /^((\S+)\s+\S+)/;
68+
if ($hflag == 0){
69+
print OUT "Query\tTarget\tE-value\tProduct\tGenus\tSpecies\tOS descriptor\n";
70+
$hflag = 1;
71+
}
72+
print OUT "$query\t$target\t$fevalue\t$product\t$genus\t$species\t$OS\n";
73+
}
74+
else {
75+
if ($hflag == 0){
76+
print OUT "Query\tTarget\tE-value\tDescription\n";
77+
$hflag = 1;
78+
}
79+
print OUT "$query\t$target\t$fevalue\t$description\n";
80+
}
5281
}
5382
}
5483
}

0 commit comments

Comments
 (0)