|
#!/usr/bin/perl
## Pombert Lab, 2018

## Strictures are enabled first so that every declaration below is
## compiled under them.
use strict;
use warnings;
use Getopt::Long qw(GetOptions);

## Script identity: name, release number and date of the last update.
my $name    = 'parse_hmmtbl.pl';
my $version = '0.3';
my $updated = '2021-06-02';
|
8 | 8 |
|
|
32 | 32 |
|
## Create the output table. The header line is not written here: it is
## emitted by the parsing loop together with the first data row, because
## the set of columns depends on the format of that row.
open(OUT, ">", "$table") or die "Can't create file $table: $!\n";

## Header flag: stays 0 until the header line has been written to OUT.
my $hflag = 0;
36 | 36 |
|
37 | 37 | ## Parsing files
|
## Parsing files
##
## Every file queued in @tbl is read line by line and one tab-separated row
## per hit is appended to OUT. Lines starting with '#' are comments. Data
## rows are whitespace-separated (presumably HMMER --tblout output; confirm
## against the files actually fed to this script):
##   field  0      target name
##   field  2      query name
##   field  4      full-sequence E-value
##   fields 18..   free-text description of the target
## The remaining fields (accessions, scores, biases, domain number
## estimation) are not used.
##
## Two output layouts exist. If the first data row carries a UniProt-style
## description ("<product> OS=<organism> OX=|GN=|PE=|SV=..."), the table gets
## the 7-column layout (product, genus, species, OS descriptor); otherwise
## it gets the 4-column layout with the raw description. $hflag records the
## choice (0 = header not written yet, 1 = 7-column, 2 = 4-column) and every
## later row is written in that same layout so the table never becomes
## ragged.
##
## @tbl is consumed (emptied) by this loop. The defined() test keeps a file
## literally named "0" from ending the loop early.
while (defined(my $file = shift @tbl)) {
    open my $in, "<", $file or die "Can't read file $file: $!\n";

    while (my $line = <$in>) {
        chomp $line;
        next if $line =~ /^#/;    ## Skipping comments
        next if $line !~ /\S/;    ## Skipping blank lines

        ## File format is space separated; the fixed fields are taken by
        ## position after splitting on runs of whitespace.
        my @columns = split(/\s+/, $line);

        ## Target, query and E-value live in fields 0, 2 and 4: a row
        ## shorter than that cannot produce a meaningful record.
        if (@columns < 5) {
            warn "Skipping malformed line $. in $file: fewer than 5 fields\n";
            next;
        }
        my ($target, $query, $fevalue) = @columns[0, 2, 4];

        ## Everything from field 18 onwards is the description. The slice
        ## is empty when the row has no description, which yields '' rather
        ## than undef, and join() leaves no trailing separator.
        my $description = join(' ', @columns[18 .. $#columns]);

        ## Defaults for rows whose description is not UniProt-style: the
        ## whole description stands in for the product.
        my ($product, $genus, $species, $OS) = ($description, '', '', '');
        my $uniprot = 0;

        if ($description =~ /(.*)\sOS=(.*?)(?:OX|GN|PE|SV)=/) {
            ($product, $OS) = ($1, $2);
            $uniprot = 1;

            ## The lazy capture stops right before the next tag and so
            ## keeps the separating blank; drop it.
            $OS =~ s/\s+$//;

            ## $species receives the first two words of the OS string,
            ## $genus the first word only. Both stay empty when the OS
            ## string holds a single word.
            if ($OS =~ /^((\S+)\s+\S+)/) {
                ($species, $genus) = ($1, $2);
            }
        }

        ## The first data row selects the layout and triggers the header.
        if ($hflag == 0) {
            if ($uniprot) {
                $hflag = 1;
                print OUT "Query\tTarget\tE-value\tProduct\tGenus\tSpecies\tOS descriptor\n";
            }
            else {
                $hflag = 2;
                print OUT "Query\tTarget\tE-value\tDescription\n";
            }
        }

        if ($hflag == 1) {
            print OUT "$query\t$target\t$fevalue\t$product\t$genus\t$species\t$OS\n";
        }
        else {
            print OUT "$query\t$target\t$fevalue\t$description\n";
        }
    }

    close $in;
}
|
0 commit comments