forked from ding-lab/misplice
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprefilter.pl
101 lines (87 loc) · 1.76 KB
/
prefilter.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/perl
use strict;
use warnings;
# Usage text printed when the script is called with the wrong number of
# arguments.  Any tab characters in the heredoc are stripped before use.
my $usage = <<OUT;
perl prefilter dir_in info
OUT
$usage =~ s/\t+//g;

# Exactly two arguments are required.  They are later joined as
# "$dir_in/$info" and expanded with glob() to find the input files.
die $usage if @ARGV != 2;
my $dir_in = $ARGV[0];
my $info   = $ARGV[1];
# Return the key of %hash that holds the numerically largest value.
#
# The (\%) prototype means callers pass a hash (e.g. largest_key(%h)) and the
# sub receives a reference to it.
#
# Ties are resolved deterministically: keys are visited in sorted (string)
# order and only a strictly larger value replaces the current best, so the
# lexically smallest of the tied keys wins.  Iterating in raw hash order
# would make the result depend on Perl's per-process hash randomisation.
#
# Returns 0 when the hash is empty or when no value is greater than -1.
sub largest_key (\%)
{
    my $in_hash = shift;
    my ($large_key, $large_val) = (0, -1);
    foreach my $key (sort keys %$in_hash)
    {
        my $val = $in_hash->{$key};
        if ($val > $large_val)
        {
            $large_val = $val;
            $large_key = $key;
        }
    }
    return $large_key;
}
# Build the key identifying the junction described by one alignment line.
#
# Arguments: the tab-separated fields of the line.  Field 3 is used as a
# number and field 5 is scanned for digit runs that are followed by a word
# character (presumably a SAM-style position and CIGAR string -- confirm
# against the input format).
#
# Returns "start:end", where start = field 3 + first number and
# end = start + second number.  Returns nothing when field 3 or field 5 is
# missing, or when field 5 yields fewer than two numbers.
sub _junction_key
{
    my @fields = @_;
    return unless defined $fields[3] && defined $fields[5];
    my @number = ($fields[5] =~ m/(\d+)\w/g);
    return unless @number >= 2;
    my $start = $fields[3] + $number[0];
    my $end   = $start + $number[1];
    # The separator keeps distinct junctions apart: plain concatenation
    # would give start=1/end=23 and start=12/end=3 the same key "123".
    return "$start:$end";
}

# Process every file matching "$dir_in/$info" (info may be a glob pattern
# such as "*.v2.filtered.2.detailed.alignment.2").
#
# A line with exactly five tab-separated fields is a header; every other
# line is an alignment line belonging to the most recent header.  For each
# input file, "<file>.max" receives every header line plus, per header, only
# the alignment lines whose junction is the most frequent one under that
# header (as chosen by largest_key).
foreach my $input (glob("$dir_in/$info"))
{
    print "$input\n";

    # Pass 1: count how often each junction occurs under each header.
    my %count_for;
    my $id;
    open(my $in, '<', $input) or die "Cannot open $input for reading: $!\n";
    while (my $line = <$in>)
    {
        chomp $line;
        my @fields = split(/\t/, $line);
        if (@fields == 5)
        {
            $id = $line;
            next;
        }
        my $jd = _junction_key(@fields);
        if (!defined $id || !defined $jd)
        {
            # No header seen yet, or the line has no usable junction.
            warn "$input line $.: skipping line without header or junction\n";
            next;
        }
        $count_for{$id}{$jd}++;
    }
    close($in);

    # Pick the most frequent junction for every header.
    my %mark;
    foreach my $header (keys %count_for)
    {
        $mark{$header} = largest_key(%{ $count_for{$header} });
    }

    # Pass 2: copy headers and the alignment lines on the chosen junction.
    open($in, '<', $input) or die "Cannot reopen $input for reading: $!\n";
    open(my $out, '>', "$input.max")
        or die "Cannot open $input.max for writing: $!\n";
    # Start with no current header so that lines preceding the first header
    # are handled exactly as in pass 1 instead of inheriting the last header
    # seen there.
    undef $id;
    while (my $line = <$in>)
    {
        chomp $line;
        my @fields = split(/\t/, $line);
        if (@fields == 5)
        {
            $id = $line;
            print {$out} "$line\n";
            next;
        }
        next unless defined $id && exists $mark{$id};
        my $jd = _junction_key(@fields);
        next unless defined $jd;
        print {$out} "$line\n" if $mark{$id} eq $jd;
    }
    close($in);
    # Buffered write errors only surface when the handle is closed.
    close($out) or die "Cannot finish writing $input.max: $!\n";
}