-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsort_pre2_next2.pl
75 lines (66 loc) · 1.56 KB
/
sort_pre2_next2.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
use strict;
use warnings;
# Read "pl4.txt", whose records look like
#     pre1-pre2-next1-next2<TAB><TAB>count
# group them by their "pre1-pre2" prefix, keep only the $NBestN
# highest-count records of each group, and print the survivors.
my $input_file = 'pl4.txt';
open(my $in, '<', $input_file)
    or die "Cannot open '$input_file' for reading: $!\n";

my %midpos;           # "pre1-pre2" => { "pre1-pre2-next1-next2" => count }
my $NBestN    = 20;   # records kept per group; also read by take_only_best
my $linecount = 0;
my $skipped   = 0;    # lines that did not look like a record

while (my $line = <$in>)
{
    $linecount++;

    # Capture straight into lexicals so a non-matching line can never be
    # stored using capture values that do not belong to it.  The final
    # line of the file is accepted even without a trailing newline.
    my ($pre1, $pre2, $next1, $next2, $count) =
        $line =~ /(\d+)-(\d+)-(\d+)-(\d+)\t\t(\d+)(?:\n|\z)/;

    if (defined $count)
    {
        #dcode# print STDERR "$pre1 - $pre2 - $next1 - $next2 : $count\n";
        # The inner hash is autovivified on first use of a prefix.
        $midpos{"$pre1-$pre2"}{"$pre1-$pre2-$next1-$next2"} = $count;
    }
    else
    {
        $skipped++;
    }

    # Trim every group back to its best entries from time to time so the
    # table does not grow without bound while reading.
    if ($linecount % 100000 == 0)
    {
        print STDERR ".";
        foreach my $group (keys %midpos)
        {
            $midpos{$group} = { take_only_best($midpos{$group}) };
        }
    }
}
close $in;
print STDERR "\nskipped $skipped malformed line(s)\n" if $skipped;

# okay. Time to report.
# Walk every ordered pair (k, i) with k != i in 0..127 and print the best
# records of that pair, highest count first.
for my $k (0 .. 127)
{
    for my $i (0 .. 127)
    {
        next if $k == $i;

        # Pairs that never occurred in the input have no entry; skip them
        # rather than dereferencing undef.
        my $href = $midpos{"$k-$i"};
        next unless defined $href;

        my %bests = take_only_best($href);
        # Equal counts are ordered by key so the output is reproducible.
        my @best_keys = sort { $bests{$b} <=> $bests{$a} or $a cmp $b }
                        keys %bests;
        for my $best (@best_keys)
        {
            print " ", $best, "\t", $bests{$best}, "\n";
        }
    }
}
# take_only_best(\%count_of)
#
# Returns a hash (as a key/value list) holding at most $NBestN entries of
# the referenced hash: those with the largest numeric values.  Entries with
# equal values are ranked by key (string order), so which ones survive the
# cut is reproducible from run to run.
#
# An undefined argument yields an empty hash instead of dying on the
# dereference under "strict refs".  The input hash is not modified.
sub take_only_best
{
    my $counts = $_[0];
    my %result;
    return %result unless defined $counts;

    # Rank the keys by descending value without copying the whole hash.
    my @ranked = sort { $counts->{$b} <=> $counts->{$a} or $a cmp $b }
                 keys %$counts;

    # copy only first NBestN (or everything, when there are fewer)
    my $keep = @ranked < $NBestN ? scalar(@ranked) : $NBestN;
    my @kept = @ranked[0 .. $keep - 1];
    @result{@kept} = @{$counts}{@kept};

    return %result;
}