forked from mtokuyama/ERVmap
-
Notifications
You must be signed in to change notification settings - Fork 0
/
normalize_with_file.pl
90 lines (71 loc) · 1.57 KB
/
normalize_with_file.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env perl
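#
# Normalize a tab-delimited table by per-sample size factors
# (also exercises read_in_tab_delim_file_to_hash).
#
# Usage: normalize_with_file.pl NORM_FILE TABLE [SIZE_COLUMN]
#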
use warnings;
use strict;
# Divide every value of a tab-delimited table by the size factor of the
# sample named in that column's header.
#
# Positional arguments:
#   1. normalization file: tab-delimited, sample name in column 0
#   2. table to normalize: tab-delimited; the row whose first field is
#      "gene" is the header naming the sample of each column
#   3. column index of the normalization file that holds the size factor
#      (optional, default 1)
#
# The normalized table is written to STDOUT. The size factor chosen for each
# sample, and a per-column summary at the end, are written to STDERR.
my $norm    = shift;
my $fn      = shift;
my $sizecol = shift // 1;    # col of size factor
die "usage: $0 NORM_FILE TABLE [SIZE_COLUMN]\n"
    unless (defined $norm && defined $fn);
die "size factor column must be an integer, got '$sizecol'\n"
    unless ($sizecol =~ /\A-?\d+\z/);

my $test = 0;    # set to 1 to dump the parsed normalization table to STDERR
my $n = read_in_tab_delim_file_to_hash($norm, 0);
if ($test) {
    for my $k (keys %{$n}) {
        print STDERR join("\t", ">$k<", $n->{$k}[$sizecol] // ''), "\n";
    }
}

my $fh      = get_in_fh($fn);
my @sizes   = ();    # size factor per table column (index 0 is the row name)
my @samples = ();    # header name per table column, used in error messages
my @usable  = ();    # true where $sizes[$i] is a non-zero number
while (my $li = <$fh>) {
    chomp($li);
    my @t = split(/\t/, $li);
    next unless (@t);    # an empty line has nothing to normalize

    if ($t[0] eq "gene") {
        @samples = @t;
        for my $i (1 .. $#t) {
            # Test with exists() first: a direct nested lookup would
            # autovivify an empty entry in %$n for every unknown sample.
            my $size = exists $n->{ $t[$i] } ? $n->{ $t[$i] }[$sizecol] : undef;
            $sizes[$i]  = $size;
            $usable[$i] = do {
                no warnings 'numeric';    # non-numeric text compares as 0 here
                defined($size) && $size != 0;
            };
            print STDERR "$t[$i]\t", $size // '', "\n";
            warn "no usable size factor for sample '$t[$i]' in $norm\n"
                unless ($usable[$i]);
        }
        print "$li\n";
        next;
    }

    my @normalized = ();
    for my $i (1 .. $#t) {
        # Fail with context instead of a bare "Illegal division by zero".
        unless ($usable[$i]) {
            my $which = defined $samples[$i]
                ? "sample '$samples[$i]'"
                : "a column without a \"gene\" header entry";
            die "$fn line $.: no usable size factor for column $i ($which)\n";
        }
        push(@normalized, sprintf("%.3f", $t[$i] / $sizes[$i]));
    }
    # Every data row ends with a tab before the newline; that layout is kept
    # unchanged so existing consumers of the output see the same bytes.
    print join("\t", $t[0], @normalized), "\t\n";
}
close($fh);

# Summary of the size factor applied to each table column.
for my $i (1 .. $#sizes) {
    print STDERR join("\t", $i, $sizes[$i] // ''), "\n";
}
# Read a tab-delimited file into a hash of rows.
#
# Arguments:
#   $fn        - name of the file to read (opened through get_in_fh)
#   $col_list  - ","-separated column indices used to construct the key
#   $key_delim - string placed between the key columns; "|" is used when
#                the argument is omitted or empty
#
# Returns a reference to a hash whose value for each key is an array ref of
# the columns of that line, as produced by split (which drops trailing empty
# columns). Lines starting with "#" and empty lines are skipped. When the
# same key occurs on several lines, the last one wins.
sub read_in_tab_delim_file_to_hash {
    my ($fn, $col_list, $key_delim) = @_;
    my %h = ();
    my @keyinx = split(/,/, $col_list);
    # A plain truth test here would also throw away the valid delimiter "0".
    $key_delim = "|" unless (defined $key_delim && length $key_delim);
    my $fh = get_in_fh($fn);
    while (my $li = <$fh>) {
        # Strip the line ending including a CR, so files with CRLF line
        # endings do not leave "\r" glued to the last column.
        $li =~ s/[\r\n]+\z//;
        next if ($li =~ /^\#/);
        next if ($li eq '');
        my @t = split(/\t/, $li);
        # A key column missing from a short line contributes an empty string.
        my $key = join($key_delim, map { $t[$_] // '' } @keyinx);
        $h{$key} = [@t];
    }
    close($fh);
    return \%h;
}
# Open a file for reading and return its lexical file handle.
#
# Arguments:
#   $fn - name of the file to read; "-" selects standard input
#
# Dies when no name is given or when the file cannot be opened; the message
# includes the system error text.
sub get_in_fh {
    my ($fn) = @_;
    die "cannot open file: no file name given"
        unless (defined $fn && length $fn);
    my $fh;
    if ($fn eq '-') {
        # Two-argument open treated "-" as STDIN; keep that for callers.
        # The handle is a duplicate, so closing it leaves STDIN itself open.
        open($fh, '<&', \*STDIN) or die "cannot open file $fn: $!";
        return $fh;
    }
    # Three-argument open takes the name literally: surrounding whitespace
    # is not stripped and characters such as ">" or "|" in the name are not
    # interpreted as an open mode or a command.
    open($fh, '<', $fn) or die "cannot open file $fn: $!";
    return $fh;
}