-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtarget_regions.pm
132 lines (112 loc) · 4.5 KB
/
target_regions.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/perl -w
package target_regions;
use strict;
# If a delimiter has not been specified, set the default.
sub snpDelimiter {
if (!defined $main::snpDelimiter) {$main::snpDelimiter = ":";}
}
sub defineRegions {
@target_regions::targetRegions = ();
# If the user has specified that the genome should be broken
# up by chromosome, but further division has not been specified,
# assume that each chromosome is to be run in its entirety.
if ($main::divideGenome eq "c" && !defined $main::targetRegionSize) {$main::targetRegionSize = 0;}
# If no information on how to break up the genome is provided,
# use the default of calling on 100kbp chunks on each chromosome.
if (!defined $main::divideGenome) {
$main::divideGenome = "c";
if (!defined $main::targetRegionSize) {$main::targetRegionSize = 100;}
}
# Split the genome up into chromosomes for variant calling.
if ($main::divideGenome eq "c") {
# If the whole genomes are to be called on in a single run.
if ($main::targetRegionSize == 0) {
if (defined $main::referenceSequence) {
$target_regions::targetRegions[0] = $main::referenceSequence;
} else {
foreach my $chr (sort keys %{$main::refSequences}) {push(@target_regions::targetRegions, $chr);}
}
# Include the genome coordinates for each chromosome.
for (my $chr = 0; $chr < @target_regions::targetRegions; $chr++) {
my $extent = getChromosomeExtents($target_regions::targetRegions[$chr]);
$target_regions::targetRegions[$chr] = "$target_regions::targetRegions[$chr]:1-$extent";
}
# Else if the chromosomes are to be further sub-divided.
} else {
my ($chrMin, $chrMax);
my @tempArray = ();
my $kiloBase = 1000;
if (defined $main::referenceSequence) {
$tempArray[0] = $main::referenceSequence;
} else {
foreach my $chr (sort keys %{$main::refSequences}) {push(@tempArray, $chr);}
}
foreach my $chr (@tempArray) {
my $start = 0;
my $end = $kiloBase*$main::targetRegionSize;
my $extent = getChromosomeExtents($chr);
while() {
push(@target_regions::targetRegions, "$chr:$start-$end");
$start += $kiloBase*$main::targetRegionSize;
$end += $kiloBase*$main::targetRegionSize;
if ($end > $extent) {
$end = $extent;
push(@target_regions::targetRegions, "$chr:$start-$end");
last;
}
}
}
}
# Variant call on the whole genome in one pass.
} elsif ($main::divideGenome eq "w") {
print STDERR ("WHOLE GENOME CALLING NOT YET IMPLEMENTED\n");
exit(1);
# Variant call on bed regions.
} elsif ($main::divideGenome eq "b") {
print STDERR ("BED FILE CALLING NOT YET IMPLEMENTED\n");
exit(1);
# Unknown options.
} else {
print STDERR ("\n***SCRIPT TERMINATED***\n\n");
print STDERR ("Options -divide-genome can take one of the following values:\n");
print STDERR ("\tw - whole genome,\n");
print STDERR ("\tc - break up by chromosome (can only use -divide in conjunction with this),\n");
print STDERR ("\tb - define regions with a bed file.\n\n");
print STDERR ("Error in target_regions::defineRegions.\n");
exit(1);
}
}
# Use the reference dictionary file to get chromosome extents.
sub getChromosomeExtents {
my $chromosome = $_[0];
my $extent;
open(DICT, "<$main::referenceBin/$main::referenceDictionary") ||
die("Couldn't find reference dictionary file:\n\t$main::referenceBin/$main::referenceDictionary\n");
while(<DICT>) {
chomp;
if ($_ !~ /^(\@SQ|\@HD)/) {
print STDERR ("\n***SCRIPT TERMINATED***\n\n");
print STDERR ("No information on chromosome $chromosome in file:\n\t$main::referenceBin/$main::referenceDictionary\n");
exit(1);
}
if ($_ =~ /^\@SQ/) {
my $dictionaryChromosome = (split(/:/, ( (split(/\t/, $_) )[1])))[1];
if ($dictionaryChromosome =~ /^$chromosome$/) {
$extent = (split(/\t/, $_))[2];
$extent = (split(/:/, $extent))[1];
}
}
}
close(DICT);
# If the defined chromosome does not extent in the reference
# dictionsry, terminate with an error.
if (!defined $extent) {
print STDERR ("\n***SCRIPT TERMINATED***\n\n");
print STDERR ("Reference sequence \"$chromosome\" does not exist in the reference dictionary:\n");
print STDERR ("\t$main::referenceBin/$main::referenceDictionary\n");
print STDERR ("Error in target_regions::getChromosomeExtents.\n");
exit(1);
}
return $extent;
}
1;