-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfasta_generate_regions.py
42 lines (29 loc) · 1.18 KB
/
fasta_generate_regions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env python
# code downloaded from https://github.com/ekg/freebayes/blob/master/scripts/fasta_generate_regions.py
'''
Example usage:
python fasta_generate_regions.py /hpf/tools/centos6/mugqic-pipelines/source/resource/genomes/species/Homo_sapiens.GRCh37/genome/Homo_sapiens.GRCh37.fa.fai 100000 > fasta.regions
'''
import sys
def generate_regions(fasta_index_lines, region_size):
    """Yield "<name>:<start>-<end>" region specifiers covering each sequence.

    Every non-blank line of ``fasta_index_lines`` is split on tabs; the first
    field is used as the sequence name and the second as its length. Each
    sequence is cut into consecutive windows of ``region_size``, starting at
    0, where the end of one window is the start of the next and the last
    window is clipped to the sequence length.

    Args:
        fasta_index_lines: iterable of text lines (for example an open
            ``.fai`` file).
        region_size: window size as a positive integer.

    Yields:
        One region string per window, e.g. ``"chr1:0-100000"``.

    Raises:
        ValueError: if ``region_size`` is not positive, or a length field is
            not an integer.
        IndexError: if a non-blank line has fewer than two tab-separated
            fields.
    """
    # A non-positive size would never advance the window start and the loop
    # below would spin forever, so reject it up front.
    if region_size <= 0:
        raise ValueError(
            "region size must be a positive integer, got %r" % (region_size,))

    for line in fasta_index_lines:
        stripped = line.strip()
        if not stripped:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        fields = stripped.split("\t")
        chrom_name = fields[0]
        chrom_length = int(fields[1])

        region_start = 0
        while region_start < chrom_length:
            # Clip the final window so it never runs past the sequence end.
            region_end = min(region_start + region_size, chrom_length)
            yield chrom_name + ":" + str(region_start) + "-" + str(region_end)
            region_start = region_end


def main(argv):
    """Run the command-line tool and return its process exit status.

    Args:
        argv: full argument vector; ``argv[1]`` is a FASTA file or its
            ``.fai`` index and ``argv[2]`` is the region size.

    Returns:
        1 after printing usage when fewer than two arguments are supplied,
        0 after all regions have been written to stdout.
    """
    # Both positional arguments are required; checking only for "no arguments
    # at all" let a lone path fall through to an IndexError on argv[2].
    if len(argv) < 3:
        # Single-argument print(...) behaves the same on Python 2 and 3; the
        # spacing reproduces what the comma-separated print statement emitted.
        print("usage:  " + argv[0] + "  <fasta file or index file> <region size>")
        print("generates a list of freebayes/bamtools region specifiers on stdout")
        print("intended for use in creating cluster jobs")
        return 1

    fasta_index_path = argv[1]
    if not fasta_index_path.endswith(".fai"):
        # A FASTA path was given; read the index file that sits next to it.
        fasta_index_path = fasta_index_path + ".fai"
    region_size = int(argv[2])

    # The context manager closes the index file even if a line fails to parse.
    with open(fasta_index_path) as fasta_index_file:
        for region in generate_regions(fasta_index_file, region_size):
            print(region)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))