-
Notifications
You must be signed in to change notification settings - Fork 1
/
selectCandidate.py
36 lines (29 loc) · 1.15 KB
/
selectCandidate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Selecting the best HLA consensus sequences from a samtools pileup consensus FASTA
import click
import copy
from Bio import SeqIO
# Command-line entry point.
# NOTE: click uses the function docstring as the --help text, so the docstring
# is kept verbatim and the extra documentation lives in comments instead.
#
# Options:
#   --fasta/-f   path of the multi-FASTA file to read (required; without it
#                SeqIO.parse would be handed None).
#   --length/-l  minimum record length to keep. Defaults to 0 so that omitting
#                it keeps every record instead of comparing len(rec) to None.
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('--fasta', '-f', type=str, required=True,
              help='multi FASTA input file to select candidates from')
@click.option('--length', '-l', type=int, default=0,
              help='length of the shortest contig')
def getFastaRecords(fasta, length):
    """Selecting only particular entries from a multi-FASTA (right now only considering length)"""
    # Materialize the filtered records as a list: makePairs() calls len() on
    # its argument and appends to it, so a generator would not work here.
    frs = [rec for rec in SeqIO.parse(fasta, "fasta") if len(rec) >= length]
    # Pad a lone candidate with a clone, then write everything to stdout.
    printSeqs(makePairs(frs))
def printSeqs(records):
    """Write every record to standard output in FASTA format.

    records: iterable of objects exposing ``format("fasta")`` (presumably
    Biopython SeqRecord instances -- confirm against the caller).
    """
    for seq in records:
        # Single-argument print(...) behaves the same on Python 2 and 3, while
        # the old ``print x`` statement is a SyntaxError on Python 3.
        # print appends its own newline after the formatted text, exactly as
        # the original statement did, so the emitted bytes are unchanged.
        print(seq.format("fasta"))
def makePairs(candidates):
    """Make sure a lone candidate gets a partner by appending a clone of it.

    When ``candidates`` holds exactly one record, a deep copy of that record
    is appended whose ``id``, ``name`` and ``description`` each get the suffix
    ``"clone"``. Lists of any other length (including empty) are left as is.

    candidates: a list (not a generator -- ``len()`` and ``append()`` are used)
        of records carrying string ``id``, ``name`` and ``description``
        attributes.

    Returns the same list object that was passed in; it is modified in place
    when the clone is added.
    """
    if len(candidates) == 1:
        # Deep copy so renaming the clone never touches the original record.
        clone = copy.deepcopy(candidates[0])
        for attr in ("id", "name", "description"):
            setattr(clone, attr, getattr(clone, attr) + "clone")
        candidates.append(clone)
    return candidates
# Run the click command only when executed as a script, not on import.
if "__main__" == __name__:
    getFastaRecords()