Skip to content

Commit

Permalink
patch 2 for pdb entry
Browse files Browse the repository at this point in the history
  • Loading branch information
SchwarzMarek committed Sep 4, 2019
1 parent 9bf92b6 commit 0e614f3
Showing 1 changed file with 24 additions and 14 deletions.
38 changes: 24 additions & 14 deletions rna_blast_analyze/BR_core/extend_hits.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,23 @@ def load_genome(root_dir, accession, start, end):
return next(SeqIO.parse(ff, format='fasta'))[start:end]


def match_acc(hit_id, blast_regexp):
# get a index name to the blastdb
hname = re.search(blast_regexp, hit_id)

if hname and 'pdb|' in hit_id:
# case when sequence is from pdb e.g. gi|1276810701|pdb|5VT0|R
# but it is available in the database under 5VT0_R
bdb_accession = hname.group().replace('|', '_')
elif hname is not None:
bdb_accession = hname.group()
else:
msg = 'Provided regexp returned no result for {}.\n' \
'Please provide regular expression capturing sequence id.'.format(hit_id)
raise RuntimeError(msg)
return bdb_accession


def expand_hits(hits, blast_db, query_length, extra=0, blast_regexp=None, skip_missing=False, msgs=None):
"""takes list of blast.HSP objects as first argument and
path to local blast database as second argument
Expand Down Expand Up @@ -81,14 +98,12 @@ def expand_hits(hits, blast_db, query_length, extra=0, blast_regexp=None, skip_m
# add blast record
d['blast'] = hit

# get a index name to the blastdb
hname = re.search(blast_regexp, hit[0])
if not hname:
try:
bdb_accession = match_acc(hit[0], blast_regexp)
except RuntimeError as e:
remove_files_with_try([temp_filename, blast_tempfile])
msg = 'Provided regexp returned no result for {}.\n' \
'Please provide regular expression capturing sequence id.'.format(hit[0])
raise RuntimeError(msg)
bdb_accession = hname.group()
raise e

d['blast'][0] = bdb_accession
loc.append(d)

Expand Down Expand Up @@ -251,13 +266,8 @@ def expand_hits_from_fasta(hits, database, query_length, extra=0, blast_regexp=N
# add blast record
d['blast'] = hit

# get a index name to the blastdb
hname = re.search(blast_regexp, hit[0])
if not hname:
print(blast_regexp)
raise RuntimeError('provided regexp returned no result for %s,'
' please provide regexp valid even for this name', hit[0])
bdb_accession = hname.group()
bdb_accession = match_acc(hit[0], blast_regexp)

d['blast'][0] = bdb_accession

# read from file
Expand Down

0 comments on commit 0e614f3

Please sign in to comment.