Skip to content

Commit

Permalink
Change appearance of help in html output
Browse files Browse the repository at this point in the history
version bump
remove unused field 'bstrand' from csv output
update anchor handling for very long hits
  • Loading branch information
SchwarzMarek committed Sep 9, 2019
1 parent 8bd17ba commit 4da7277
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 69 deletions.
5 changes: 0 additions & 5 deletions rna_blast_analyze/BR_core/BA_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def export_pandas_results(self):
'best_sequence',
'estart',
'eend',
'bstrand',
'blast_eval',
'query_start',
'query_end',
Expand Down Expand Up @@ -149,10 +148,6 @@ def export_pandas_results(self):
# extended end
data['eend'].append(hit.best_end)
continue
elif k == 'bstrand':
# blast strand
data['bstrand'].append(hit.source.annotations['blast'][1].strand)
continue
elif k == 'best_sequence':
# selected sequence
data['best_sequence'].append(str(hit.extension.seq))
Expand Down
101 changes: 69 additions & 32 deletions rna_blast_analyze/BR_core/expand_by_LOCARNA.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def locarna_worker(pack):
to_rna(blast_entry.sbjct),
anchor_length=anchor_length
)

if anchors.too_many_anchors:
ml.info('Too many anchors for {}. Can handle up to 520 distinct anchors.'.format(one_expanded_hit.id))
# extracted temp is my query

# access the locarna aligner directly
Expand Down Expand Up @@ -312,13 +315,18 @@ def run_locarna(query_file, subject_file, locarna_params):
return subject_file + '.loc_out'


def write_locarna_anchors_with_min_length(match_line, min_anchor_length=1):
def write_locarna_anchors_with_min_length(
match_line, min_anchor_length=1,
pa='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz',
itr='0123456789'
):
ml.debug(fname())
h1 = []
h2 = []
pa = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
part_desig = 0
max_l = len(pa)
part_desig = -1

to_many_anchors = False
for match in re.finditer(r'\|+', match_line, flags=re.IGNORECASE):
if len(match.group()) < min_anchor_length:
# skip the iterations below minimum length
Expand All @@ -331,21 +339,54 @@ def write_locarna_anchors_with_min_length(match_line, min_anchor_length=1):
c = 0
part_desig += 1
for _ in match.group():
if c == 9:
if c == 10:
c = 0
part_desig += 1
c += 1

if part_desig >= max_l:
to_many_anchors = True
continue
h1.append(pa[part_desig])
h2.append(str(c))
h2.append(itr[c])
c += 1

for i in range(len(match_line) - len(h1)):
h1.append('.')
h2.append('.')

anchor_l1 = ''.join(h1)
anchor_l2 = ''.join(h2)
return anchor_l1, anchor_l2
return anchor_l1, anchor_l2, to_many_anchors


def write_locarna_long_anchors(match_line, min_anchor_length=1):
    """
    Build the two anchor lines for a match line by labelling it from both ends.

    write_locarna_anchors_with_min_length is run twice:
      - on match_line, with only the uppercase letters as anchor names
      - on the reversed match_line, with the lowercase letters and the digits
        both given in reversed order; its output is then reversed back
    The two results are merged column by column, where '.' means "no label here".
    The overflow flags returned by the two passes are discarded.

    :param match_line: alignment match line, handed to
        write_locarna_anchors_with_min_length as is (and once reversed)
    :param min_anchor_length: minimal anchor length, passed through to both passes
    :return: tuple (anchor_l1, anchor_l2, True); the third item is always True
        (LocarnaAnchor stores it as its too_many_anchors flag)
    :raises ValueError: if both passes put a label on the same column
    """
    # forward pass: uppercase names, labelled from the start of the match line
    fwd_l1, fwd_l2, _ = write_locarna_anchors_with_min_length(
        match_line, min_anchor_length, pa='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    )

    # backward pass: the match line and both alphabets are reversed, so that
    # reversing the output back gives labels in left-to-right order again
    rev_l1, rev_l2, _ = write_locarna_anchors_with_min_length(
        match_line[::-1], min_anchor_length,
        pa='abcdefghijklmnopqrstuvwxyz'[::-1], itr='0123456789'[::-1]
    )
    bwd_l1 = rev_l1[::-1]
    bwd_l2 = rev_l2[::-1]

    def _merge_labels(forward, backward):
        # take whichever pass labelled the column; a column labelled by both is an error
        merged = []
        for column, (f_char, b_char) in enumerate(zip(forward, backward)):
            if f_char != '.' and b_char != '.':
                raise ValueError(
                    'Forward and backward anchor labels overlap at column {} '
                    '({!r} vs {!r}).'.format(column, f_char, b_char)
                )
            merged.append(f_char if f_char != '.' else b_char)
        return ''.join(merged)

    anchor_l1 = _merge_labels(fwd_l1, bwd_l1)
    anchor_l2 = _merge_labels(fwd_l2, bwd_l2)

    return anchor_l1, anchor_l2, True


def squeeze_locarna_anchors_to_aligned_seq(aligned_seq, anchor_line1, anchor_line2):
Expand All @@ -370,38 +411,34 @@ def squeeze_locarna_anchors_to_aligned_seq(aligned_seq, anchor_line1, anchor_lin
class LocarnaAnchor(object):
"""
while initiating LocarnaAnchor object U can specify minimal anchor length to be used
If default (-1) is kept, then minimal anchor length for succesfull usage for locarna is infered
and the number is returned in anchor_length parameter
"""
def __init__(self, query, match, subject, anchor_length=-1):
def __init__(self, query, match, subject, anchor_length=1):
self.match = match
self.query = query
self.subject = subject
# self.anchor_l1, self.anchor_l2 = write_locarna_anchors(self.match)
# compute anchor length

self.too_many_anchors = False
self.anchor_length = anchor_length
if anchor_length < 0:
while True:
self.anchor_l1, self.anchor_l2 = write_locarna_anchors_with_min_length(self.match, self.anchor_length)
if '[' in self.anchor_l1:
self.anchor_length += 1
else:
break
else:
self.anchor_l1, self.anchor_l2 = write_locarna_anchors_with_min_length(self.match, self.anchor_length)

assert len(self.anchor_l1) == len(self.anchor_l2) == len(self.query) == len(self.subject)
self.anchor_l1, self.anchor_l2, self.too_many_anchors = write_locarna_anchors_with_min_length(
self.match, self.anchor_length)

if anchor_length < 0:
print('inferred anchor length {}'.format(self.anchor_length))
if self.too_many_anchors:
self.anchor_l1, self.anchor_l2, self.too_many_anchors = write_locarna_long_anchors(
self.match, self.anchor_length
)

assert len(self.anchor_l1) == len(self.anchor_l2) == len(self.query) == len(self.subject)

self.squeezed_query, self.q_al1, self.q_al2 = squeeze_locarna_anchors_to_aligned_seq(self.query,
self.anchor_l1,
self.anchor_l2)
self.squeezed_subject, self.s_al1, self.s_al2 = squeeze_locarna_anchors_to_aligned_seq(self.subject,
self.anchor_l1,
self.anchor_l2)
self.squeezed_query, self.q_al1, self.q_al2 = squeeze_locarna_anchors_to_aligned_seq(
self.query,
self.anchor_l1,
self.anchor_l2
)
self.squeezed_subject, self.s_al1, self.s_al2 = squeeze_locarna_anchors_to_aligned_seq(
self.subject,
self.anchor_l1,
self.anchor_l2
)

def anchor_whole_seq(self, seq, seq_line):
"""
Expand Down
52 changes: 24 additions & 28 deletions rna_blast_analyze/BR_core/output/onehit.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
<pre>BLAST output file: {{hea.input}}
Query sequence file: {{hea.query}}
{% if hea.best_matching_model %}
RFAM model with best score to a query sequence <div class="tooltip">?<pre class="tooltiptext">Infered from query sequence by cmscan program.</pre></div>
RFAM model with best score to a query sequence <div class="tooltip"><span class="inf">?</span><pre class="tooltiptext">Infered from query sequence by cmscan program.</pre></div>
Family name: {{hea.best_matching_model['target_name']}}
E-value: {{hea.best_matching_model['E-value']}}{% endif %}
</pre>
Expand All @@ -36,7 +36,7 @@ <h3 class="onehit_heading" style="background:{{data.h_color}};">
<p class="header-bhname">{{data.blast_hit_name}}</p>
<pre class="blasttext">
<div class="tooltip blasttooltip">
?<pre class="tooltiptext">
<b class="inf">?</b><pre class="tooltiptext">
This is BLAST alignment as read from the input file</pre>
</div>
{{data.blast_text}}</pre>
Expand All @@ -46,51 +46,47 @@ <h3 class="onehit_heading" style="background:{{data.h_color}};">
<label class="repheader"><u>Report:</u></label>
<table>
<tr>
<th class="left" scope="row">sequence start:</th>
<td class="right" id="{{data.intid}}SeqStart">{{data.ext_start}}</td>
<td>
<div class="tooltip"> ?
<th class="left" scope="row">sequence start
<div class="tooltip"><sup><span class="inf">?</span></sup>
<pre class="tooltiptext">
Start position of the estimated full-length sequence in genome.
Start index < end index.</pre>
</div>
</td>
</div>:
</th>
<td class="right" id="{{data.intid}}SeqStart">{{data.ext_start}}</td>
</tr>
<tr>
<th class="left" scope="row">sequence end:</th>
<td class="right" id="{{data.intid}}SeqEnd">{{data.ext_end}}</td>
<td>
<div class="tooltip"> ?
<th class="left" scope="row">sequence end
<div class="tooltip"><sup><span class="inf">?</span></sup>
<pre class="tooltiptext">
End position of the estimated full-length sequence in genome.
Start index < end index.</pre>
</div>
</td>
</div>:
</th>
<td class="right" id="{{data.intid}}SeqEnd">{{data.ext_end}}</td>
</tr>
<tr>
<th class="left" scope="row">bit score (CM):</th>
<td class="right">{{data.rsearchbitscore}}</td>
<td>
<div class="tooltip"> ?
<th class="left" scope="row">bit score (CM)
<div class="tooltip"><sup><span class="inf">?</span></sup>
<pre class="tooltiptext">
The score for aligning estimated full-length sequence to CM model
(computed by RSEARCH -> default,
infered from Rfam or provided by user)</pre>
</div>
</td>
</div>:
</th>
<td class="right">{{data.rsearchbitscore}}</td>
</tr>
<tr>
<th class="left" scope="row">Homology estimate:</th>
<td class="rigth">{{data.h_estimate}}</td>
<td>
<div class="tooltip"> ?
<th class="left" scope="row">Homology estimate
<div class="tooltip"><sup><span class="inf">?</span></sup>
<pre class="tooltiptext">
Quick homology estimate:
Not homologous: bit score < 0
Homologous: bit score > 20 and bit score > 0.5 * query length
Uncertain otherwise</pre>
</div>
</td>
</div>:
</th>
<td class="rigth">{{data.h_estimate}}</td>
</tr>
</table>
</div>
Expand All @@ -100,7 +96,7 @@ <h3 class="onehit_heading" style="background:{{data.h_color}};">
<div>
<label for="{{data.intid}}SeqCheck" class="repheader"><u>Estimated full-length sequence:</u></label>
<input type="checkbox" class="individualSequenceCheckbox" id="{{data.intid}}SeqCheck">
<div class="tooltip repheader">?
<div class="tooltip repheader"><span class="inf">?</span>
<pre class="tooltiptext">
Click checkbox to select multiple seuqences.
Fasta header format:
Expand All @@ -119,7 +115,7 @@ <h3 class="onehit_heading" style="background:{{data.h_color}};">
<figcaption>
<label class="repheader">{{pic.picname}}</label>
<input type="checkbox" class="individualStructureCheckbox" id="{{data.intid}}{{data.picname}}StrCheck" data-method="{{pic.picname}}">
<div class="tooltip repheader">?
<div class="tooltip repheader"><span class="inf">?</span>
<pre class="tooltiptext">
Visualisation of predicted secondary structure.
To save the image:
Expand Down
5 changes: 4 additions & 1 deletion rna_blast_analyze/BR_core/output/style.css
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@
.tooltip {
/*position: relative;*/
display: inline-block;
width: 1em;
/*width: 1em;*/
}

/* Tooltip text */
Expand All @@ -208,4 +208,7 @@
.rnapic {
height: 300px;
}
.inf {
color: blue;
}
</style>
2 changes: 1 addition & 1 deletion rna_blast_analyze/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.2
0.1.3
2 changes: 1 addition & 1 deletion test_func/test_data/RF00001_reference_missing_hit.html.md5
Original file line number Diff line number Diff line change
@@ -1 +1 @@
c9ea04bf0a115e466cb603b19c414050
3acbb9ff88bcad8b245c45e5ee8a2fae
2 changes: 1 addition & 1 deletion test_func/test_data/RF00001_reference_output.html.md5
Original file line number Diff line number Diff line change
@@ -1 +1 @@
cf7bd96127e4a0e0481956a0bb2afe8c
859c9f1f37428781e285a513fe1fdae3

0 comments on commit 4da7277

Please sign in to comment.