From 4da7277aa0c9a3d8286faf2eacd0635b5d84d1d0 Mon Sep 17 00:00:00 2001
From: Marek Schwarz <schwarz.marek@outlook.com>
Date: Mon, 9 Sep 2019 17:46:29 +0200
Subject: [PATCH] Change appearance of help in html output

version bump
remove unused field 'bstrand' from csv output
update anchor handling for very long hits
---
 rna_blast_analyze/BR_core/BA_methods.py       |   5 -
 .../BR_core/expand_by_LOCARNA.py              | 101 ++++++++++++------
 rna_blast_analyze/BR_core/output/onehit.html  |  52 +++++----
 rna_blast_analyze/BR_core/output/style.css    |   5 +-
 rna_blast_analyze/VERSION                     |   2 +-
 .../RF00001_reference_missing_hit.html.md5    |   2 +-
 .../RF00001_reference_output.html.md5         |   2 +-
 7 files changed, 100 insertions(+), 69 deletions(-)

diff --git a/rna_blast_analyze/BR_core/BA_methods.py b/rna_blast_analyze/BR_core/BA_methods.py
index c7665a4..fdc0648 100644
--- a/rna_blast_analyze/BR_core/BA_methods.py
+++ b/rna_blast_analyze/BR_core/BA_methods.py
@@ -78,7 +78,6 @@ def export_pandas_results(self):
             'best_sequence',
             'estart',
             'eend',
-            'bstrand',
             'blast_eval',
             'query_start',
             'query_end',
@@ -149,10 +148,6 @@ def export_pandas_results(self):
                     # extended end
                     data['eend'].append(hit.best_end)
                     continue
-                elif k == 'bstrand':
-                    # blast strand
-                    data['bstrand'].append(hit.source.annotations['blast'][1].strand)
-                    continue
                 elif k == 'best_sequence':
                     # selected sequence
                     data['best_sequence'].append(str(hit.extension.seq))
diff --git a/rna_blast_analyze/BR_core/expand_by_LOCARNA.py b/rna_blast_analyze/BR_core/expand_by_LOCARNA.py
index 1463dc4..1bca69e 100644
--- a/rna_blast_analyze/BR_core/expand_by_LOCARNA.py
+++ b/rna_blast_analyze/BR_core/expand_by_LOCARNA.py
@@ -46,6 +46,9 @@ def locarna_worker(pack):
             to_rna(blast_entry.sbjct),
             anchor_length=anchor_length
         )
+
+        if anchors.too_many_anchors:
+            ml.info('Too many anchors for {}. Can handle up to 520 distinct anchors.'.format(one_expanded_hit.id))
         # extracted temp is my query
 
         # access the locarna aligner directly
@@ -312,13 +315,18 @@ def run_locarna(query_file, subject_file, locarna_params):
         return subject_file + '.loc_out'
 
 
-def write_locarna_anchors_with_min_length(match_line, min_anchor_length=1):
+def write_locarna_anchors_with_min_length(
+        match_line, min_anchor_length=1,
+        pa='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz',
+        itr='0123456789'
+):
     ml.debug(fname())
     h1 = []
     h2 = []
-    pa = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
-    part_desig = 0
+    max_l = len(pa)
+    part_desig = -1
 
+    to_many_anchors = False
     for match in re.finditer(r'\|+', match_line, flags=re.IGNORECASE):
         if len(match.group()) < min_anchor_length:
             # skip the iterations below minimum length
@@ -331,13 +339,15 @@ def write_locarna_anchors_with_min_length(match_line, min_anchor_length=1):
         c = 0
         part_desig += 1
         for _ in match.group():
-            if c == 9:
+            if c == 10:
                 c = 0
                 part_desig += 1
-            c += 1
-
+            if part_desig >= max_l:
+                to_many_anchors = True
+                continue
             h1.append(pa[part_desig])
-            h2.append(str(c))
+            h2.append(itr[c])
+            c += 1
 
     for i in range(len(match_line) - len(h1)):
         h1.append('.')
@@ -345,7 +355,38 @@ def write_locarna_anchors_with_min_length(match_line, min_anchor_length=1):
 
     anchor_l1 = ''.join(h1)
     anchor_l2 = ''.join(h2)
-    return anchor_l1, anchor_l2
+    return anchor_l1, anchor_l2, to_many_anchors
+
+
+def write_locarna_long_anchors(match_line, min_anchor_length=1):
+    a1f, a2f, _ = write_locarna_anchors_with_min_length(
+        match_line, min_anchor_length, pa='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    )
+
+    b1b, b2b, _ = write_locarna_anchors_with_min_length(
+        match_line[::-1], min_anchor_length, pa='abcdefghijklmnopqrstuvwxyz'[::-1], itr='0123456789'[::-1]
+    )
+
+    b1f = b1b[::-1]
+    b2f = b2b[::-1]
+
+    def _process_parts(fp, sp):
+        h = []
+        for a, b in zip(fp, sp):
+            if a != '.' and b != '.':
+                raise Exception
+            elif a != '.':
+                h.append(a)
+            elif b != '.':
+                h.append(b)
+            else:
+                h.append('.')
+        return ''.join(h)
+
+    anchor_l1 = _process_parts(a1f, b1f)
+    anchor_l2 = _process_parts(a2f, b2f)
+
+    return anchor_l1, anchor_l2, True
 
 
 def squeeze_locarna_anchors_to_aligned_seq(aligned_seq, anchor_line1, anchor_line2):
@@ -370,38 +411,34 @@ def squeeze_locarna_anchors_to_aligned_seq(aligned_seq, anchor_line1, anchor_lin
 class LocarnaAnchor(object):
     """
     while initiating LocarnaAnchor object U can specify minimal anchor length to be used
-    If default (-1) is kept, then minimal anchor length for succesfull usage for locarna is infered
-    and the number is returned in anchor_length parameter
     """
-    def __init__(self, query, match, subject, anchor_length=-1):
+    def __init__(self, query, match, subject, anchor_length=1):
         self.match = match
         self.query = query
         self.subject = subject
-        # self.anchor_l1, self.anchor_l2 = write_locarna_anchors(self.match)
-        # compute anchor length
-
+        self.too_many_anchors = False
         self.anchor_length = anchor_length
-        if anchor_length < 0:
-            while True:
-                self.anchor_l1, self.anchor_l2 = write_locarna_anchors_with_min_length(self.match, self.anchor_length)
-                if '[' in self.anchor_l1:
-                    self.anchor_length += 1
-                else:
-                    break
-        else:
-            self.anchor_l1, self.anchor_l2 = write_locarna_anchors_with_min_length(self.match, self.anchor_length)
 
-        assert len(self.anchor_l1) == len(self.anchor_l2) == len(self.query) == len(self.subject)
+        self.anchor_l1, self.anchor_l2, self.too_many_anchors = write_locarna_anchors_with_min_length(
+            self.match, self.anchor_length)
 
-        if anchor_length < 0:
-            print('inferred anchor length {}'.format(self.anchor_length))
+        if self.too_many_anchors:
+            self.anchor_l1, self.anchor_l2, self.too_many_anchors = write_locarna_long_anchors(
+                self.match, self.anchor_length
+            )
+
+        assert len(self.anchor_l1) == len(self.anchor_l2) == len(self.query) == len(self.subject)
 
-        self.squeezed_query, self.q_al1, self.q_al2 = squeeze_locarna_anchors_to_aligned_seq(self.query,
-                                                                                             self.anchor_l1,
-                                                                                             self.anchor_l2)
-        self.squeezed_subject, self.s_al1, self.s_al2 = squeeze_locarna_anchors_to_aligned_seq(self.subject,
-                                                                                               self.anchor_l1,
-                                                                                               self.anchor_l2)
+        self.squeezed_query, self.q_al1, self.q_al2 = squeeze_locarna_anchors_to_aligned_seq(
+            self.query,
+            self.anchor_l1,
+            self.anchor_l2
+        )
+        self.squeezed_subject, self.s_al1, self.s_al2 = squeeze_locarna_anchors_to_aligned_seq(
+            self.subject,
+            self.anchor_l1,
+            self.anchor_l2
+        )
 
     def anchor_whole_seq(self, seq, seq_line):
         """
diff --git a/rna_blast_analyze/BR_core/output/onehit.html b/rna_blast_analyze/BR_core/output/onehit.html
index 3c982fd..c68a7e8 100644
--- a/rna_blast_analyze/BR_core/output/onehit.html
+++ b/rna_blast_analyze/BR_core/output/onehit.html
@@ -10,7 +10,7 @@
 <pre>BLAST output file:   {{hea.input}}
 Query sequence file: {{hea.query}}
 {% if hea.best_matching_model %}
-RFAM model with best score to a query sequence   <div class="tooltip">?<pre class="tooltiptext">Infered from query sequence by cmscan program.</pre></div>
+RFAM model with best score to a query sequence   <div class="tooltip"><span class="inf">?</span><pre class="tooltiptext">Infered from query sequence by cmscan program.</pre></div>
 Family name: {{hea.best_matching_model['target_name']}}
 E-value:     {{hea.best_matching_model['E-value']}}{% endif %}
 </pre>
@@ -36,7 +36,7 @@ <h3 class="onehit_heading" style="background:{{data.h_color}};">
                     <p class="header-bhname">{{data.blast_hit_name}}</p>
                     <pre class="blasttext">
                         <div class="tooltip blasttooltip">
-?<pre class="tooltiptext">
+<b class="inf">?</b><pre class="tooltiptext">
 This is BLAST alignment as read from the input file</pre>
                         </div>
 {{data.blast_text}}</pre>
@@ -46,51 +46,47 @@ <h3 class="onehit_heading" style="background:{{data.h_color}};">
                     <label class="repheader"><u>Report:</u></label>
                     <table>
                         <tr>
-                            <th class="left" scope="row">sequence start:</th>
-                            <td class="right" id="{{data.intid}}SeqStart">{{data.ext_start}}</td>
-                            <td>
-                                <div class="tooltip"> ?
+                            <th class="left" scope="row">sequence start
+                                <div class="tooltip"><sup><span class="inf">?</span></sup>
                                     <pre class="tooltiptext">
 Start position of the estimated full-length sequence in genome.
 Start index < end index.</pre>
-                                </div>
-                            </td>
+                                </div>:
+                            </th>
+                            <td class="right" id="{{data.intid}}SeqStart">{{data.ext_start}}</td>
                         </tr>
                         <tr>
-                            <th class="left" scope="row">sequence end:</th>
-                            <td class="right" id="{{data.intid}}SeqEnd">{{data.ext_end}}</td>
-                            <td>
-                                <div class="tooltip"> ?
+                            <th class="left" scope="row">sequence end
+                                <div class="tooltip"><sup><span class="inf">?</span></sup>
                                     <pre class="tooltiptext">
 End position of the estimated full-length sequence in genome.
 Start index < end index.</pre>
-                                </div>
-                            </td>
+                                </div>:
+                            </th>
+                            <td class="right" id="{{data.intid}}SeqEnd">{{data.ext_end}}</td>
                         </tr>
                         <tr>
-                            <th class="left" scope="row">bit score (CM):</th>
-                            <td class="right">{{data.rsearchbitscore}}</td>
-                            <td>
-                                <div class="tooltip"> ?
+                            <th class="left" scope="row">bit score (CM)
+                                <div class="tooltip"><sup><span class="inf">?</span></sup>
                                     <pre class="tooltiptext">
 The score for aligning estimated full-length sequence to CM model
   (computed by RSEARCH -> default,
   infered from Rfam or provided by user)</pre>
-                                </div>
-                            </td>
+                                </div>:
+                            </th>
+                            <td class="right">{{data.rsearchbitscore}}</td>
                         </tr>
                         <tr>
-                            <th class="left" scope="row">Homology estimate:</th>
-                            <td class="rigth">{{data.h_estimate}}</td>
-                            <td>
-                                <div class="tooltip"> ?
+                            <th class="left" scope="row">Homology estimate
+                                <div class="tooltip"><sup><span class="inf">?</span></sup>
                                     <pre class="tooltiptext">
 Quick homology estimate:
   Not homologous: bit score < 0
   Homologous: bit score > 20 and bit score > 0.5 * query length
   Uncertain otherwise</pre>
-                                </div>
-                            </td>
+                                </div>:
+                            </th>
+                            <td class="rigth">{{data.h_estimate}}</td>
                         </tr>
                     </table>
                 </div>
@@ -100,7 +96,7 @@ <h3 class="onehit_heading" style="background:{{data.h_color}};">
                     <div>
                         <label for="{{data.intid}}SeqCheck" class="repheader"><u>Estimated full-length sequence:</u></label>
                         <input type="checkbox" class="individualSequenceCheckbox" id="{{data.intid}}SeqCheck">
-                        <div class="tooltip repheader">?
+                        <div class="tooltip repheader"><span class="inf">?</span>
                             <pre class="tooltiptext">
 Click checkbox to select multiple seuqences.
 Fasta header format:
@@ -119,7 +115,7 @@ <h3 class="onehit_heading" style="background:{{data.h_color}};">
                         <figcaption>
                             <label class="repheader">{{pic.picname}}</label>
                             <input type="checkbox" class="individualStructureCheckbox" id="{{data.intid}}{{data.picname}}StrCheck" data-method="{{pic.picname}}">
-                            <div class="tooltip repheader">?
+                            <div class="tooltip repheader"><span class="inf">?</span>
                                 <pre class="tooltiptext">
 Visualisation of predicted secondary structure.
 To save the image:
diff --git a/rna_blast_analyze/BR_core/output/style.css b/rna_blast_analyze/BR_core/output/style.css
index a1e7aec..0780b3d 100644
--- a/rna_blast_analyze/BR_core/output/style.css
+++ b/rna_blast_analyze/BR_core/output/style.css
@@ -188,7 +188,7 @@
     .tooltip {
         /*position: relative;*/
         display: inline-block;
-        width: 1em;
+        /*width: 1em;*/
     }
 
     /* Tooltip text */
@@ -208,4 +208,7 @@
     .rnapic {
         height: 300px;
     }
+    .inf {
+        color: blue;
+    }
 </style>
\ No newline at end of file
diff --git a/rna_blast_analyze/VERSION b/rna_blast_analyze/VERSION
index 8294c18..7693c96 100644
--- a/rna_blast_analyze/VERSION
+++ b/rna_blast_analyze/VERSION
@@ -1 +1 @@
-0.1.2
\ No newline at end of file
+0.1.3
\ No newline at end of file
diff --git a/test_func/test_data/RF00001_reference_missing_hit.html.md5 b/test_func/test_data/RF00001_reference_missing_hit.html.md5
index c249e55..71ea05b 100644
--- a/test_func/test_data/RF00001_reference_missing_hit.html.md5
+++ b/test_func/test_data/RF00001_reference_missing_hit.html.md5
@@ -1 +1 @@
-c9ea04bf0a115e466cb603b19c414050
\ No newline at end of file
+3acbb9ff88bcad8b245c45e5ee8a2fae
\ No newline at end of file
diff --git a/test_func/test_data/RF00001_reference_output.html.md5 b/test_func/test_data/RF00001_reference_output.html.md5
index b350016..aaa4d44 100644
--- a/test_func/test_data/RF00001_reference_output.html.md5
+++ b/test_func/test_data/RF00001_reference_output.html.md5
@@ -1 +1 @@
-cf7bd96127e4a0e0481956a0bb2afe8c
\ No newline at end of file
+859c9f1f37428781e285a513fe1fdae3
\ No newline at end of file