From 4da7277aa0c9a3d8286faf2eacd0635b5d84d1d0 Mon Sep 17 00:00:00 2001 From: Marek Schwarz Date: Mon, 9 Sep 2019 17:46:29 +0200 Subject: [PATCH] Change apparance of help in html output version bump remove unused field 'bstrand' from csv output update anchor handling for very long hits --- rna_blast_analyze/BR_core/BA_methods.py | 5 - .../BR_core/expand_by_LOCARNA.py | 101 ++++++++++++------ rna_blast_analyze/BR_core/output/onehit.html | 52 +++++---- rna_blast_analyze/BR_core/output/style.css | 5 +- rna_blast_analyze/VERSION | 2 +- .../RF00001_reference_missing_hit.html.md5 | 2 +- .../RF00001_reference_output.html.md5 | 2 +- 7 files changed, 100 insertions(+), 69 deletions(-) diff --git a/rna_blast_analyze/BR_core/BA_methods.py b/rna_blast_analyze/BR_core/BA_methods.py index c7665a4..fdc0648 100644 --- a/rna_blast_analyze/BR_core/BA_methods.py +++ b/rna_blast_analyze/BR_core/BA_methods.py @@ -78,7 +78,6 @@ def export_pandas_results(self): 'best_sequence', 'estart', 'eend', - 'bstrand', 'blast_eval', 'query_start', 'query_end', @@ -149,10 +148,6 @@ def export_pandas_results(self): # extended end data['eend'].append(hit.best_end) continue - elif k == 'bstrand': - # blast strand - data['bstrand'].append(hit.source.annotations['blast'][1].strand) - continue elif k == 'best_sequence': # selected sequence data['best_sequence'].append(str(hit.extension.seq)) diff --git a/rna_blast_analyze/BR_core/expand_by_LOCARNA.py b/rna_blast_analyze/BR_core/expand_by_LOCARNA.py index 1463dc4..1bca69e 100644 --- a/rna_blast_analyze/BR_core/expand_by_LOCARNA.py +++ b/rna_blast_analyze/BR_core/expand_by_LOCARNA.py @@ -46,6 +46,9 @@ def locarna_worker(pack): to_rna(blast_entry.sbjct), anchor_length=anchor_length ) + + if anchors.too_many_anchors: + ml.info('Too many anchors for {}. Can handle up to 520 distinct anchors.'.format(one_expanded_hit.id)) # extracted temp is my query # access the locarna aligner directly @@ -312,13 +315,18 @@ def run_locarna(query_file, subject_file, locarna_params): return subject_file + '.loc_out' -def write_locarna_anchors_with_min_length(match_line, min_anchor_length=1): +def write_locarna_anchors_with_min_length( + match_line, min_anchor_length=1, + pa='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', + itr='0123456789' +): ml.debug(fname()) h1 = [] h2 = [] - pa = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' - part_desig = 0 + max_l = len(pa) + part_desig = -1 + to_many_anchors = False for match in re.finditer(r'\|+', match_line, flags=re.IGNORECASE): if len(match.group()) < min_anchor_length: # skip the iterations below minimum length @@ -331,13 +339,15 @@ def write_locarna_anchors_with_min_length(match_line, min_anchor_length=1): c = 0 part_desig += 1 for _ in match.group(): - if c == 9: + if c == 10: c = 0 part_desig += 1 - c += 1 - + if part_desig >= max_l: + to_many_anchors = True + continue h1.append(pa[part_desig]) - h2.append(str(c)) + h2.append(itr[c]) + c += 1 for i in range(len(match_line) - len(h1)): h1.append('.') @@ -345,7 +355,38 @@ def write_locarna_anchors_with_min_length(match_line, min_anchor_length=1): anchor_l1 = ''.join(h1) anchor_l2 = ''.join(h2) - return anchor_l1, anchor_l2 + return anchor_l1, anchor_l2, to_many_anchors + + +def write_locarna_long_anchors(match_line, min_anchor_length=1): + a1f, a2f, _ = write_locarna_anchors_with_min_length( + match_line, min_anchor_length, pa='ABCDEFGHIJKLMNOPQRSTUVWXYZ' + ) + + b1b, b2b, _ = write_locarna_anchors_with_min_length( + match_line[::-1], min_anchor_length, pa='abcdefghijklmnopqrstuvwxyz'[::-1], itr='0123456789'[::-1] + ) + + b1f = b1b[::-1] + b2f = b2b[::-1] + + def _process_parts(fp, sp): + h = [] + for a, b in zip(fp, sp): + if a != '.' and b != '.': + raise Exception + elif a != '.': + h.append(a) + elif b != '.': + h.append(b) + else: + h.append('.') + return ''.join(h) + + anchor_l1 = _process_parts(a1f, b1f) + anchor_l2 = _process_parts(a2f, b2f) + + return anchor_l1, anchor_l2, True def squeeze_locarna_anchors_to_aligned_seq(aligned_seq, anchor_line1, anchor_line2): @@ -370,38 +411,34 @@ def squeeze_locarna_anchors_to_aligned_seq(aligned_seq, anchor_line1, anchor_lin class LocarnaAnchor(object): """ while initiating LocarnaAnchor object U can specify minimal anchor length to be used - If default (-1) is kept, then minimal anchor length for succesfull usage for locarna is infered - and the number is returned in anchor_length parameter """ - def __init__(self, query, match, subject, anchor_length=-1): + def __init__(self, query, match, subject, anchor_length=1): self.match = match self.query = query self.subject = subject - # self.anchor_l1, self.anchor_l2 = write_locarna_anchors(self.match) - # compute anchor length - + self.too_many_anchors = False self.anchor_length = anchor_length - if anchor_length < 0: - while True: - self.anchor_l1, self.anchor_l2 = write_locarna_anchors_with_min_length(self.match, self.anchor_length) - if '[' in self.anchor_l1: - self.anchor_length += 1 - else: - break - else: - self.anchor_l1, self.anchor_l2 = write_locarna_anchors_with_min_length(self.match, self.anchor_length) - assert len(self.anchor_l1) == len(self.anchor_l2) == len(self.query) == len(self.subject) + self.anchor_l1, self.anchor_l2, self.too_many_anchors = write_locarna_anchors_with_min_length( + self.match, self.anchor_length) - if anchor_length < 0: - print('inferred anchor length {}'.format(self.anchor_length)) + if self.too_many_anchors: + self.anchor_l1, self.anchor_l2, self.too_many_anchors = write_locarna_long_anchors( + self.match, self.anchor_length + ) + + assert len(self.anchor_l1) == len(self.anchor_l2) == len(self.query) == len(self.subject) - self.squeezed_query, self.q_al1, self.q_al2 = squeeze_locarna_anchors_to_aligned_seq(self.query, - self.anchor_l1, - self.anchor_l2) - self.squeezed_subject, self.s_al1, self.s_al2 = squeeze_locarna_anchors_to_aligned_seq(self.subject, - self.anchor_l1, - self.anchor_l2) + self.squeezed_query, self.q_al1, self.q_al2 = squeeze_locarna_anchors_to_aligned_seq( + self.query, + self.anchor_l1, + self.anchor_l2 + ) + self.squeezed_subject, self.s_al1, self.s_al2 = squeeze_locarna_anchors_to_aligned_seq( + self.subject, + self.anchor_l1, + self.anchor_l2 + ) def anchor_whole_seq(self, seq, seq_line): """ diff --git a/rna_blast_analyze/BR_core/output/onehit.html b/rna_blast_analyze/BR_core/output/onehit.html index 3c982fd..c68a7e8 100644 --- a/rna_blast_analyze/BR_core/output/onehit.html +++ b/rna_blast_analyze/BR_core/output/onehit.html @@ -10,7 +10,7 @@
BLAST output file:   {{hea.input}}
 Query sequence file: {{hea.query}}
 {% if hea.best_matching_model %}
-RFAM model with best score to a query sequence   
?
Infered from query sequence by cmscan program.
+RFAM model with best score to a query sequence
?
Infered from query sequence by cmscan program.
Family name: {{hea.best_matching_model['target_name']}} E-value: {{hea.best_matching_model['E-value']}}{% endif %}
@@ -36,7 +36,7 @@

{{data.blast_hit_name}}

                         
-?
+?
 This is BLAST alignment as read from the input file
{{data.blast_text}}
@@ -46,51 +46,47 @@

- - - + - - - + - - - + - - - +
sequence start:{{data.ext_start}} -
? +
sequence start +
?
 Start position of the estimated full-length sequence in genome.
 Start index < end index.
-
- + : +
{{data.ext_start}}
sequence end:{{data.ext_end}} -
? +
sequence end +
?
 End position of the estimated full-length sequence in genome.
 Start index < end index.
-
- + : +
{{data.ext_end}}
bit score (CM):{{data.rsearchbitscore}} -
? +
bit score (CM) +
?
 The score for aligning estimated full-length sequence to CM model
   (computed by RSEARCH -> default,
   infered from Rfam or provided by user)
-
- + : +
{{data.rsearchbitscore}}
Homology estimate:{{data.h_estimate}} -
? +
Homology estimate +
?
 Quick homology estimate:
   Not homologous: bit score < 0
   Homologous: bit score > 20 and bit score > 0.5 * query length
   Uncertain otherwise
-
- + : +
{{data.h_estimate}}
@@ -100,7 +96,7 @@

-
? +
?
 Click checkbox to select multiple seuqences.
 Fasta header format:
@@ -119,7 +115,7 @@ 

-
? +
?
 Visualisation of predicted secondary structure.
 To save the image:
diff --git a/rna_blast_analyze/BR_core/output/style.css b/rna_blast_analyze/BR_core/output/style.css
index a1e7aec..0780b3d 100644
--- a/rna_blast_analyze/BR_core/output/style.css
+++ b/rna_blast_analyze/BR_core/output/style.css
@@ -188,7 +188,7 @@
     .tooltip {
         /*position: relative;*/
         display: inline-block;
-        width: 1em;
+        /*width: 1em;*/
     }
 
     /* Tooltip text */
@@ -208,4 +208,7 @@
     .rnapic {
         height: 300px;
     }
+    .inf {
+        color: blue;
+    }
 
\ No newline at end of file
diff --git a/rna_blast_analyze/VERSION b/rna_blast_analyze/VERSION
index 8294c18..7693c96 100644
--- a/rna_blast_analyze/VERSION
+++ b/rna_blast_analyze/VERSION
@@ -1 +1 @@
-0.1.2
\ No newline at end of file
+0.1.3
\ No newline at end of file
diff --git a/test_func/test_data/RF00001_reference_missing_hit.html.md5 b/test_func/test_data/RF00001_reference_missing_hit.html.md5
index c249e55..71ea05b 100644
--- a/test_func/test_data/RF00001_reference_missing_hit.html.md5
+++ b/test_func/test_data/RF00001_reference_missing_hit.html.md5
@@ -1 +1 @@
-c9ea04bf0a115e466cb603b19c414050
\ No newline at end of file
+3acbb9ff88bcad8b245c45e5ee8a2fae
\ No newline at end of file
diff --git a/test_func/test_data/RF00001_reference_output.html.md5 b/test_func/test_data/RF00001_reference_output.html.md5
index b350016..aaa4d44 100644
--- a/test_func/test_data/RF00001_reference_output.html.md5
+++ b/test_func/test_data/RF00001_reference_output.html.md5
@@ -1 +1 @@
-cf7bd96127e4a0e0481956a0bb2afe8c
\ No newline at end of file
+859c9f1f37428781e285a513fe1fdae3
\ No newline at end of file