From 5e7483d2dcc2bdb6022fa58caa44a30803f6e87c Mon Sep 17 00:00:00 2001 From: Marek Schwarz Date: Tue, 26 Nov 2019 12:43:19 +0100 Subject: [PATCH] update readme, blast regex and version --- readme.md | 3 ++- rna_blast_analyze/BR_core/parse_accession.py | 11 +++-------- rna_blast_analyze/VERSION | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/readme.md b/readme.md index 6983adf..da316c2 100644 --- a/readme.md +++ b/readme.md @@ -67,7 +67,8 @@ conda install -c conda-forge -c bioconda -c schwarz.marek rboanalyzer # or pip3 install --upgrade pip --user ``` -- ncbi-blast+ >= 2.6, < 2.8.1 [link](http://ftp.ncbi.nih.gov/blast/executables/blast+/2.7.1/) +- ncbi-blast+ >= 2.8.1 [link](http://ftp.ncbi.nih.gov/blast/executables/blast+/2.9.0/) + (The pipeline can use blast from version 2.6.0, however this version is not compatible with blast dbv5) - locarna >= 1.9.2, <2 [link](https://github.com/s-will/LocARNA/releases/tag/v1.9.2.2) - infernal >= 1.1, <2 [link](http://eddylab.org/infernal/) - clustalo >= 1.2.4, <2 [link](http://www.clustal.org/omega/) diff --git a/rna_blast_analyze/BR_core/parse_accession.py b/rna_blast_analyze/BR_core/parse_accession.py index d4e819b..d5aa3a8 100644 --- a/rna_blast_analyze/BR_core/parse_accession.py +++ b/rna_blast_analyze/BR_core/parse_accession.py @@ -73,26 +73,21 @@ old = [r'ZP_[0-9]{8}\.[0-9]+', r'NS_[0-9]{6}\.[0-9]+'] -pdb = ["[0-9A-Z]{4}[_|][0-9A-Z]{1,2}",] +# accommodate up to 4 characters chain id (https://www.wwpdb.org/deposition/preparing-pdbx-mmcif-files) +pdb_on_steroids = [r"(gi\|\d+\|)?(?(1))((?<=\|pdb\|)[0-9A-Z]{4}[_|][0-9A-Za-z]{1,4}|^[0-9A-Z]{4}[_|][0-9A-Za-z]{1,4})"] exceptions = [ '1KPD', # although it has chain, in the NCBI nt database it is listed without chain r'GPS_[0-9]{9}\.[0-9]+', # from refseq ] -known_acc_formats = genbank_nucl + genbank_wgs + refseq_re + genbank_mga + genbank_prot + pdb + genbank_wgs_scafolds + exceptions + old +known_acc_formats = genbank_nucl + genbank_wgs + refseq_re + genbank_mga + genbank_prot + genbank_wgs_scafolds + exceptions + old + pdb_on_steroids accession_regex = '|'.join(known_acc_formats) compiled_accession_regex = re.compile(accession_regex) if __name__ == '__main__': - _javascript_xml_re = '(' + accession_regex + ')(?=\|?)' - _javascript_txt_re = '^>(' + accession_regex - - print(_javascript_xml_re.__repr__()) - print(_javascript_txt_re.__repr__()) - import sys import gzip import re diff --git a/rna_blast_analyze/VERSION b/rna_blast_analyze/VERSION index 1a3c67c..446ba66 100644 --- a/rna_blast_analyze/VERSION +++ b/rna_blast_analyze/VERSION @@ -1 +1 @@ -0.1.4a2 \ No newline at end of file +0.1.4 \ No newline at end of file