Merge branch 'master' of github.com:bioexcel/biobb_dna

bioexcel · Jun 18, 2024 · b202155 · b202155
2 parents 90e7841 + 06e66e8
commit b202155
Show file tree

Hide file tree

Showing 46 changed files with 451 additions and 454 deletions.
diff --git a/.github/env.yaml b/.github/env.yaml
@@ -7,5 +7,5 @@ dependencies:
   - biobb_common ==4.2.0
   - pandas >=1.3.0
   - scikit-learn >=0.24.2
-  - curves ==3.0.0
+  - curves ==3.0.2
   - imagehash
diff --git a/.github/workflows/linting_and_testing.yml b/.github/workflows/linting_and_testing.yml
@@ -21,7 +21,7 @@ jobs:
     strategy:
       matrix:
         os: [self-hosted]
-        python-version: ["3.8", "3.9"]
+        python-version: ["3.8", "3.9", "3.10"]
     runs-on: ${{ matrix.os }}
     steps:
       - name: Check out repository code

diff --git a/README.md b/README.md
@@ -3,7 +3,7 @@
 [![](https://img.shields.io/conda/vn/bioconda/biobb_dna?label=Conda)](https://anaconda.org/bioconda/biobb_dna)
 [![](https://img.shields.io/conda/dn/bioconda/biobb_dna?label=Conda%20Downloads)](https://anaconda.org/bioconda/biobb_dna)
 [![](https://img.shields.io/badge/Docker-Quay.io-blue)](https://quay.io/repository/biocontainers/biobb_dna?tab=tags)
-[![](https://img.shields.io/badge/Singularity-GalaxyProject-blue)](https://depot.galaxyproject.org/singularity/biobb_dna:4.2.0--pyhdfd78af_0)
+[![](https://img.shields.io/badge/Singularity-GalaxyProject-blue)](https://depot.galaxyproject.org/singularity/biobb_dna:4.2.2--pyhdfd78af_0)
 
 [![](https://img.shields.io/badge/OS-Unix%20%7C%20MacOS-blue)](https://github.com/bioexcel/biobb_dna)
 [![](https://img.shields.io/pypi/pyversions/biobb-dna.svg?label=Python%20Versions)](https://pypi.org/project/biobb-dna/)
@@ -41,7 +41,7 @@ The latest documentation of this package can be found in our readthedocs site:
 [latest API documentation](http://biobb-dna.readthedocs.io/en/latest/).
 
 ### Version
-v4.2.0 2024.1
+v4.2.2 2024.1
 
 ### Installation
 Using PIP:
@@ -51,7 +51,7 @@ Using PIP:
 * Installation:
 
 
-        pip install "biobb_dna>=4.2.0"
+        pip install "biobb_dna>=4.2.2"
 
 
 * Usage: [Python API documentation](https://biobb-dna.readthedocs.io/en/latest/modules.html)
@@ -61,7 +61,7 @@ Using ANACONDA:
 * Installation:
 
 
-        conda install -c bioconda "biobb_dna>=4.2.0"
+        conda install -c bioconda "biobb_dna>=4.2.2"
 
 
 * Usage: With conda installation BioBBs can be used with the [Python API documentation](https://biobb-dna.readthedocs.io/en/latest/modules.html) and the [Command Line documentation](https://biobb-dna.readthedocs.io/en/latest/command_line.html)
@@ -71,13 +71,13 @@ Using DOCKER:
 * Installation:
 
 
-        docker pull quay.io/biocontainers/biobb_dna:4.2.0--pyhdfd78af_0
+        docker pull quay.io/biocontainers/biobb_dna:4.2.2--pyhdfd78af_0
 
 
 * Usage:
 
 
-        docker run quay.io/biocontainers/biobb_dna:4.2.0--pyhdfd78af_0 <command>
+        docker run quay.io/biocontainers/biobb_dna:4.2.2--pyhdfd78af_0 <command>
 
 Using SINGULARITY:
 
@@ -86,7 +86,7 @@ Using SINGULARITY:
 * Installation:
 
 
-        singularity pull --name biobb_dna.sif https://depot.galaxyproject.org/singularity/biobb_dna:4.2.0--pyhdfd78af_0
+        singularity pull --name biobb_dna.sif https://depot.galaxyproject.org/singularity/biobb_dna:4.2.2--pyhdfd78af_0
 
 
 * Usage:

diff --git a/biobb_dna/__init__.py b/biobb_dna/__init__.py
@@ -1,4 +1,4 @@
 name = "biobb_dna"
 __all__ = ["dna", "curvesplus", "backbone", "stiffness",
            "interbp_correlations", "intrabp_correlations"]
-__version__ = "4.2.0"
+__version__ = "4.2.2"
diff --git a/biobb_dna/backbone/bipopulations.py b/biobb_dna/backbone/bipopulations.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 """Module containing the BIPopulations class and the command line interface."""
 
-import shutil
 import argparse
 
 import matplotlib.pyplot as plt
@@ -11,7 +10,6 @@
 from biobb_dna.utils.transform import inverse_complement
 from biobb_common.generic.biobb_object import BiobbObject
 from biobb_common.tools.file_utils import launchlogger
-from biobb_common.tools import file_utils as fu
 from biobb_common.configuration import settings
 
 
@@ -115,28 +113,18 @@ def launch(self) -> int:
                 raise ValueError(
                     "seqpos must be a list of at least two integers")
 
-        # Creating temporary folder
-        self.tmp_folder = fu.create_unique_dir(prefix="backbone_")
-        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)
-
-        # Copy input_file_path1 to temporary folder
-        shutil.copy(self.io_dict['in']['input_epsilC_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_epsilW_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_zetaC_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_zetaW_path'], self.tmp_folder)
-
         # read input files
         epsilC = read_series(
-            self.io_dict['in']['input_epsilC_path'],
+            self.stage_io_dict['in']['input_epsilC_path'],
             usecols=self.seqpos)
         epsilW = read_series(
-            self.io_dict['in']['input_epsilW_path'],
+            self.stage_io_dict['in']['input_epsilW_path'],
             usecols=self.seqpos)
         zetaC = read_series(
-            self.io_dict['in']['input_zetaC_path'],
+            self.stage_io_dict['in']['input_zetaC_path'],
             usecols=self.seqpos)
         zetaW = read_series(
-            self.io_dict['in']['input_zetaW_path'],
+            self.stage_io_dict['in']['input_zetaW_path'],
             usecols=self.seqpos)
 
         # calculate difference between epsil and zeta parameters
@@ -159,7 +147,7 @@ def launch(self) -> int:
             "BI population": BI,
             "BII population": BII})
         Bpopulations_df.to_csv(
-            self.io_dict['out']['output_csv_path'],
+            self.stage_io_dict['out']['output_csv_path'],
             index=False)
 
         # save plot
@@ -186,14 +174,16 @@ def launch(self) -> int:
         axs.set_ylabel("BI/BII Population (%)")
         axs.set_title("Nucleotide parameter: BI/BII Population")
         fig.savefig(
-            self.io_dict['out']['output_jpg_path'],
+            self.stage_io_dict['out']['output_jpg_path'],
             format="jpg")
         plt.close()
 
+        # Copy files to host
+        self.copy_to_host()
+
         # Remove temporary file(s)
         self.tmp_files.extend([
-            self.stage_io_dict.get("unique_dir"),
-            self.tmp_folder
+            self.stage_io_dict.get("unique_dir")
         ])
         self.remove_tmp_files()
 

diff --git a/biobb_dna/backbone/canonicalag.py b/biobb_dna/backbone/canonicalag.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 """Module containing the CanonicalAG class and the command line interface."""
 
-import shutil
 import argparse
 
 import matplotlib.pyplot as plt
@@ -11,7 +10,6 @@
 from biobb_dna.utils.transform import inverse_complement
 from biobb_common.generic.biobb_object import BiobbObject
 from biobb_common.tools.file_utils import launchlogger
-from biobb_common.tools import file_utils as fu
 from biobb_common.configuration import settings
 
 
@@ -120,25 +118,15 @@ def launch(self) -> int:
                 raise ValueError(
                     "seqpos must be a list of at least two integers")
 
-        # Creating temporary folder
-        self.tmp_folder = fu.create_unique_dir(prefix="backbone_")
-        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)
-
-        # Copy input_file_path1 to temporary folder
-        shutil.copy(self.io_dict['in']['input_alphaC_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_alphaW_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_gammaC_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_gammaW_path'], self.tmp_folder)
-
         # read input files
         alphaC = read_series(
-            self.io_dict['in']['input_alphaC_path'], usecols=self.seqpos)
+            self.stage_io_dict['in']['input_alphaC_path'], usecols=self.seqpos)
         alphaW = read_series(
-            self.io_dict['in']['input_alphaW_path'], usecols=self.seqpos)
+            self.stage_io_dict['in']['input_alphaW_path'], usecols=self.seqpos)
         gammaC = read_series(
-            self.io_dict['in']['input_gammaC_path'], usecols=self.seqpos)
+            self.stage_io_dict['in']['input_gammaC_path'], usecols=self.seqpos)
         gammaW = read_series(
-            self.io_dict['in']['input_gammaW_path'], usecols=self.seqpos)
+            self.stage_io_dict['in']['input_gammaW_path'], usecols=self.seqpos)
 
         # fix angle range so it's not negative
         alphaC = self.fix_angles(alphaC)
@@ -162,7 +150,7 @@ def launch(self) -> int:
             "Nucleotide": xlabels,
             "Canonical alpha/gamma": canonical_populations})
         ag_populations_df.to_csv(
-            self.io_dict['out']['output_csv_path'],
+            self.stage_io_dict['out']['output_csv_path'],
             index=False)
 
         # save plot
@@ -189,14 +177,16 @@ def launch(self) -> int:
         axs.set_ylabel("Canonical Alpha-Gamma (%)")
         axs.set_title("Nucleotide parameter: Canonical Alpha-Gamma")
         fig.savefig(
-            self.io_dict['out']['output_jpg_path'],
+            self.stage_io_dict['out']['output_jpg_path'],
             format="jpg")
         plt.close()
 
+        # Copy files to host
+        self.copy_to_host()
+
         # Remove temporary file(s)
         self.tmp_files.extend([
-            self.stage_io_dict.get("unique_dir"),
-            self.tmp_folder
+            self.stage_io_dict.get("unique_dir")
         ])
         self.remove_tmp_files()
 

diff --git a/biobb_dna/backbone/puckering.py b/biobb_dna/backbone/puckering.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 """Module containing the Puckering class and the command line interface."""
 
-import shutil
 import argparse
 
 import matplotlib.pyplot as plt
@@ -11,7 +10,6 @@
 from biobb_dna.utils.transform import inverse_complement
 from biobb_common.generic.biobb_object import BiobbObject
 from biobb_common.tools.file_utils import launchlogger
-from biobb_common.tools import file_utils as fu
 from biobb_common.configuration import settings
 
 
@@ -113,20 +111,12 @@ def launch(self) -> int:
                 raise ValueError(
                     "seqpos must be a list of at least two integers")
 
-        # Creating temporary folder
-        self.tmp_folder = fu.create_unique_dir(prefix="backbone_")
-        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)
-
-        # Copy input_file_path1 to temporary folder
-        shutil.copy(self.io_dict['in']['input_phaseC_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_phaseW_path'], self.tmp_folder)
-
         # read input files
         phaseC = read_series(
-            self.io_dict['in']['input_phaseC_path'],
+            self.stage_io_dict['in']['input_phaseC_path'],
             usecols=self.seqpos)
         phaseW = read_series(
-            self.io_dict['in']['input_phaseW_path'],
+            self.stage_io_dict['in']['input_phaseW_path'],
             usecols=self.seqpos)
 
         # fix angle range so it's not negative
@@ -173,7 +163,7 @@ def launch(self) -> int:
         axs.set_ylabel("Puckering (%)")
         axs.set_title("Nucleotide parameter: Puckering")
         fig.savefig(
-            self.io_dict['out']['output_jpg_path'],
+            self.stage_io_dict['out']['output_jpg_path'],
             format="jpg")
 
         # save table
@@ -184,15 +174,17 @@ def launch(self) -> int:
             "West": Wpop,
             "South": Spop})
         populations.to_csv(
-            self.io_dict['out']['output_csv_path'],
+            self.stage_io_dict['out']['output_csv_path'],
             index=False)
 
         plt.close()
 
+        # Copy files to host
+        self.copy_to_host()
+
         # Remove temporary file(s)
         self.tmp_files.extend([
-            self.stage_io_dict.get("unique_dir"),
-            self.tmp_folder
+            self.stage_io_dict.get("unique_dir")
         ])
         self.remove_tmp_files()
 

diff --git a/biobb_dna/curvesplus/biobb_canal.py b/biobb_dna/curvesplus/biobb_canal.py
@@ -2,7 +2,6 @@
 
 """Module containing the Canal class and the command line interface."""
 import os
-import shutil
 import zipfile
 import argparse
 from pathlib import Path
@@ -112,38 +111,27 @@ def launch(self) -> int:
         self.stage_files()
 
         if self.sequence is None:
-            if self.io_dict['in']['input_lis_file'] is None:
+            if self.stage_io_dict['in']['input_lis_file'] is None:
                 raise RuntimeError(
                     "if no sequence is passed in the configuration, "
                     "you must at least specify `input_lis_file` "
                     "so sequence can be parsed from there")
             lis_lines = Path(
-                self.io_dict['in']['input_lis_file']).read_text().splitlines()
+                self.stage_io_dict['in']['input_lis_file']).read_text().splitlines()
             for line in lis_lines:
                 if line.strip().startswith("Strand  1"):
                     self.sequence = line.split(" ")[-1]
                     fu.log(
                         f"using sequence {self.sequence} "
-                        f"from {self.io_dict['in']['input_lis_file']}",
+                        f"from {self.stage_io_dict['in']['input_lis_file']}",
                         self.out_log)
 
-        # Creating temporary folder
-        self.tmp_folder = fu.create_unique_dir(prefix="canal_")
-        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)
-
-        # copy input files to temporary folder
-        shutil.copy(
-            self.io_dict['in']['input_cda_file'],
-            self.tmp_folder)
-        tmp_cda_path = Path(self.io_dict['in']['input_cda_file']).name
-        if self.io_dict['in']['input_lis_file'] is not None:
-            shutil.copy(
-                self.io_dict['in']['input_lis_file'],
-                self.tmp_folder)
+        # define temporary file name
+        tmp_cda_path = Path(self.stage_io_dict['in']['input_cda_file']).name
 
         # change directory to temporary folder
         original_directory = os.getcwd()
-        os.chdir(self.tmp_folder)
+        os.chdir(self.stage_io_dict.get("unique_dir"))
 
         # create instructions
         instructions = [
@@ -184,17 +172,20 @@ def launch(self) -> int:
 
         # create zipfile and write output inside
         zf = zipfile.ZipFile(
-            Path(self.io_dict["out"]["output_zip_path"]), "w")
-        for canal_outfile in Path(self.tmp_folder).glob("canal_output*"):
-            zf.write(
-                canal_outfile,
-                arcname=canal_outfile.name)
+            Path(self.stage_io_dict["out"]["output_zip_path"]), "w")
+        for canal_outfile in Path(self.stage_io_dict.get("unique_dir")).glob("canal_output*"):
+            if canal_outfile.suffix != ".zip":  # skip archives only; `in (".zip")` was a substring test on a str
+                zf.write(
+                    canal_outfile,
+                    arcname=canal_outfile.name)
         zf.close()
 
+        # Copy files to host
+        self.copy_to_host()
+
         # Remove temporary file(s)
         self.tmp_files.extend([
-            self.stage_io_dict.get("unique_dir"),
-            self.tmp_folder
+            self.stage_io_dict.get("unique_dir")
         ])
         self.remove_tmp_files()