Curves 3.0.2 and bug fixes

bioexcel · Jun 14, 2024 · 7236a0d · 7236a0d
1 parent 593cd35
commit 7236a0d
Show file tree

Hide file tree

Showing 38 changed files with 401 additions and 423 deletions.
diff --git a/.github/env.yaml b/.github/env.yaml
@@ -7,5 +7,5 @@ dependencies:
   - biobb_common ==4.2.0
   - pandas >=1.3.0
   - scikit-learn >=0.24.2
-  - curves ==3.0.0
+  - curves ==3.0.2
   - imagehash
diff --git a/.github/workflows/linting_and_testing.yml b/.github/workflows/linting_and_testing.yml
@@ -21,7 +21,7 @@ jobs:
     strategy:
       matrix:
         os: [self-hosted]
-        python-version: ["3.8", "3.9"]
+        python-version: ["3.8", "3.9", "3.10"]
     runs-on: ${{ matrix.os }}
     steps:
       - name: Check out repository code

diff --git a/biobb_dna/backbone/bipopulations.py b/biobb_dna/backbone/bipopulations.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 """Module containing the BIPopulations class and the command line interface."""
 
-import shutil
 import argparse
 
 import matplotlib.pyplot as plt
@@ -11,7 +10,6 @@
 from biobb_dna.utils.transform import inverse_complement
 from biobb_common.generic.biobb_object import BiobbObject
 from biobb_common.tools.file_utils import launchlogger
-from biobb_common.tools import file_utils as fu
 from biobb_common.configuration import settings
 
 
@@ -114,28 +112,18 @@ def launch(self) -> int:
                 raise ValueError(
                     "seqpos must be a list of at least two integers")
 
-        # Creating temporary folder
-        self.tmp_folder = fu.create_unique_dir(prefix="backbone_")
-        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)
-
-        # Copy input_file_path1 to temporary folder
-        shutil.copy(self.io_dict['in']['input_epsilC_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_epsilW_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_zetaC_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_zetaW_path'], self.tmp_folder)
-
         # read input files
         epsilC = read_series(
-            self.io_dict['in']['input_epsilC_path'],
+            self.stage_io_dict['in']['input_epsilC_path'],
             usecols=self.seqpos)
         epsilW = read_series(
-            self.io_dict['in']['input_epsilW_path'],
+            self.stage_io_dict['in']['input_epsilW_path'],
             usecols=self.seqpos)
         zetaC = read_series(
-            self.io_dict['in']['input_zetaC_path'],
+            self.stage_io_dict['in']['input_zetaC_path'],
             usecols=self.seqpos)
         zetaW = read_series(
-            self.io_dict['in']['input_zetaW_path'],
+            self.stage_io_dict['in']['input_zetaW_path'],
             usecols=self.seqpos)
 
         # calculate difference between epsil and zeta parameters
@@ -158,7 +146,7 @@ def launch(self) -> int:
             "BI population": BI,
             "BII population": BII})
         Bpopulations_df.to_csv(
-            self.io_dict['out']['output_csv_path'],
+            self.stage_io_dict['out']['output_csv_path'],
             index=False)
 
         # save plot
@@ -185,14 +173,16 @@ def launch(self) -> int:
         axs.set_ylabel("BI/BII Population (%)")
         axs.set_title("Nucleotide parameter: BI/BII Population")
         fig.savefig(
-            self.io_dict['out']['output_jpg_path'],
+            self.stage_io_dict['out']['output_jpg_path'],
             format="jpg")
         plt.close()
 
+        # Copy files to host
+        self.copy_to_host()
+
         # Remove temporary file(s)
         self.tmp_files.extend([
-            self.stage_io_dict.get("unique_dir"),
-            self.tmp_folder
+            self.stage_io_dict.get("unique_dir")
         ])
         self.remove_tmp_files()
 

diff --git a/biobb_dna/backbone/canonicalag.py b/biobb_dna/backbone/canonicalag.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 """Module containing the CanonicalAG class and the command line interface."""
 
-import shutil
 import argparse
 
 import matplotlib.pyplot as plt
@@ -11,7 +10,6 @@
 from biobb_dna.utils.transform import inverse_complement
 from biobb_common.generic.biobb_object import BiobbObject
 from biobb_common.tools.file_utils import launchlogger
-from biobb_common.tools import file_utils as fu
 from biobb_common.configuration import settings
 
 
@@ -119,25 +117,15 @@ def launch(self) -> int:
                 raise ValueError(
                     "seqpos must be a list of at least two integers")
 
-        # Creating temporary folder
-        self.tmp_folder = fu.create_unique_dir(prefix="backbone_")
-        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)
-
-        # Copy input_file_path1 to temporary folder
-        shutil.copy(self.io_dict['in']['input_alphaC_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_alphaW_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_gammaC_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_gammaW_path'], self.tmp_folder)
-
         # read input files
         alphaC = read_series(
-            self.io_dict['in']['input_alphaC_path'], usecols=self.seqpos)
+            self.stage_io_dict['in']['input_alphaC_path'], usecols=self.seqpos)
         alphaW = read_series(
-            self.io_dict['in']['input_alphaW_path'], usecols=self.seqpos)
+            self.stage_io_dict['in']['input_alphaW_path'], usecols=self.seqpos)
         gammaC = read_series(
-            self.io_dict['in']['input_gammaC_path'], usecols=self.seqpos)
+            self.stage_io_dict['in']['input_gammaC_path'], usecols=self.seqpos)
         gammaW = read_series(
-            self.io_dict['in']['input_gammaW_path'], usecols=self.seqpos)
+            self.stage_io_dict['in']['input_gammaW_path'], usecols=self.seqpos)
 
         # fix angle range so it's not negative
         alphaC = self.fix_angles(alphaC)
@@ -161,7 +149,7 @@ def launch(self) -> int:
             "Nucleotide": xlabels,
             "Canonical alpha/gamma": canonical_populations})
         ag_populations_df.to_csv(
-            self.io_dict['out']['output_csv_path'],
+            self.stage_io_dict['out']['output_csv_path'],
             index=False)
 
         # save plot
@@ -188,14 +176,16 @@ def launch(self) -> int:
         axs.set_ylabel("Canonical Alpha-Gamma (%)")
         axs.set_title("Nucleotide parameter: Canonical Alpha-Gamma")
         fig.savefig(
-            self.io_dict['out']['output_jpg_path'],
+            self.stage_io_dict['out']['output_jpg_path'],
             format="jpg")
         plt.close()
 
+        # Copy files to host
+        self.copy_to_host()
+
         # Remove temporary file(s)
         self.tmp_files.extend([
-            self.stage_io_dict.get("unique_dir"),
-            self.tmp_folder
+            self.stage_io_dict.get("unique_dir")
         ])
         self.remove_tmp_files()
 

diff --git a/biobb_dna/backbone/puckering.py b/biobb_dna/backbone/puckering.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 """Module containing the Puckering class and the command line interface."""
 
-import shutil
 import argparse
 
 import matplotlib.pyplot as plt
@@ -11,7 +10,6 @@
 from biobb_dna.utils.transform import inverse_complement
 from biobb_common.generic.biobb_object import BiobbObject
 from biobb_common.tools.file_utils import launchlogger
-from biobb_common.tools import file_utils as fu
 from biobb_common.configuration import settings
 
 
@@ -112,20 +110,12 @@ def launch(self) -> int:
                 raise ValueError(
                     "seqpos must be a list of at least two integers")
 
-        # Creating temporary folder
-        self.tmp_folder = fu.create_unique_dir(prefix="backbone_")
-        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)
-
-        # Copy input_file_path1 to temporary folder
-        shutil.copy(self.io_dict['in']['input_phaseC_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_phaseW_path'], self.tmp_folder)
-
         # read input files
         phaseC = read_series(
-            self.io_dict['in']['input_phaseC_path'],
+            self.stage_io_dict['in']['input_phaseC_path'],
             usecols=self.seqpos)
         phaseW = read_series(
-            self.io_dict['in']['input_phaseW_path'],
+            self.stage_io_dict['in']['input_phaseW_path'],
             usecols=self.seqpos)
 
         # fix angle range so it's not negative
@@ -172,7 +162,7 @@ def launch(self) -> int:
         axs.set_ylabel("Puckering (%)")
         axs.set_title("Nucleotide parameter: Puckering")
         fig.savefig(
-            self.io_dict['out']['output_jpg_path'],
+            self.stage_io_dict['out']['output_jpg_path'],
             format="jpg")
 
         # save table
@@ -183,15 +173,17 @@ def launch(self) -> int:
             "West": Wpop,
             "South": Spop})
         populations.to_csv(
-            self.io_dict['out']['output_csv_path'],
+            self.stage_io_dict['out']['output_csv_path'],
             index=False)
 
         plt.close()
 
+        # Copy files to host
+        self.copy_to_host()
+
         # Remove temporary file(s)
         self.tmp_files.extend([
-            self.stage_io_dict.get("unique_dir"),
-            self.tmp_folder
+            self.stage_io_dict.get("unique_dir")
         ])
         self.remove_tmp_files()
 

diff --git a/biobb_dna/curvesplus/biobb_canal.py b/biobb_dna/curvesplus/biobb_canal.py
@@ -2,7 +2,6 @@
 
 """Module containing the Canal class and the command line interface."""
 import os
-import shutil
 import zipfile
 import argparse
 from pathlib import Path
@@ -111,38 +110,27 @@ def launch(self) -> int:
         self.stage_files()
 
         if self.sequence is None:
-            if self.io_dict['in']['input_lis_file'] is None:
+            if self.stage_io_dict['in']['input_lis_file'] is None:
                 raise RuntimeError(
                     "if no sequence is passed in the configuration, "
                     "you must at least specify `input_lis_file` "
                     "so sequence can be parsed from there")
             lis_lines = Path(
-                self.io_dict['in']['input_lis_file']).read_text().splitlines()
+                self.stage_io_dict['in']['input_lis_file']).read_text().splitlines()
             for line in lis_lines:
                 if line.strip().startswith("Strand  1"):
                     self.sequence = line.split(" ")[-1]
                     fu.log(
                         f"using sequence {self.sequence} "
-                        f"from {self.io_dict['in']['input_lis_file']}",
+                        f"from {self.stage_io_dict['in']['input_lis_file']}",
                         self.out_log)
 
-        # Creating temporary folder
-        self.tmp_folder = fu.create_unique_dir(prefix="canal_")
-        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)
-
-        # copy input files to temporary folder
-        shutil.copy(
-            self.io_dict['in']['input_cda_file'],
-            self.tmp_folder)
-        tmp_cda_path = Path(self.io_dict['in']['input_cda_file']).name
-        if self.io_dict['in']['input_lis_file'] is not None:
-            shutil.copy(
-                self.io_dict['in']['input_lis_file'],
-                self.tmp_folder)
+        # define temporary file name
+        tmp_cda_path = Path(self.stage_io_dict['in']['input_cda_file']).name
 
         # change directory to temporary folder
         original_directory = os.getcwd()
-        os.chdir(self.tmp_folder)
+        os.chdir(self.stage_io_dict.get("unique_dir"))
 
         # create instructions
         instructions = [
@@ -183,17 +171,20 @@ def launch(self) -> int:
 
         # create zipfile and write output inside
         zf = zipfile.ZipFile(
-            Path(self.io_dict["out"]["output_zip_path"]), "w")
-        for canal_outfile in Path(self.tmp_folder).glob("canal_output*"):
-            zf.write(
-                canal_outfile,
-                arcname=canal_outfile.name)
+            Path(self.stage_io_dict["out"]["output_zip_path"]), "w")
+        for canal_outfile in Path(self.stage_io_dict.get("unique_dir")).glob("canal_output*"):
+            if canal_outfile.suffix not in (".zip"):
+                zf.write(
+                    canal_outfile,
+                    arcname=canal_outfile.name)
         zf.close()
 
+        # Copy files to host
+        self.copy_to_host()
+
         # Remove temporary file(s)
         self.tmp_files.extend([
-            self.stage_io_dict.get("unique_dir"),
-            self.tmp_folder
+            self.stage_io_dict.get("unique_dir")
         ])
         self.remove_tmp_files()
 

diff --git a/biobb_dna/curvesplus/biobb_canion.py b/biobb_dna/curvesplus/biobb_canion.py
@@ -4,7 +4,6 @@
 import os
 import zipfile
 import argparse
-import shutil
 from pathlib import Path
 from biobb_common.generic.biobb_object import BiobbObject
 from biobb_common.configuration import settings
@@ -22,7 +21,7 @@ class Canion(BiobbObject):
         input_cdi_path (str): Trajectory input file. File type: input. `Sample file <https://mmb.irbbarcelona.org/biobb-dev/biobb-api/public/samples/THGA_K.cdi>`_. Accepted formats: cdi (edam:format_2330).
         input_afr_path (str): Helical axis frames corresponding to the input conformation to be analyzed. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/THGA.afr>`_. Accepted formats: afr (edam:format_2330).
         input_avg_struc_path (str): Average DNA conformation. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/THGA_avg.pdb>`_. Accepted formats: pdb (edam:format_1476).
-        output_zip_path (str) (Optional): Filename for .zip files containing Canion output files. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canion_output.zip>`_. Accepted formats: zip (edam:format_3987).
+        output_zip_path (str): Filename for .zip files containing Canion output files. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canion_output.zip>`_. Accepted formats: zip (edam:format_3987).
         properties (dict):
             * **bases** (*str*) - (None) Sequence of bases to be analyzed (default is blank, meaning no specified sequence).
             * **type** (*str*) - ('*') Ions (or atoms) to be analyzed. Options are 'Na+', 'K', 'K+', 'Cl', 'Cl-', 'CL', 'P', 'C1*', 'NH1', 'NH2', 'NZ', '1' for all cations, '-1' for all anions, '0' for neutral species or '*' for all available data.
@@ -136,22 +135,14 @@ def launch(self) -> int:
             raise ValueError(("Invalid value for property type! "
                               f"Option include: {ion_type_options}"))
 
-        # Creating temporary folder
-        self.tmp_folder = fu.create_unique_dir(prefix="canion_")
-        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)
-
-        # copy input files to temporary folder
-        shutil.copy(self.io_dict['in']['input_cdi_path'], self.tmp_folder)
-        shutil.copy(self.io_dict['in']['input_afr_path'], self.tmp_folder)
-        shutil.copy(
-            self.io_dict['in']['input_avg_struc_path'], self.tmp_folder)
-        input_cdi_file = Path(self.io_dict['in']['input_cdi_path']).name
-        input_afr_file = Path(self.io_dict['in']['input_afr_path']).name
-        input_avg_struc = Path(self.io_dict['in']['input_avg_struc_path']).name
+        # define temporary file names
+        input_cdi_file = Path(self.stage_io_dict['in']['input_cdi_path']).name
+        input_afr_file = Path(self.stage_io_dict['in']['input_afr_path']).name
+        input_avg_struc = Path(self.stage_io_dict['in']['input_avg_struc_path']).name
 
         # change directory to temporary folder
         original_directory = os.getcwd()
-        os.chdir(self.tmp_folder)
+        os.chdir(self.stage_io_dict.get("unique_dir"))
 
         # create instructions
         instructions = [
@@ -187,16 +178,19 @@ def launch(self) -> int:
 
         # create zipfile and write output inside
         zf = zipfile.ZipFile(
-            Path(self.io_dict["out"]["output_zip_path"]),
+            Path(self.stage_io_dict["out"]["output_zip_path"]),
             "w")
-        for curves_outfile in Path(self.tmp_folder).glob("canion_output*"):
-            zf.write(curves_outfile, arcname=curves_outfile.name)
+        for curves_outfile in Path(self.stage_io_dict.get("unique_dir")).glob("canion_output*"):
+            if curves_outfile.suffix not in (".zip"):
+                zf.write(curves_outfile, arcname=curves_outfile.name)
         zf.close()
 
+        # Copy files to host
+        self.copy_to_host()
+
         # Remove temporary file(s)
         self.tmp_files.extend([
-            self.stage_io_dict.get("unique_dir"),
-            self.tmp_folder
+            self.stage_io_dict.get("unique_dir")
         ])
         self.remove_tmp_files()