Skip to content

Commit

Permalink
Curves 3.0.2 and bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
gbayarri committed Jun 14, 2024
1 parent 593cd35 commit 7236a0d
Show file tree
Hide file tree
Showing 38 changed files with 401 additions and 423 deletions.
2 changes: 1 addition & 1 deletion .github/env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ dependencies:
- biobb_common ==4.2.0
- pandas >=1.3.0
- scikit-learn >=0.24.2
- curves ==3.0.0
- curves ==3.0.2
- imagehash
2 changes: 1 addition & 1 deletion .github/workflows/linting_and_testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
strategy:
matrix:
os: [self-hosted]
python-version: ["3.8", "3.9"]
python-version: ["3.8", "3.9", "3.10"]
runs-on: ${{ matrix.os }}
steps:
- name: Check out repository code
Expand Down
30 changes: 10 additions & 20 deletions biobb_dna/backbone/bipopulations.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
"""Module containing the BIPopulations class and the command line interface."""

import shutil
import argparse

import matplotlib.pyplot as plt
Expand All @@ -11,7 +10,6 @@
from biobb_dna.utils.transform import inverse_complement
from biobb_common.generic.biobb_object import BiobbObject
from biobb_common.tools.file_utils import launchlogger
from biobb_common.tools import file_utils as fu
from biobb_common.configuration import settings


Expand Down Expand Up @@ -114,28 +112,18 @@ def launch(self) -> int:
raise ValueError(
"seqpos must be a list of at least two integers")

# Creating temporary folder
self.tmp_folder = fu.create_unique_dir(prefix="backbone_")
fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

# Copy input_file_path1 to temporary folder
shutil.copy(self.io_dict['in']['input_epsilC_path'], self.tmp_folder)
shutil.copy(self.io_dict['in']['input_epsilW_path'], self.tmp_folder)
shutil.copy(self.io_dict['in']['input_zetaC_path'], self.tmp_folder)
shutil.copy(self.io_dict['in']['input_zetaW_path'], self.tmp_folder)

# read input files
epsilC = read_series(
self.io_dict['in']['input_epsilC_path'],
self.stage_io_dict['in']['input_epsilC_path'],
usecols=self.seqpos)
epsilW = read_series(
self.io_dict['in']['input_epsilW_path'],
self.stage_io_dict['in']['input_epsilW_path'],
usecols=self.seqpos)
zetaC = read_series(
self.io_dict['in']['input_zetaC_path'],
self.stage_io_dict['in']['input_zetaC_path'],
usecols=self.seqpos)
zetaW = read_series(
self.io_dict['in']['input_zetaW_path'],
self.stage_io_dict['in']['input_zetaW_path'],
usecols=self.seqpos)

# calculate difference between epsil and zeta parameters
Expand All @@ -158,7 +146,7 @@ def launch(self) -> int:
"BI population": BI,
"BII population": BII})
Bpopulations_df.to_csv(
self.io_dict['out']['output_csv_path'],
self.stage_io_dict['out']['output_csv_path'],
index=False)

# save plot
Expand All @@ -185,14 +173,16 @@ def launch(self) -> int:
axs.set_ylabel("BI/BII Population (%)")
axs.set_title("Nucleotide parameter: BI/BII Population")
fig.savefig(
self.io_dict['out']['output_jpg_path'],
self.stage_io_dict['out']['output_jpg_path'],
format="jpg")
plt.close()

# Copy files to host
self.copy_to_host()

# Remove temporary file(s)
self.tmp_files.extend([
self.stage_io_dict.get("unique_dir"),
self.tmp_folder
self.stage_io_dict.get("unique_dir")
])
self.remove_tmp_files()

Expand Down
30 changes: 10 additions & 20 deletions biobb_dna/backbone/canonicalag.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
"""Module containing the CanonicalAG class and the command line interface."""

import shutil
import argparse

import matplotlib.pyplot as plt
Expand All @@ -11,7 +10,6 @@
from biobb_dna.utils.transform import inverse_complement
from biobb_common.generic.biobb_object import BiobbObject
from biobb_common.tools.file_utils import launchlogger
from biobb_common.tools import file_utils as fu
from biobb_common.configuration import settings


Expand Down Expand Up @@ -119,25 +117,15 @@ def launch(self) -> int:
raise ValueError(
"seqpos must be a list of at least two integers")

# Creating temporary folder
self.tmp_folder = fu.create_unique_dir(prefix="backbone_")
fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

# Copy input_file_path1 to temporary folder
shutil.copy(self.io_dict['in']['input_alphaC_path'], self.tmp_folder)
shutil.copy(self.io_dict['in']['input_alphaW_path'], self.tmp_folder)
shutil.copy(self.io_dict['in']['input_gammaC_path'], self.tmp_folder)
shutil.copy(self.io_dict['in']['input_gammaW_path'], self.tmp_folder)

# read input files
alphaC = read_series(
self.io_dict['in']['input_alphaC_path'], usecols=self.seqpos)
self.stage_io_dict['in']['input_alphaC_path'], usecols=self.seqpos)
alphaW = read_series(
self.io_dict['in']['input_alphaW_path'], usecols=self.seqpos)
self.stage_io_dict['in']['input_alphaW_path'], usecols=self.seqpos)
gammaC = read_series(
self.io_dict['in']['input_gammaC_path'], usecols=self.seqpos)
self.stage_io_dict['in']['input_gammaC_path'], usecols=self.seqpos)
gammaW = read_series(
self.io_dict['in']['input_gammaW_path'], usecols=self.seqpos)
self.stage_io_dict['in']['input_gammaW_path'], usecols=self.seqpos)

# fix angle range so it's not negative
alphaC = self.fix_angles(alphaC)
Expand All @@ -161,7 +149,7 @@ def launch(self) -> int:
"Nucleotide": xlabels,
"Canonical alpha/gamma": canonical_populations})
ag_populations_df.to_csv(
self.io_dict['out']['output_csv_path'],
self.stage_io_dict['out']['output_csv_path'],
index=False)

# save plot
Expand All @@ -188,14 +176,16 @@ def launch(self) -> int:
axs.set_ylabel("Canonical Alpha-Gamma (%)")
axs.set_title("Nucleotide parameter: Canonical Alpha-Gamma")
fig.savefig(
self.io_dict['out']['output_jpg_path'],
self.stage_io_dict['out']['output_jpg_path'],
format="jpg")
plt.close()

# Copy files to host
self.copy_to_host()

# Remove temporary file(s)
self.tmp_files.extend([
self.stage_io_dict.get("unique_dir"),
self.tmp_folder
self.stage_io_dict.get("unique_dir")
])
self.remove_tmp_files()

Expand Down
24 changes: 8 additions & 16 deletions biobb_dna/backbone/puckering.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
"""Module containing the Puckering class and the command line interface."""

import shutil
import argparse

import matplotlib.pyplot as plt
Expand All @@ -11,7 +10,6 @@
from biobb_dna.utils.transform import inverse_complement
from biobb_common.generic.biobb_object import BiobbObject
from biobb_common.tools.file_utils import launchlogger
from biobb_common.tools import file_utils as fu
from biobb_common.configuration import settings


Expand Down Expand Up @@ -112,20 +110,12 @@ def launch(self) -> int:
raise ValueError(
"seqpos must be a list of at least two integers")

# Creating temporary folder
self.tmp_folder = fu.create_unique_dir(prefix="backbone_")
fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

# Copy input_file_path1 to temporary folder
shutil.copy(self.io_dict['in']['input_phaseC_path'], self.tmp_folder)
shutil.copy(self.io_dict['in']['input_phaseW_path'], self.tmp_folder)

# read input files
phaseC = read_series(
self.io_dict['in']['input_phaseC_path'],
self.stage_io_dict['in']['input_phaseC_path'],
usecols=self.seqpos)
phaseW = read_series(
self.io_dict['in']['input_phaseW_path'],
self.stage_io_dict['in']['input_phaseW_path'],
usecols=self.seqpos)

# fix angle range so it's not negative
Expand Down Expand Up @@ -172,7 +162,7 @@ def launch(self) -> int:
axs.set_ylabel("Puckering (%)")
axs.set_title("Nucleotide parameter: Puckering")
fig.savefig(
self.io_dict['out']['output_jpg_path'],
self.stage_io_dict['out']['output_jpg_path'],
format="jpg")

# save table
Expand All @@ -183,15 +173,17 @@ def launch(self) -> int:
"West": Wpop,
"South": Spop})
populations.to_csv(
self.io_dict['out']['output_csv_path'],
self.stage_io_dict['out']['output_csv_path'],
index=False)

plt.close()

# Copy files to host
self.copy_to_host()

# Remove temporary file(s)
self.tmp_files.extend([
self.stage_io_dict.get("unique_dir"),
self.tmp_folder
self.stage_io_dict.get("unique_dir")
])
self.remove_tmp_files()

Expand Down
41 changes: 16 additions & 25 deletions biobb_dna/curvesplus/biobb_canal.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

"""Module containing the Canal class and the command line interface."""
import os
import shutil
import zipfile
import argparse
from pathlib import Path
Expand Down Expand Up @@ -111,38 +110,27 @@ def launch(self) -> int:
self.stage_files()

if self.sequence is None:
if self.io_dict['in']['input_lis_file'] is None:
if self.stage_io_dict['in']['input_lis_file'] is None:
raise RuntimeError(
"if no sequence is passed in the configuration, "
"you must at least specify `input_lis_file` "
"so sequence can be parsed from there")
lis_lines = Path(
self.io_dict['in']['input_lis_file']).read_text().splitlines()
self.stage_io_dict['in']['input_lis_file']).read_text().splitlines()
for line in lis_lines:
if line.strip().startswith("Strand 1"):
self.sequence = line.split(" ")[-1]
fu.log(
f"using sequence {self.sequence} "
f"from {self.io_dict['in']['input_lis_file']}",
f"from {self.stage_io_dict['in']['input_lis_file']}",
self.out_log)

# Creating temporary folder
self.tmp_folder = fu.create_unique_dir(prefix="canal_")
fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

# copy input files to temporary folder
shutil.copy(
self.io_dict['in']['input_cda_file'],
self.tmp_folder)
tmp_cda_path = Path(self.io_dict['in']['input_cda_file']).name
if self.io_dict['in']['input_lis_file'] is not None:
shutil.copy(
self.io_dict['in']['input_lis_file'],
self.tmp_folder)
# define temporary file name
tmp_cda_path = Path(self.stage_io_dict['in']['input_cda_file']).name

# change directory to temporary folder
original_directory = os.getcwd()
os.chdir(self.tmp_folder)
os.chdir(self.stage_io_dict.get("unique_dir"))

# create instructions
instructions = [
Expand Down Expand Up @@ -183,17 +171,20 @@ def launch(self) -> int:

# create zipfile and write output inside
zf = zipfile.ZipFile(
Path(self.io_dict["out"]["output_zip_path"]), "w")
for canal_outfile in Path(self.tmp_folder).glob("canal_output*"):
zf.write(
canal_outfile,
arcname=canal_outfile.name)
Path(self.stage_io_dict["out"]["output_zip_path"]), "w")
for canal_outfile in Path(self.stage_io_dict.get("unique_dir")).glob("canal_output*"):
if canal_outfile.suffix not in (".zip"):
zf.write(
canal_outfile,
arcname=canal_outfile.name)
zf.close()

# Copy files to host
self.copy_to_host()

# Remove temporary file(s)
self.tmp_files.extend([
self.stage_io_dict.get("unique_dir"),
self.tmp_folder
self.stage_io_dict.get("unique_dir")
])
self.remove_tmp_files()

Expand Down
34 changes: 14 additions & 20 deletions biobb_dna/curvesplus/biobb_canion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import os
import zipfile
import argparse
import shutil
from pathlib import Path
from biobb_common.generic.biobb_object import BiobbObject
from biobb_common.configuration import settings
Expand All @@ -22,7 +21,7 @@ class Canion(BiobbObject):
input_cdi_path (str): Trajectory input file. File type: input. `Sample file <https://mmb.irbbarcelona.org/biobb-dev/biobb-api/public/samples/THGA_K.cdi>`_. Accepted formats: cdi (edam:format_2330).
input_afr_path (str): Helical axis frames corresponding to the input conformation to be analyzed. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/THGA.afr>`_. Accepted formats: afr (edam:format_2330).
input_avg_struc_path (str): Average DNA conformation. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/THGA_avg.pdb>`_. Accepted formats: pdb (edam:format_1476).
output_zip_path (str) (Optional): Filename for .zip files containing Canion output files. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canion_output.zip>`_. Accepted formats: zip (edam:format_3987).
output_zip_path (str): Filename for .zip files containing Canion output files. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canion_output.zip>`_. Accepted formats: zip (edam:format_3987).
properties (dict):
* **bases** (*str*) - (None) Sequence of bases to be analyzed (default is blank, meaning no specified sequence).
* **type** (*str*) - ('*') Ions (or atoms) to be analyzed. Options are 'Na+', 'K', 'K+', 'Cl', 'Cl-', 'CL', 'P', 'C1*', 'NH1', 'NH2', 'NZ', '1' for all cations, '-1' for all anions, '0' for neutral species or '*' for all available data.
Expand Down Expand Up @@ -136,22 +135,14 @@ def launch(self) -> int:
raise ValueError(("Invalid value for property type! "
f"Option include: {ion_type_options}"))

# Creating temporary folder
self.tmp_folder = fu.create_unique_dir(prefix="canion_")
fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

# copy input files to temporary folder
shutil.copy(self.io_dict['in']['input_cdi_path'], self.tmp_folder)
shutil.copy(self.io_dict['in']['input_afr_path'], self.tmp_folder)
shutil.copy(
self.io_dict['in']['input_avg_struc_path'], self.tmp_folder)
input_cdi_file = Path(self.io_dict['in']['input_cdi_path']).name
input_afr_file = Path(self.io_dict['in']['input_afr_path']).name
input_avg_struc = Path(self.io_dict['in']['input_avg_struc_path']).name
# define temporary file names
input_cdi_file = Path(self.stage_io_dict['in']['input_cdi_path']).name
input_afr_file = Path(self.stage_io_dict['in']['input_afr_path']).name
input_avg_struc = Path(self.stage_io_dict['in']['input_avg_struc_path']).name

# change directory to temporary folder
original_directory = os.getcwd()
os.chdir(self.tmp_folder)
os.chdir(self.stage_io_dict.get("unique_dir"))

# create instructions
instructions = [
Expand Down Expand Up @@ -187,16 +178,19 @@ def launch(self) -> int:

# create zipfile and write output inside
zf = zipfile.ZipFile(
Path(self.io_dict["out"]["output_zip_path"]),
Path(self.stage_io_dict["out"]["output_zip_path"]),
"w")
for curves_outfile in Path(self.tmp_folder).glob("canion_output*"):
zf.write(curves_outfile, arcname=curves_outfile.name)
for curves_outfile in Path(self.stage_io_dict.get("unique_dir")).glob("canion_output*"):
if curves_outfile.suffix not in (".zip"):
zf.write(curves_outfile, arcname=curves_outfile.name)
zf.close()

# Copy files to host
self.copy_to_host()

# Remove temporary file(s)
self.tmp_files.extend([
self.stage_io_dict.get("unique_dir"),
self.tmp_folder
self.stage_io_dict.get("unique_dir")
])
self.remove_tmp_files()

Expand Down
Loading

0 comments on commit 7236a0d

Please sign in to comment.