From 5a5a4394fdd73ff0cae828e63363493c0b703bf7 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 26 Jun 2024 15:40:07 +0200 Subject: [PATCH 01/32] adding checks for duplicated parameters in defaults.yaml --- tests/test_gear_yaml2cfg.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/test_gear_yaml2cfg.py b/tests/test_gear_yaml2cfg.py index dbfc28cf5..b0dff15b4 100644 --- a/tests/test_gear_yaml2cfg.py +++ b/tests/test_gear_yaml2cfg.py @@ -1,6 +1,10 @@ """Test yaml2cfg gear.""" import filecmp +import re +import os +import pytest from pathlib import Path +from fnmatch import fnmatch from haddock.gear.yaml2cfg import flat_yaml_cfg, yaml2cfg_text from haddock.libs.libio import read_from_yaml @@ -11,6 +15,17 @@ haddock3_yaml_converted_no_header, ) +@pytest.fixture +def default_yaml_files(): + """Return list of defaults.yaml file withing the haddock src directory.""" + all_defaults_yaml: list[str] = [] + default_yaml_fname = "defaults.yaml" + for path, _subdirs, files in os.walk('../src/haddock/'): + for name in files: + if fnmatch(name, default_yaml_fname): + all_defaults_yaml.append(f"{path}/{default_yaml_fname}") + return all_defaults_yaml + complex_cfg = { "param1": { @@ -97,3 +112,19 @@ def test_yaml2cfg_test_no_header(): shallow=False, ) p.unlink() + + +def test_yaml_duplicated_params(default_yaml_files): + """Make sure no duplicated parameters are present in a ymal file.""" + # Build regular expression + yaml_param_regex = re.compile("^(([A-Za-z0-9]_?)+):") + for yaml_fpath in default_yaml_files: + # Loop over default yaml files + parsed_param_names: dict[str, int] = {} + with open(yaml_fpath, 'r') as filin: + yaml_content = filin.readlines() + for i, line in enumerate(yaml_content, start=1): + if (match := yaml_param_regex.search(line)): + param_name = match.group(1) + assert param_name not in parsed_param_names.keys(), f"Parameter '{param_name}' in {yaml_fpath} has duplicates: l.{parsed_param_names[param_name]} and l.{i}" # 
noqa : E501 + parsed_param_names[param_name] = i From 5b109b5806a45f5c42aa99cb050bc614b738fff6 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 3 Jul 2024 15:47:55 +0200 Subject: [PATCH 02/32] trial1 --- src/haddock/clis/__init__.py | 6 +- src/haddock/core/__init__.py | 4 +- src/haddock/core/defaults.py | 3 + src/haddock/core/typing.py | 1 + src/haddock/gear/prepare_run.py | 99 +++++--- src/haddock/gear/preprocessing.py | 11 +- src/haddock/libs/libio.py | 72 +++--- src/haddock/libs/libontology.py | 228 +++++++++++++++--- src/haddock/libs/libpdb.py | 46 ++-- src/haddock/libs/libstructure.py | 10 +- src/haddock/libs/libutil.py | 13 + src/haddock/libs/libworkflow.py | 21 +- src/haddock/modules/__init__.py | 47 ++-- .../modules/topology/topoaa/__init__.py | 198 ++++----------- .../modules/topology/topoaa/defaults.yaml | 1 - tests/test_libworkflow.py | 22 +- 16 files changed, 463 insertions(+), 319 deletions(-) diff --git a/src/haddock/clis/__init__.py b/src/haddock/clis/__init__.py index 768d257c0..33cf0707f 100644 --- a/src/haddock/clis/__init__.py +++ b/src/haddock/clis/__init__.py @@ -1,8 +1,8 @@ """ Command-line interfaces. -HADDOCK3 has a series of command-line interfaces (CLIs), from which the most important -one is ``haddock3``. You can ask help to the CLIs with the ``-h`` flag:: - +HADDOCK3 has a series of command-line interfaces (CLIs), from which the most +important one is ``haddock3``. +You can ask help to the CLIs with the ``-h`` flag: haddock3 -h """ diff --git a/src/haddock/core/__init__.py b/src/haddock/core/__init__.py index 5fbd962b5..114481b4a 100644 --- a/src/haddock/core/__init__.py +++ b/src/haddock/core/__init__.py @@ -1,3 +1 @@ -""" -Core haddock3 functionalities. 
-""" +"""Core haddock3 functionalities.""" diff --git a/src/haddock/core/defaults.py b/src/haddock/core/defaults.py index 20a705292..b8bb70620 100644 --- a/src/haddock/core/defaults.py +++ b/src/haddock/core/defaults.py @@ -37,6 +37,9 @@ INTERACTIVE_RE_SUFFIX = "interactive" """Suffix added to interactive haddock3-re runs.""" +DATA_DIRNAME = "data" +"""Name given to the directory holding data.""" + CNS_MODULES = ["rigidbody", "flexref", "emscoring", diff --git a/src/haddock/core/typing.py b/src/haddock/core/typing.py index 73a523b0d..c8155caab 100644 --- a/src/haddock/core/typing.py +++ b/src/haddock/core/typing.py @@ -30,6 +30,7 @@ Generic, Iterable, Iterator, + List, Literal, Mapping, MutableMapping, diff --git a/src/haddock/gear/prepare_run.py b/src/haddock/gear/prepare_run.py index 075d2e548..d4c2ab2af 100644 --- a/src/haddock/gear/prepare_run.py +++ b/src/haddock/gear/prepare_run.py @@ -4,8 +4,8 @@ import itertools as it import json import os +import re import shutil -import string import sys import tarfile from contextlib import contextmanager, suppress @@ -14,7 +14,7 @@ from pathlib import Path, PosixPath from haddock import EmptyPath, contact_us, haddock3_source_path, log -from haddock.core.defaults import RUNDIR, max_molecules_allowed +from haddock.core.defaults import RUNDIR, max_molecules_allowed, DATA_DIRNAME from haddock.core.exceptions import ConfigurationError, ModuleError from haddock.core.typing import ( Any, @@ -296,7 +296,7 @@ def setup_run( if restarting_from: remove_folders_after_number(general_params[RUNDIR], restart_from) - _data_dir = Path(general_params[RUNDIR], "data") + _data_dir = Path(general_params[RUNDIR], DATA_DIRNAME) remove_folders_after_number(_data_dir, restart_from) if restarting_from or starting_from_copy: @@ -315,6 +315,12 @@ def setup_run( dec_all=True, ) + first_module_id = list(modules_params.keys())[0] + if (topoaa_module_id := "topoaa.1") in modules_params.keys(): + topology_params = modules_params[topoaa_module_id] + 
else: + topology_params = {} + if starting_from_copy: num_steps = len(step_folders) _num_modules = len(modules_params) @@ -327,24 +333,27 @@ def setup_run( else: copy_molecules_to_topology( general_params["molecules"], - modules_params["topoaa.1"], + topology_params, ) + # copy_molecules_to_topology( + # general_params["molecules"], + # modules_params[first_module_id], + # ) - if len(modules_params["topoaa.1"]["molecules"]) > max_molecules_allowed: + max_mols = len(topology_params["molecules"]) + if max_mols > max_molecules_allowed: raise ConfigurationError( f"Too many molecules defined, max is {max_molecules_allowed}." - ) # noqa: E501 + ) zero_fill.read(modules_params) - populate_topology_molecule_params(modules_params["topoaa.1"]) - populate_mol_parameters(modules_params) - - max_mols = len(modules_params["topoaa.1"]["molecules"]) + populate_topology_molecule_params(topology_params) + populate_mol_parameters(modules_params, topology_params) if not from_scratch: _prev, _new = renum_step_folders(general_params[RUNDIR]) - renum_step_folders(Path(general_params[RUNDIR], "data")) + renum_step_folders(Path(general_params[RUNDIR], DATA_DIRNAME)) if UNPACK_FOLDERS: # only if there was any folder unpacked update_unpacked_names(_prev, _new, UNPACK_FOLDERS) update_step_contents_to_step_names( @@ -367,7 +376,8 @@ def setup_run( if scratch_rest0: copy_molecules_to_data_dir( data_dir, - modules_params["topoaa.1"], + topology_params, + first_module_id, preprocess=general_params["preprocess"], ) @@ -418,8 +428,9 @@ def save_configuration_files(configs: dict, datadir: Union[str, Path]) -> dict: # Initiate files data infofile = { "raw_input": ( - "An untouched copy of the raw input file, " "as provided by the user." - ), + "An untouched copy of the raw input file, " + "as provided by the user." 
+ ), "cleaned_input": ( "Pre-parsed input file where (eventually) " "some indexing and modifications were " @@ -427,8 +438,8 @@ def save_configuration_files(configs: dict, datadir: Union[str, Path]) -> dict: ), "enhanced_haddock_params": ( "Final input file with detailed default parameters." - ), - } + ), + } added_files = {} # Set list of configurations that wish to be saved list_save_conf = [ @@ -775,7 +786,7 @@ def create_data_dir(run_dir: FilePath) -> Path: pathlib.Path A path referring only to 'data'. """ - data_dir = Path(run_dir, "data") + data_dir = Path(run_dir, DATA_DIRNAME) data_dir.mkdir(parents=True, exist_ok=True) return data_dir @@ -789,8 +800,11 @@ def copy_molecules_to_topology( def copy_molecules_to_data_dir( - data_dir: Path, topoaa_params: ParamMap, preprocess: bool = True -) -> None: + data_dir: Path, + topoaa_params: ParamMap, + _first_module_name: str, + preprocess: bool = True, + ) -> None: """ Copy molecules to data directory and to topoaa parameters. @@ -807,7 +821,13 @@ def copy_molecules_to_data_dir( Whether to preprocess input molecules. Defaults to ``True``. See :py:mod:`haddock.gear.preprocessing`. """ - topoaa_dir = zero_fill.fill("topoaa", 0) + # Removes digit from module name + # Build regex to capture '.' 
+ name_digit_regex = re.compile(r"(\w+)\.\d+") + first_module_name: str = "input_molecules" + if match := name_digit_regex.search(_first_module_name): + first_module_name = match.group(1) + topoaa_dir = zero_fill.fill(first_module_name, 0) # define paths data_topoaa_dir = Path(data_dir, topoaa_dir) @@ -815,32 +835,34 @@ def copy_molecules_to_data_dir( rel_data_topoaa_dir = Path(data_dir.name, topoaa_dir) original_mol_dir = Path(data_dir, "original_molecules") + # Init new molecule holder to be filled with relative paths new_molecules: list[Path] = [] + # Loop over input molecules for molecule in copy(topoaa_params["molecules"]): check_if_path_exists(molecule) - mol_name = Path(molecule).name - - if preprocess: # preprocess PDB files - top_fname = topoaa_params.get("ligand_top_fname", False) - new_residues = read_additional_residues(top_fname) if top_fname else None - - new_pdbs = process_pdbs(molecule, user_supported_residues=new_residues) - - # copy the original molecule + # preprocess PDB files + if preprocess: + # copy the un-processed molecule (for later checks) original_mol_dir.mkdir(parents=True, exist_ok=True) original_mol = Path(original_mol_dir, mol_name) shutil.copy(molecule, original_mol) - + # Gather potential user-provided topology file + top_fname = topoaa_params.get("ligand_top_fname", False) + new_residues = read_additional_residues(top_fname) if top_fname else None + # Do the pre-processing of file + new_pdbs = process_pdbs(molecule, user_supported_residues=new_residues) # write the new processed molecule new_pdb = os.linesep.join(new_pdbs[0]) Path(data_topoaa_dir, mol_name).write_text(new_pdb) - + # Do not preprocess else: + # Create a copy of input molecules into `data/0_firstmodule` shutil.copy(molecule, Path(data_topoaa_dir, mol_name)) - - new_molecules.append(Path(rel_data_topoaa_dir, mol_name)) - + # Create relative path of the molecule + data_dir_molecule_relpath = Path(rel_data_topoaa_dir, mol_name) + 
new_molecules.append(data_dir_molecule_relpath) + # Modify molecules parameters to point relative path of copied files topoaa_params["molecules"] = copy(new_molecules) @@ -984,7 +1006,7 @@ def get_expandable_parameters( # the topoaa module is an exception because it has subdictionaries # for the `mol` parameter. Instead of defining a general recursive # function, I decided to add a simple if/else exception. - # no other module should have subdictionaries has parameters + # no other module should have subdictionaries as parameters if get_module_name(module_name) == "topoaa": ap: set[str] = set() # allowed_parameters ap.update(_get_expandable(user_config, defaults, module_name, max_mols)) @@ -1053,7 +1075,10 @@ def populate_topology_molecule_params(topoaa: ParamMap) -> None: return -def populate_mol_parameters(modules_params: ParamMap) -> None: +def populate_mol_parameters( + modules_params: ParamMap, + topology_params: ParamMap, + ) -> None: """ Populate modules subdictionaries with the needed molecule `mol_` parameters. @@ -1079,7 +1104,7 @@ def populate_mol_parameters(modules_params: ParamMap) -> None: Alter the dictionary in place. """ # the starting number of the `mol_` parameters is 1 by CNS definition. 
- num_mols = range(1, len(modules_params["topoaa.1"]["molecules"]) + 1) + num_mols = range(1, len(topology_params["molecules"]) + 1) for module_name, _ in modules_params.items(): # read the modules default parameters defaults = _read_defaults(module_name) diff --git a/src/haddock/gear/preprocessing.py b/src/haddock/gear/preprocessing.py index 1df1db513..ecaabe2ca 100644 --- a/src/haddock/gear/preprocessing.py +++ b/src/haddock/gear/preprocessing.py @@ -386,14 +386,15 @@ def process_pdbs( residues=set.union( supported_HETATM, user_supported_residues or set(), + ), ), - ), convert_HETATM_to_ATOM, partial(wrep_pdb_fixinsert, option_list=[]), ##### partial( - remove_unsupported_hetatm, user_defined=user_supported_residues - ), # noqa: E501 + remove_unsupported_hetatm, + user_defined=user_supported_residues, + ), partial(remove_unsupported_atom), #### # partial(wrep_pdb_shiftres, shifting_factor=0), @@ -401,7 +402,7 @@ def process_pdbs( wrep_pdb_tidy, ### wrep_rstrip, - ] + ] # these functions take the whole PDB content, evaluate it, and # modify it if needed. @@ -455,7 +456,7 @@ def process_pdbs( wrep_pdb_tidy_strict = _report("pdb_tidy")(partial(pdb_tidy.run, strict=True)) wrep_rstrip = _report("str.rstrip")( partial(map, lambda x: x.rstrip(linesep)) -) # noqa: E501 + ) @_report("Replacing HETATM to ATOM for residue {!r}") diff --git a/src/haddock/libs/libio.py b/src/haddock/libs/libio.py index 5b32abeec..948cddb4f 100644 --- a/src/haddock/libs/libio.py +++ b/src/haddock/libs/libio.py @@ -20,7 +20,7 @@ Iterable, Mapping, Optional, -) + ) from haddock.libs.libontology import PDBFile from haddock.libs.libutil import sort_numbered_paths @@ -130,8 +130,8 @@ def open_files_to_lines(*files: FilePath) -> list[list[str]]: def save_lines_to_files( - files: Iterable[FilePath], lines: Iterable[Iterable[str]] -) -> None: + files: Iterable[FilePath], lines: Iterable[Iterable[str]] + ) -> None: """ Save a list of list of lines to files. 
@@ -156,8 +156,8 @@ def save_lines_to_files( def add_suffix_to_files( - files: Iterable[FilePath], suffix: str -) -> Generator[Path, None, None]: + files: Iterable[FilePath], suffix: str + ) -> Generator[Path, None, None]: """ Add a suffix to file paths. @@ -176,11 +176,11 @@ def add_suffix_to_files( def write_dic_to_file( - data_dict: Mapping[Any, Any], - output_fname: FilePath, - info_header: str = "", - sep: str = "\t", -) -> None: + data_dict: Mapping[Any, Any], + output_fname: FilePath, + info_header: str = "", + sep: str = "\t", + ) -> None: """ Create a table from a dictionary. @@ -219,11 +219,11 @@ def write_dic_to_file( def write_nested_dic_to_file( - data_dict: Mapping[Any, Any], - output_fname: FilePath, - info_header: str = "", - sep: str = "\t", -) -> None: + data_dict: Mapping[Any, Any], + output_fname: FilePath, + info_header: str = "", + sep: str = "\t", + ) -> None: """ Create a table from a nested dictionary. @@ -279,8 +279,8 @@ def working_directory(path: FilePath) -> Generator[None, None, None]: def compress_files_ext( - path: FilePath, ext: str, ncores: int = 1, **kwargs: Any -) -> bool: + path: FilePath, ext: str, ncores: int = 1, **kwargs: Any + ) -> bool: """ Compress all files with same extension in folder to `.gz`. @@ -318,11 +318,11 @@ def compress_files_ext( def gzip_files( - file_: FilePath, - block_size: Optional[int] = None, - compresslevel: int = 9, - remove_original: bool = False, -) -> None: + file_: FilePath, + block_size: Optional[int] = None, + compresslevel: int = 9, + remove_original: bool = False, + ) -> None: """ Gzip a file. 
@@ -343,8 +343,8 @@ def gzip_files( gfile = str(file_) + ".gz" with open(file_, "rb") as fin, gzip.open( - gfile, mode="wb", compresslevel=compresslevel - ) as gout: + gfile, mode="wb", compresslevel=compresslevel + ) as gout: content = fin.read(block_size) # read the first while content: gout.write(content) @@ -382,10 +382,10 @@ def archive_files_ext(path: FilePath, ext: str, compresslevel: int = 9) -> bool: if files: with tarfile.open( - Path(path, f"{ext}.tgz"), - mode="w:gz", - compresslevel=compresslevel, - ) as tarout: + Path(path, f"{ext}.tgz"), + mode="w:gz", + compresslevel=compresslevel, + ) as tarout: for file_ in files: tarout.add(file_, arcname=file_.name) @@ -439,10 +439,10 @@ def remove_files_with_ext(folder: FilePath, ext: str) -> None: def folder_exists( - path: FilePath, - exception: type[Exception] = ValueError, - emsg: str = "The folder {!r} does not exist or is not a folder.", -) -> Path: + path: FilePath, + exception: type[Exception] = ValueError, + emsg: str = "The folder {!r} does not exist or is not a folder.", + ) -> Path: """ Assert if a folder exist. @@ -482,10 +482,10 @@ def folder_exists( def file_exists( - path: FilePath, - exception: type[Exception] = ValueError, - emsg: str = "`path` is not a file or does not exist", -) -> Path: + path: FilePath, + exception: type[Exception] = ValueError, + emsg: str = "`path` is not a file or does not exist", + ) -> Path: """ Assert if file exist. 
diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index 0dc267765..3c4317406 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -1,16 +1,25 @@ """Describe the Haddock3 ontology used for communicating between modules.""" import datetime import itertools +import os +import re from enum import Enum from os import linesep +from os.path import getmtime from pathlib import Path - import jsonpickle from haddock.core.defaults import MODULE_IO_FILE -from haddock.core.typing import FilePath, Literal, Optional, TypeVar, Union -from typing import List, Any +from haddock.core.typing import ( + Any, + FilePath, + List, + Optional, + TypeVar, + Union, + ) +from haddock.libs import libpdb NaN = float("nan") @@ -34,13 +43,13 @@ class Persistent: """Any persistent file generated by this framework.""" def __init__( - self, - file_name: FilePath, - file_type: Format, - path: FilePath = ".", - md5: Optional[str] = None, - restr_fname: Optional[FilePath] = None, - ) -> None: + self, + file_name: FilePath, + file_type: Format, + path: FilePath = ".", + md5: Optional[str] = None, + restr_fname: Optional[FilePath] = None, + ) -> None: self.created = datetime.datetime.now().isoformat(" ", "seconds") self.file_name = Path(file_name).name self.file_type = file_type @@ -52,8 +61,9 @@ def __init__( def __repr__(self) -> str: rep = ( - f"[{self.file_type}|{self.created}] " f"{Path(self.path) / self.file_name}" - ) + f"[{self.file_type}|{self.created}] " + f"{Path(self.path) / self.file_name}" + ) return rep def is_present(self) -> bool: @@ -65,15 +75,15 @@ class PDBFile(Persistent): """Represent a PDB file.""" def __init__( - self, - file_name: Union[Path, str], - topology: Optional[Any] = None, - path: Union[Path, str] = ".", - score: float = NaN, - md5: Optional[str] = None, - restr_fname: Optional[Union[Path, str]] = None, - unw_energies: Optional[dict[str, float]] = None, - ) -> None: + self, + file_name: Union[Path, str], + 
topology: Optional[Any] = None, + path: Union[Path, str] = ".", + score: float = NaN, + md5: Optional[str] = None, + restr_fname: Optional[Union[Path, str]] = None, + unw_energies: Optional[dict[str, float]] = None, + ) -> None: super().__init__(file_name, Format.PDB, path, md5, restr_fname) self.topology = topology @@ -101,7 +111,12 @@ def __hash__(self) -> int: class RMSDFile(Persistent): """Represents a RMSD matrix file.""" - def __init__(self, file_name: FilePath, npairs: int, path: FilePath = ".") -> None: + def __init__( + self, + file_name: FilePath, + npairs: int, + path: FilePath = ".", + ) -> None: super().__init__(file_name, Format.MATRIX, path) self.npairs = npairs @@ -136,7 +151,11 @@ def add(self, persistent, mode="i"): else: self.output.append(persistent) - def save(self, path: FilePath = ".", filename: FilePath = MODULE_IO_FILE) -> Path: + def save( + self, + path: FilePath = ".", + filename: FilePath = MODULE_IO_FILE, + ) -> Path: """Save Input/Output needed files by this module to disk.""" fpath = Path(path, filename) with open(fpath, "w") as output_handler: @@ -147,14 +166,15 @@ def save(self, path: FilePath = ".", filename: FilePath = MODULE_IO_FILE) -> Pat def load(self, filename: FilePath) -> None: """Load the content of a given IO filename.""" - with open(filename) as json_file: - content = jsonpickle.decode(json_file.read()) - self.input = content["input"] # type: ignore - self.output = content["output"] # type: ignore + if filename.is_file(): + with open(filename) as json_file: + content = jsonpickle.decode(json_file.read()) + self.input = content["input"] # type: ignore + self.output = content["output"] # type: ignore def retrieve_models( - self, crossdock: bool = False, individualize: bool = False - ) -> list[Union[PDBFile, list[PDBFile]]]: + self, crossdock: bool = False, individualize: bool = False + ) -> list[Union[PDBFile, list[PDBFile]]]: """Retrieve the PDBobjects to be used in the module.""" # Get the models generated in previous 
step model_list: list[PDBFile] = [] @@ -168,7 +188,6 @@ def retrieve_models( elif element.file_type == Format.PDB: # type: ignore model_list.append(element) # type: ignore - if input_dic and not crossdock and not individualize: # check if all ensembles contain the same number of models sub_lists = iter(input_dic.values()) @@ -177,13 +196,17 @@ def retrieve_models( _msg = ( "Different number of models in molecules," " cannot prepare pairwise complexes." - ) + ) raise Exception(_msg) # prepare pairwise combinations - model_list = [values for values in zip(*input_dic.values())] # type: ignore + model_list = [ + values for values in zip(*input_dic.values()) + ] # type: ignore elif input_dic and crossdock and not individualize: - model_list = [values for values in itertools.product(*input_dic.values())] # type: ignore + model_list = [ + values for values in itertools.product(*input_dic.values()) + ] # type: ignore elif input_dic and individualize: model_list = list(itertools.chain(*input_dic.values())) @@ -231,11 +254,148 @@ def remove_missing(self) -> None: if not element.is_present(): idxs.append(idx) - self.output = [value for i, value in enumerate(self.output) if i not in idxs] + self.output = [ + value for i, value in enumerate(self.output) + if i not in idxs + ] def __repr__(self) -> str: return f"Input: {self.input}{linesep}Output: {self.output}" + def load_from_input_molecules( + self, + input_molecules_dir: Path, + ) -> None: + """Load first molecules at the stat of the workflow. + + Parameters + ---------- + input_molecules_dir : Path + Directory where the input molecules are stored. + """ + # Gather all input molecules + input_molecules = list(input_molecules_dir.glob('*.pdb')) + # Sort them by creation date (which is also input order) + input_molecules.sort(key=getmtime) # FIXME: getctime ? 
+ # Set input attribute + self.input = input_molecules + + # Set parsing variables + molecules_dic: dict[int, dict[int, PDBFile]] = {} + # Loop over input molecules + for i, molecule in enumerate(self.input, start=1): + # Split models (these come already sorted) + splited_models = libpdb.split_ensemble( + molecule, + dest=input_molecules_dir, + ) + # get the MD5 hash of each model + md5_dic = self.get_md5(molecule) + origin_names = self.get_ensemble_origin(molecule) + # Initiate with empty list + molecules_dic.setdefault(i, {}) + # Loop over conformers of this ensemble + for j, model in enumerate(splited_models): + processed_model = model + model_name = model.stem + # Search of md5 information + md5_hash = None + try: + model_id = int(model_name.split("_")[-1]) + except ValueError: + model_id = 0 + if model_id in md5_dic: + md5_hash = md5_dic[model_id] + # Check if origin or md5 is available + if md5_hash or model_id in origin_names.keys(): + # Select prefix + if md5_hash: # Prioritize the md5 hash + prefix_name = md5_hash + else: + prefix_name = origin_names[model_id] + # Build new filename + model_new_name = f"{prefix_name}_from_{model_name}" + # Rename file + processed_model = model.rename( + Path( + input_molecules_dir, + f"{model_new_name}.{Format.PDB}", + ) + ) + # Create a PDBFile object + pdbfile = PDBFile( + processed_model, + md5=md5_hash, + ) + # Modify relative path attribute + pdbfile.rel_path = Path( + "..", + input_molecules_dir, + pdbfile.file_name + ) + # Set origin name + pdbfile.ori_name = molecule + # Hold that conformer/model + molecules_dic[i][j] = pdbfile + # And fake them to be the output of the previous io + self.output = list(molecules_dic.values()) + + @staticmethod + def get_md5(ensemble_f: FilePath) -> dict[int, str]: + """Get MD5 hash of a multi-model PDB file.""" + md5_dic: dict[int, str] = {} + text = Path(ensemble_f).read_text() + lines = text.split(os.linesep) + REMARK_lines = (line for line in lines if line.startswith("REMARK")) + 
remd5 = re.compile(r"^[a-f0-9]{32}$") + for line in REMARK_lines: + parts = line.strip().split() + + try: + idx = parts.index("MODEL") + except ValueError: # MODEL not in parts, this line can be ignored + continue + + # check if there's a md5 hash in line + for part in parts: + group = remd5.fullmatch(part) + if group: + # the model num comes after the MODEL + model_num = int(parts[idx + 1]) + md5_dic[model_num] = group.string # md5 hash + break + + return md5_dic + + @staticmethod + def get_ensemble_origin(ensemble_f: FilePath) -> dict[int, str]: + """Try to find origin for each model in ensemble. + + Parameters + ---------- + ensemble_f : FilePath + Path to a pdb file containing an ensemble. + + Returns + ------- + origin_dic : dict[int, str] + Dictionary holding as keys the modelID and values its origin. + """ + origin_dic: dict[int, str] = {} + text = Path(ensemble_f).read_text() + lines = text.split(os.linesep) + REMARK_lines = (line for line in lines if line.startswith("REMARK")) + re_origin = re.compile( + r"REMARK\s+MODEL\s+(\d+)\s+(FROM|from|From)\s+(([\w_-]+\.?)+)" + ) + for line in REMARK_lines: + if (match := re_origin.search(line)): + model_num = int(match.group(1).strip()) + original_path = match.group(3).strip() + original_name = Path(original_path).stem + origin_dic[model_num] = original_name + return origin_dic + PDBPath = Union[PDBFile, Path] diff --git a/src/haddock/libs/libpdb.py b/src/haddock/libs/libpdb.py index 1ced78d8a..7d7e4ec6b 100644 --- a/src/haddock/libs/libpdb.py +++ b/src/haddock/libs/libpdb.py @@ -17,8 +17,11 @@ Optional, Union, ) -from haddock.libs.libio import working_directory -from haddock.libs.libutil import get_result_or_same_in_list, sort_numbered_paths +from haddock.libs.libutil import ( + get_result_or_same_in_list, + sort_numbered_paths, + working_directory, + ) slc_record = slice(0, 6) @@ -110,8 +113,10 @@ def get_supported_residues(haddock_topology: FilePath) -> list[str]: _to_keep = list(supported_residues) -def 
split_ensemble(pdb_file_path: Path, - dest: Optional[FilePath] = None) -> list[Path]: +def split_ensemble( + pdb_file_path: Path, + dest: Optional[FilePath] = None, + ) -> list[Path]: """ Split a multimodel PDB file into different structures. @@ -119,15 +124,24 @@ def split_ensemble(pdb_file_path: Path, ---------- dest : str or pathlib.Path Destination folder. + + Returns + ------- + pdb_files_list : list[Path] + List of pdb file(s). """ if dest is None: dest = Path.cwd() - assert pdb_file_path.is_file(), pdb_file_path + assert pdb_file_path.is_file(), \ + f"File '{pdb_file_path}' could not be found in file system." with open(pdb_file_path) as input_handler: with working_directory(dest): split_model(input_handler) - - return sort_numbered_paths(*get_new_models(pdb_file_path)) + pdb_files_list = sort_numbered_paths( + *get_new_models(pdb_file_path) + ) + pdb_files_path = [Path(dest, pdb_fname) for pdb_fname in pdb_files_list] + return pdb_files_path def split_by_chain(pdb_file_path: FilePath) -> list[Path]: @@ -164,7 +178,8 @@ def swap_segid_chain(pdb_file_path: FilePath, def sanitize( pdb_file_path: FilePathT, overwrite: bool = True, - custom_topology: Optional[FilePath] = None) -> Union[FilePathT, Path]: + custom_topology: Optional[FilePath] = None, + ) -> Union[FilePathT, Path]: """Sanitize a PDB file.""" if custom_topology: custom_res_to_keep = get_supported_residues(custom_topology) @@ -179,7 +194,7 @@ def sanitize( for tag, new_tag in _to_rename.items(): line = line.replace(tag, new_tag) # check if this residue is known - res = line[17:20].strip() + res = line[slc_resname].strip() if res and res in _to_keep: good_lines.append(line) if len(good_lines) > 0 and good_lines[-1] != "END": @@ -206,11 +221,11 @@ def identify_chainseg(pdb_file_path: FilePath, for line in input_handler: if line.startswith(("ATOM ", "HETATM")): try: - segid = line[72:76].strip()[:1] + segid = line[slc_segid].strip()[:1] except IndexError: segid = "" try: - chainid = line[21].strip() + 
chainid = line[slc_chainid].strip() except IndexError: chainid = "" @@ -221,7 +236,8 @@ def identify_chainseg(pdb_file_path: FilePath, if not segid and not chainid: raise ValueError( - f"Could not identify chainID or segID in pdb {pdb_file_path}, line {line}" + "Could not identify chainID or segID" + f" in pdb {pdb_file_path}, line {line}" ) if sort: @@ -246,8 +262,10 @@ def get_new_models(pdb_file_path: FilePath) -> list[Path]: return new_models -def get_pdb_file_suffix_variations(file_name: FilePath, - sep: str = "_") -> list[Path]: +def get_pdb_file_suffix_variations( + file_name: FilePath, + sep: str = "_", + ) -> list[Path]: """ List suffix variations of a PDB file in the current path. diff --git a/src/haddock/libs/libstructure.py b/src/haddock/libs/libstructure.py index 9f8164436..46e6d6d62 100644 --- a/src/haddock/libs/libstructure.py +++ b/src/haddock/libs/libstructure.py @@ -23,10 +23,12 @@ class Molecule: as ``file_name``. """ - def __init__(self, - file_name: Path, - segid: Optional[int] = None, - no_parent: bool = False) -> None: + def __init__( + self, + file_name: Path, + segid: Optional[int] = None, + no_parent: bool = False, + ) -> None: # the rest of the code is too dependent on the Path API assert isinstance(file_name, Path), \ f"`file_name` must be pathlib.Path: {type(file_name)} given" diff --git a/src/haddock/libs/libutil.py b/src/haddock/libs/libutil.py index 60a07a52c..2fdbce576 100644 --- a/src/haddock/libs/libutil.py +++ b/src/haddock/libs/libutil.py @@ -1,6 +1,7 @@ """General utilities.""" import collections.abc import contextlib +import os import re import shutil import subprocess @@ -379,3 +380,15 @@ def recursive_convert_paths_to_strings(params: ParamMapT) -> ParamMapT: params[param] = value return params + + +# thanks to @brianjimenez +@contextlib.contextmanager +def working_directory(path: FilePath) -> Generator[None, None, None]: + """Change working directory and returns to previous on exit.""" + prev_cwd = Path.cwd() + 
os.chdir(path) + try: + yield + finally: + os.chdir(prev_cwd) diff --git a/src/haddock/libs/libworkflow.py b/src/haddock/libs/libworkflow.py index 41b4fe324..e3ef4cb65 100644 --- a/src/haddock/libs/libworkflow.py +++ b/src/haddock/libs/libworkflow.py @@ -17,18 +17,18 @@ from haddock.modules import ( modules_category, non_mandatory_general_parameters_defaults, -) + ) class WorkflowManager: """Read and execute workflows.""" def __init__( - self, - workflow_params: ModuleParams, - start: Optional[int] = 0, - **other_params: Any, - ) -> None: + self, + workflow_params: ModuleParams, + start: Optional[int] = 0, + **other_params: Any, + ) -> None: self.start = 0 if start is None else start self.recipe = Workflow(workflow_params, start=0, **other_params) # terminate is used to synchronize the `clean` option with the @@ -149,8 +149,13 @@ def execute(self) -> None: # Import the module given by the mode or default module_name = ".".join( - ["haddock", "modules", modules_category[self.module_name], self.module_name] - ) + [ + "haddock", + "modules", + modules_category[self.module_name], + self.module_name + ] + ) module_lib = importlib.import_module(module_name) self.module = module_lib.HaddockModule(order=self.order, path=self.working_path) diff --git a/src/haddock/modules/__init__.py b/src/haddock/modules/__init__.py index 65f5ab833..2efd25d50 100644 --- a/src/haddock/modules/__init__.py +++ b/src/haddock/modules/__init__.py @@ -7,7 +7,11 @@ from pathlib import Path from haddock import EmptyPath, log, modules_defaults_path -from haddock.core.defaults import MODULE_IO_FILE, INTERACTIVE_RE_SUFFIX +from haddock.core.defaults import ( + DATA_DIRNAME, + INTERACTIVE_RE_SUFFIX, + MODULE_IO_FILE, + ) from haddock.core.exceptions import ConfigurationError from haddock.core.typing import ( Any, @@ -122,7 +126,6 @@ def __init__(self, order: int, path: Path, params_fname: FilePath) -> None: """ self.order = order self.path = path - self.previous_io = self._load_previous_io() # 
instantiate module's parameters self._origignal_config_file = params_fname @@ -132,6 +135,7 @@ def __init__(self, order: int, path: Path, params_fname: FilePath) -> None: self._params: ParamDict = {} self.update_params(update_from_cfg_file=params_fname) + self.previous_io = self._load_previous_io() @property def params(self) -> ParamDict: @@ -207,7 +211,7 @@ def save_config(self, path: FilePath) -> None: # ... ignore = config_mandatory_general_parameters.union( non_mandatory_general_parameters_defaults - ) # noqa: 501 + ) params = deepcopy(self.params) with suppress(KeyError): @@ -262,7 +266,7 @@ def confirm_installation(cls) -> None: """ return - def export_io_models(self, faulty_tolerance=0): + def export_io_models(self, faulty_tolerance: int = 0) -> None: """ Export input/output to the ModuleIO interface. @@ -296,7 +300,6 @@ def export_io_models(self, faulty_tolerance=0): f"and tolerance was set to {faulty_tolerance:.2f}%." ) self.finish_with_error(_msg) - def finish_with_error(self, reason: object = "Module has failed.") -> None: """Finish with error message.""" @@ -310,32 +313,44 @@ def _load_previous_io( self, filename: FilePath = MODULE_IO_FILE, ) -> ModuleIO: - if self.order == 0: - self._num_of_input_molecules = 0 - return ModuleIO() - io = ModuleIO() - previous_io = Path(self.previous_path(), filename) - - if previous_io.is_file(): + # In case of the first step in the workflow + if self.order == 0: + self._load_first_io(io) + else: + previous_io = Path(self.previous_path(), filename) io.load(previous_io) - + # Count number of molecules self._num_of_input_molecules = len(io.output) - return io + + def _load_first_io(self, io: ModuleIO) -> None: + """Provide the first ModuleIO. + + Parameters + ---------- + io : ModuleIO + The content of the step -1 moduleIO object. 
+ """ + # Point input molecules path + input_molecules_dir = Path(DATA_DIRNAME, self.path) + # Generate pdb files + io.load_from_input_molecules(input_molecules_dir) def previous_path(self) -> Path: """Give the path from the previous calculation.""" previous = get_module_steps_folders(self.path.resolve().parent) try: - # return Path(previous[self.order - 1]) return self.last_step_folder(previous, self.order - 1) except IndexError: return self.path @staticmethod - def last_step_folder(folders, index): + def last_step_folder( + folders: list[str], + index: int, + ) -> Optional[str]: """Retrieve last step folder.""" with_ind = [ folder for folder in folders diff --git a/src/haddock/modules/topology/topoaa/__init__.py b/src/haddock/modules/topology/topoaa/__init__.py index 2322df408..d85e90dd6 100644 --- a/src/haddock/modules/topology/topoaa/__init__.py +++ b/src/haddock/modules/topology/topoaa/__init__.py @@ -1,7 +1,5 @@ """Create and manage CNS all-atom topology.""" import operator -import os -import re from functools import partial from pathlib import Path @@ -12,9 +10,8 @@ load_workflow_params, prepare_output, prepare_single_input, -) + ) from haddock.libs.libontology import Format, PDBFile, TopologyFile -from haddock.libs.libstructure import make_molecules from haddock.libs.libsubprocess import CNSJob from haddock.modules import get_engine from haddock.modules.base_cns_module import BaseCNSModule @@ -25,12 +22,12 @@ def generate_topology( - input_pdb: Path, - recipe_str: str, - defaults: ParamMap, - mol_params: ParamMap, - default_params_path: Optional[FilePath] = None, -) -> Path: + input_pdb: Path, + recipe_str: str, + defaults: ParamMap, + mol_params: ParamMap, + default_params_path: Optional[FilePath] = None, + ) -> Path: """Generate a HADDOCK topology file from input_pdb.""" # generate params headers general_param = load_workflow_params(**defaults) @@ -40,12 +37,12 @@ def generate_topology( # generate default headers link, trans_vec, tensor, scatter, axis, 
water_box = generate_default_header( path=default_params_path - ) + ) output = prepare_output( output_pdb_filename=f"{input_pdb.stem}_haddock{input_pdb.suffix}", output_psf_filename=f"{input_pdb.stem}_haddock.{Format.TOPOLOGY}", - ) + ) input_str = prepare_single_input(str(input_pdb)) @@ -60,7 +57,7 @@ def generate_topology( axis, water_box, recipe_str, - ) + ) inp = "".join(inp_parts) @@ -76,8 +73,11 @@ class HaddockModule(BaseCNSModule): name = RECIPE_PATH.name def __init__( - self, order: int, path: Path, initial_params: FilePath = DEFAULT_CONFIG - ) -> None: + self, + order: int, + path: Path, + initial_params: FilePath = DEFAULT_CONFIG, + ) -> None: cns_script = RECIPE_PATH / "cns" / "generate-topology.cns" super().__init__(order, path, initial_params, cns_script=cns_script) @@ -86,73 +86,24 @@ def confirm_installation(cls) -> None: """Confirm if module is installed.""" return - @staticmethod - def get_md5(ensemble_f: FilePath) -> dict[int, str]: - """Get MD5 hash of a multi-model PDB file.""" - md5_dic: dict[int, str] = {} - text = Path(ensemble_f).read_text() - lines = text.split(os.linesep) - REMARK_lines = (line for line in lines if line.startswith("REMARK")) - remd5 = re.compile(r"^[a-f0-9]{32}$") - for line in REMARK_lines: - parts = line.strip().split() - - try: - idx = parts.index("MODEL") - except ValueError: # MODEL not in parts, this line can be ignored - continue - - # check if there's a md5 hash in line - for part in parts: - group = remd5.fullmatch(part) - if group: - # the model num comes after the MODEL - model_num = int(parts[idx + 1]) - md5_dic[model_num] = group.string # md5 hash - break - - return md5_dic - - @staticmethod - def get_ensemble_origin(ensemble_f: FilePath) -> dict[int, str]: - """Try to find origin for each model in ensemble. - - Parameters - ---------- - ensemble_f : FilePath - Path to a pdb file containing an ensemble. 
- - Returns - ------- - origin_dic : dict[int, str] - Dictionary holding as keys the modelID and values its origin. - """ - origin_dic: dict[int, str] = {} - text = Path(ensemble_f).read_text() - lines = text.split(os.linesep) - REMARK_lines = (line for line in lines if line.startswith("REMARK")) - re_origin = re.compile("REMARK\s+MODEL\s+(\d+)\s+(FROM|from|From)\s+(([\w_-]+\.?)+)") # noqa : E501 - for line in REMARK_lines: - if (match := re_origin.search(line)): - model_num = int(match.group(1).strip()) - original_path = match.group(3).strip() - original_name = Path(original_path).stem - origin_dic[model_num] = original_name - return origin_dic - - def _run(self) -> None: """Execute module.""" + md5_dic: dict[int, dict[int, str]] = {} + self.params.pop("molecules") + molecules: list[list[Path]] = [] if self.order == 0: - # topoaa is the first step in the workflow - molecules = make_molecules(self.params.pop("molecules")) - + _molecules = self.previous_io.output + for i, models in enumerate(_molecules, start=1): + molecules.append([model.rel_path for model in models.values()]) + md5_dic[i] = { + j: model.md5 + for j, model in enumerate(models.values()) + } else: # in case topoaa is not the first step, the topology is rebuilt for # each retrieved model _molecules = self.previous_io.retrieve_models() - molecules_paths: list[Path] = [mol.rel_path for mol in _molecules] # type: ignore - molecules = make_molecules(molecules_paths, no_parent=True) + molecules = [[mol.rel_path] for mol in _molecules] # extracts `input` key from params. 
The `input` keyword needs to # be treated separately @@ -161,62 +112,40 @@ def _run(self) -> None: if k.startswith("mol") and k[3:].isdigit(): mol_params[k] = self.params.pop(k) - # to facilitate the for loop down the line, we create a list with the + # to facilitate the for loop down the line, we create a list with the # keys of `mol_params` with inverted order (we will use .pop) mol_params_keys = list(mol_params.keys())[::-1] # limit is only useful when order == 0 if self.order == 0 and self.params["limit"]: mol_params_get = mol_params_keys.pop - # `else` is used in any case where limit is False. else: mol_params_get = partial(operator.getitem, mol_params_keys, -1) # Pool of jobs to be executed by the CNS engine jobs: list[CNSJob] = [] - models_dic: dict[int, list[Path]] = {} - ens_dic: dict[int, dict[int, str]] = {} - origi_ens_dic: dict[int, dict[int, str]] = {} - for i, molecule in enumerate(molecules, start=1): - self.log(f"Molecule {i}: {molecule.file_name.name}") + + for i, models in enumerate(molecules, start=1): + self.log(f"Molecule {i}") models_dic[i] = [] - # Copy the molecule to the step folder - - # Split models - self.log( - f"Split models if needed for {molecule.with_parent}", - level="debug", - ) - # these come already sorted - splited_models = libpdb.split_ensemble( - molecule.with_parent, - dest=Path.cwd(), - ) - - # get the MD5 hash of each model - ens_dic[i] = self.get_md5(molecule.with_parent) - origi_ens_dic[i] = self.get_ensemble_origin(molecule.with_parent) # nice variable name, isn't it? 
:-) # molecule parameters are shared among models of the same molecule parameters_for_this_molecule = mol_params[mol_params_get()] - for model in splited_models: - self.log(f"Sanitizing molecule {model.name}") - models_dic[i].append(model) - + for model in models: + self.log(f"Sanitizing model {model.name}") + custom_top: Optional[FilePath] = None if self.params["ligand_top_fname"]: custom_top = self.params["ligand_top_fname"] self.log(f"Using custom topology {custom_top}") - libpdb.sanitize( - model, - overwrite=True, - custom_topology=custom_top, - ) - - else: - libpdb.sanitize(model, overwrite=True) + libpdb.sanitize( + model, + overwrite=True, + custom_topology=custom_top, + ) + models_dic[i].append(model) # Prepare generation of topologies jobs topology_filename = generate_topology( @@ -225,7 +154,7 @@ def _run(self) -> None: self.params, parameters_for_this_molecule, default_params_path=self.toppar_path, - ) + ) self.log( f"Topology CNS input created in {topology_filename.name}" @@ -239,7 +168,7 @@ def _run(self) -> None: output_filename, envvars=self.envvars, cns_exec=self.params["cns_exec"], - ) + ) jobs.append(job) @@ -250,54 +179,27 @@ def _run(self) -> None: engine.run() self.log("CNS jobs have finished") - # Check for generated output, fail it not all expected files + # Check for generated output, fail if not all expected files # are found expected: dict[int, dict[int, PDBFile]] = {} - for i in models_dic: - expected[i] = {} - md5_dic = ens_dic[i] - origin_names = origi_ens_dic[i] - for j, model in enumerate(models_dic[i]): - md5_hash = None - try: - model_id = int(model.stem.split("_")[-1]) - except ValueError: - model_id = 0 - - if model_id in md5_dic: - md5_hash = md5_dic[model_id] - + for i, models in models_dic.items(): + expected.setdefault(i, {}) + for j, model in enumerate(models): model_name = model.stem processed_pdb = Path(f"{model_name}_haddock.{Format.PDB}") processed_topology = Path( f"{model_name}_haddock.{Format.TOPOLOGY}" ) - - # Check 
if origin or md5 is available - if md5_hash or model_id in origin_names.keys(): - # Select prefix - if md5_hash: - prefix_name = md5_hash - else: - prefix_name = origin_names[model_id] - # Check if topology and file created - if processed_pdb.exists() and processed_topology.exists(): - # Build new filename - model_name = f"{prefix_name}_from_{model_name}" - # Rename files - processed_pdb = processed_pdb.rename( - f"{model_name}_haddock.{Format.PDB}" - ) - processed_topology = processed_topology.rename( - f"{model_name}_haddock.{Format.TOPOLOGY}" - ) - topology = TopologyFile(processed_topology, path=".") + try: + md5 = md5_dic[i][j] + except KeyError: + md5 = None pdb = PDBFile( file_name=processed_pdb, topology=topology, path=".", - md5=md5_hash, + md5=md5, ) pdb.ori_name = model.stem expected[i][j] = pdb diff --git a/src/haddock/modules/topology/topoaa/defaults.yaml b/src/haddock/modules/topology/topoaa/defaults.yaml index 237089432..c2eebea16 100644 --- a/src/haddock/modules/topology/topoaa/defaults.yaml +++ b/src/haddock/modules/topology/topoaa/defaults.yaml @@ -166,4 +166,3 @@ molecules: and type values of this parameter are meaningless. 
explevel: hidden group: molecules - diff --git a/tests/test_libworkflow.py b/tests/test_libworkflow.py index 88e52724c..46e2aac9a 100644 --- a/tests/test_libworkflow.py +++ b/tests/test_libworkflow.py @@ -1,6 +1,7 @@ """Uni-test functions for the Workflow Manager.""" import tempfile +from haddock.libs.libutil import working_directory from haddock.libs.libworkflow import WorkflowManager from haddock.core.typing import Any @@ -19,13 +20,14 @@ def test_WorkflowManager(caplog): } } with tempfile.TemporaryDirectory(dir=".") as _tmpdir: - workflow = WorkflowManager( - ParamDict, - start=0, - other_params=Any, - ) - workflow.postprocess() - first_log_line = str(caplog.records[0].message) - second_log_line = str(caplog.records[1].message) - assert first_log_line == "Reading instructions step 0_topoaa" - assert second_log_line == "Running haddock3-analyse on ./, modules [], with top_cluster = 10" # noqa : E501 + with working_directory(_tmpdir): + workflow = WorkflowManager( + ParamDict, + start=0, + other_params=Any, + ) + workflow.postprocess() + first_log_line = str(caplog.records[0].message) + second_log_line = str(caplog.records[1].message) + assert first_log_line == "Reading instructions step 0_topoaa" + assert second_log_line == "Running haddock3-analyse on ./, modules [], with top_cluster = 10" # noqa : E501 From 727c23fc9843481a7f467c3da79b296961278269 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 3 Jul 2024 17:07:24 +0200 Subject: [PATCH 03/32] fix types --- src/haddock/libs/libontology.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index 3c4317406..ee5cfe77a 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -200,13 +200,9 @@ def retrieve_models( raise Exception(_msg) # prepare pairwise combinations - model_list = [ - values for values in zip(*input_dic.values()) - ] # type: ignore + model_list = [values for values in 
zip(*input_dic.values())] # type: ignore elif input_dic and crossdock and not individualize: - model_list = [ - values for values in itertools.product(*input_dic.values()) - ] # type: ignore + model_list = [values for values in itertools.product(*input_dic.values())] # type: ignore elif input_dic and individualize: model_list = list(itertools.chain(*input_dic.values())) From c19b2cdd5153ac1c99545fad93e4bffd1be57fd1 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 14:35:16 +0200 Subject: [PATCH 04/32] remove E203 and E501 lint checks from examples/run_tests.py --- examples/run_tests.py | 64 +++++++++++++++++++++---------------------- tox.ini | 3 +- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/examples/run_tests.py b/examples/run_tests.py index 938a0165a..6ec8ddaa2 100644 --- a/examples/run_tests.py +++ b/examples/run_tests.py @@ -47,34 +47,34 @@ # keys are the examples folder, and values are the configuration files # the whitespaces below are anti-pythonic but facilitate reading :-) examples = ( - ("docking-antibody-antigen" , "docking-antibody-antigen-ranairCDR-test.cfg"), # noqa: E203, E501 - ("docking-antibody-antigen" , "docking-antibody-antigen-ranairCDR-clt-test.cfg"), # noqa: E203, E501 - ("docking-antibody-antigen" , "docking-antibody-antigen-CDR-accessible-test.cfg"), # noqa: E203, E501 - ("docking-antibody-antigen" , "docking-antibody-antigen-CDR-accessible-clt-test.cfg"), # noqa: E203, E501 - ("docking-antibody-antigen" , "docking-antibody-antigen-CDR-NMR-CSP-test.cfg"), # noqa: E203, E501 - ("docking-protein-DNA" , "docking-protein-DNA-test.cfg"), # noqa: E203, E501 - ("docking-protein-DNA" , "docking-protein-DNA-mdref-test.cfg"), # noqa: E203, E501 - ("docking-protein-homotrimer" , "docking-protein-homotrimer-test.cfg"), # noqa: E203, E501 - ("docking-protein-glycan" , "docking-protein-glycan-test.cfg"), # noqa: E203, E501 - ("docking-protein-glycan" , "docking-protein-glycan-ilrmsd-test.cfg"), # noqa: E203, E501 - 
("docking-protein-glycan" , "docking-flexref-protein-glycan-test.cfg"), # noqa: E203, E501 - ("docking-protein-ligand-shape", "docking-protein-ligand-shape-test.cfg"), # noqa: E203, E501 - ("docking-protein-ligand" , "docking-protein-ligand-test.cfg"), # noqa: E203, E501 - ("docking-protein-peptide" , "docking-protein-peptide-test.cfg"), # noqa: E203, E501 - ("docking-protein-peptide" , "docking-protein-peptide-mdref-test.cfg"), # noqa: E203, E501 - ("docking-protein-protein" , "docking-protein-protein-test.cfg"), # noqa: E203, E501 - ("docking-protein-protein" , "docking-protein-protein-cltsel-test.cfg"), # noqa: E203, E501 - ("docking-protein-protein" , "docking-protein-protein-mdref-test.cfg"), # noqa: E203, E501 - ("docking-multiple-ambig" , "docking-multiple-tbls-test.cfg"), # noqa: E203, E501 - ("docking-protein-protein" , "docking-exit-test.cfg"), # noqa: E203, E501 - ("refine-complex" , "refine-complex-test.cfg"), # noqa: E203, E501 - ("scoring" , "emscoring-test.cfg"), # noqa: E203, E501 - ("scoring" , "mdscoring-test.cfg"), # noqa: E203, E501 - ("scoring" , "emscoring-mdscoring-test.cfg"), # noqa: E203, E501 - ("analysis" , "topoaa-caprieval-test.cfg"), # noqa: E203, E501 - ("analysis" , "topoaa-ilrmsdmatrix-clustrmsd-test.cfg"), # noqa: E203, E501 - ("analysis" , "alascan-test.cfg"), # noqa: E203, E501 - ("analysis" , "contmap-test.cfg"), # noqa: E203, E501 + ("docking-antibody-antigen" , "docking-antibody-antigen-ranairCDR-test.cfg"), + ("docking-antibody-antigen" , "docking-antibody-antigen-ranairCDR-clt-test.cfg"), + ("docking-antibody-antigen" , "docking-antibody-antigen-CDR-accessible-test.cfg"), + ("docking-antibody-antigen" , "docking-antibody-antigen-CDR-accessible-clt-test.cfg"), + ("docking-antibody-antigen" , "docking-antibody-antigen-CDR-NMR-CSP-test.cfg"), + ("docking-protein-DNA" , "docking-protein-DNA-test.cfg"), + ("docking-protein-DNA" , "docking-protein-DNA-mdref-test.cfg"), + ("docking-protein-homotrimer" , 
"docking-protein-homotrimer-test.cfg"), + ("docking-protein-glycan" , "docking-protein-glycan-test.cfg"), + ("docking-protein-glycan" , "docking-protein-glycan-ilrmsd-test.cfg"), + ("docking-protein-glycan" , "docking-flexref-protein-glycan-test.cfg"), + ("docking-protein-ligand-shape", "docking-protein-ligand-shape-test.cfg"), + ("docking-protein-ligand" , "docking-protein-ligand-test.cfg"), + ("docking-protein-peptide" , "docking-protein-peptide-test.cfg"), + ("docking-protein-peptide" , "docking-protein-peptide-mdref-test.cfg"), + ("docking-protein-protein" , "docking-protein-protein-test.cfg"), + ("docking-protein-protein" , "docking-protein-protein-cltsel-test.cfg"), + ("docking-protein-protein" , "docking-protein-protein-mdref-test.cfg"), + ("docking-multiple-ambig" , "docking-multiple-tbls-test.cfg"), + ("docking-protein-protein" , "docking-exit-test.cfg"), + ("refine-complex" , "refine-complex-test.cfg"), + ("scoring" , "emscoring-test.cfg"), + ("scoring" , "mdscoring-test.cfg"), + ("scoring" , "emscoring-mdscoring-test.cfg"), + ("analysis" , "topoaa-caprieval-test.cfg"), + ("analysis" , "topoaa-ilrmsdmatrix-clustrmsd-test.cfg"), + ("analysis" , "alascan-test.cfg"), + ("analysis" , "contmap-test.cfg"), ) @@ -160,7 +160,7 @@ def main(examples, break_on_errors=True): rmtree("run2", ignore_errors=True) run_subprocess_cmd("haddock3-copy -r run1-test -m 0 4 -o run2") run_subprocess_cmd( - "haddock3 docking-extend-run-exit-test.cfg --extend-run run2", # noqa: E501 + "haddock3 docking-extend-run-exit-test.cfg --extend-run run2", ) # test exit with --restart @@ -178,12 +178,12 @@ def main(examples, break_on_errors=True): # perform a haddock3 re-scoring command run_subprocess_cmd( - "haddock3-re score -e 1.1 -w 1 -d 0.3 -b 1 -a 1 run1-re/2_caprieval", # noqa : E501 + "haddock3-re score -e 1.1 -w 1 -d 0.3 -b 1 -a 1 run1-re/2_caprieval", ) # perform a haddock3 re-clustfcc command run_subprocess_cmd( - "haddock3-re clustfcc -f 0.5 -s 0.7 -t 2 run1-re/1_clustfcc", # 
noqa : E501 + "haddock3-re clustfcc -f 0.5 -s 0.7 -t 2 run1-re/1_clustfcc", ) # FIXME: Make this runs properly function @@ -191,7 +191,7 @@ def main(examples, break_on_errors=True): # perform haddock3 --extend-run on re-run # run_subprocess_cmd( - # "haddock3 docking-re-extend-run-test.cfg --extend-run run1-re", # noqa : E501 + # "haddock3 docking-re-extend-run-test.cfg --extend-run run1-re", # ) # perform haddock3 --restart on re-run diff --git a/tox.ini b/tox.ini index 0ab20e6ff..c10be67a7 100644 --- a/tox.ini +++ b/tox.ini @@ -165,10 +165,11 @@ per-file-ignores = src/haddock/clis/cli_dmn.py:T201 src/haddock/clis/cli_score.py:T201 src/haddock/core/typing.py:F401 + src/haddock/modules/*/*/__init__.py:D205,D400 tests/*.py:D103 tests/test_gear_preprocessing.py:E501,D103,W291 tests/test_module_flexref.py:B017 - src/haddock/modules/*/*/__init__.py:D205,D400 + examples/run_tests.py:E203,E501 exclude = src/haddock/modules/_template_cat/_template_mod/__init__.py docstring-convention = numpy From 08c7fcb91e53c12ce1b1c7e5ece3a9e3d37ad6bc Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 14:36:57 +0200 Subject: [PATCH 05/32] add docstring in HaddockModel method --- src/haddock/gear/haddockmodel.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/haddock/gear/haddockmodel.py b/src/haddock/gear/haddockmodel.py index 9e669d2a7..a8960d3da 100644 --- a/src/haddock/gear/haddockmodel.py +++ b/src/haddock/gear/haddockmodel.py @@ -5,13 +5,25 @@ class HaddockModel: - """Represent HADDOCK model.""" + """Represent HADDOCK CNS model.""" def __init__(self, pdb_f: FilePath) -> None: self.energies = self._load_energies(pdb_f) @staticmethod def _load_energies(pdb_f: FilePath) -> dict[str, float]: + """Parse pdb file generated by CNS in search for scores. + + Parameters + ---------- + pdb_f : FilePath + Path to the pdb file + + Returns + ------- + dict[str, float] + Dictionary of the components with their unweighted values. 
+ """ energy_dic: dict[str, float] = {} with open(pdb_f) as fh: for line in fh.readlines(): @@ -38,13 +50,13 @@ def _load_energies(pdb_f: FilePath) -> dict[str, float]: energy_dic['dani'] = dani energy_dic['xpcs'] = xpcs energy_dic['rg'] = rg - if 'buried surface area' in line: + elif 'buried surface area' in line: bsa = float(line.rstrip().split(':')[-1]) energy_dic['bsa'] = bsa - if 'Desolvation energy' in line: + elif 'Desolvation energy' in line: desolv = float(line.rstrip().split(':')[-1]) energy_dic['desolv'] = desolv - if 'Symmetry energy' in line: + elif 'Symmetry energy' in line: sym = float(line.rstrip().split(':')[-1]) energy_dic['sym'] = sym From 4285a18d5033d39d5dd15b1e070fb8e66d8236ba Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 14:38:16 +0200 Subject: [PATCH 06/32] workaround topoaa --- src/haddock/clis/cli_score.py | 137 ++++++------ src/haddock/clis/cli_traceback.py | 6 +- src/haddock/libs/libcns.py | 9 +- src/haddock/libs/libontology.py | 205 ++++++++++++------ src/haddock/libs/libpdb.py | 29 +++ src/haddock/libs/libstructure.py | 46 ---- src/haddock/libs/libworkflow.py | 10 +- src/haddock/modules/__init__.py | 2 +- .../modules/analysis/alascan/__init__.py | 2 +- src/haddock/modules/analysis/alascan/scan.py | 31 ++- .../modules/topology/topoaa/__init__.py | 79 +++---- 11 files changed, 311 insertions(+), 245 deletions(-) delete mode 100644 src/haddock/libs/libstructure.py diff --git a/src/haddock/clis/cli_score.py b/src/haddock/clis/cli_score.py index 5abb0a1e5..0f03d6394 100644 --- a/src/haddock/clis/cli_score.py +++ b/src/haddock/clis/cli_score.py @@ -103,7 +103,52 @@ def cli(ap: ArgumentParser, main: Callable[..., None]) -> None: def maincli() -> None: """Execute main client.""" - cli(ap, main) + cli(_ap(), main) + + +def get_parameters(kwargs: Any) -> dict[str, Any]: + from os import linesep + from haddock.gear.yaml2cfg import read_from_yaml_config + from haddock.modules.scoring.emscoring import DEFAULT_CONFIG + # config all 
parameters are correctly spelled. + default_emscoring = read_from_yaml_config(DEFAULT_CONFIG) + ems_dict = default_emscoring.copy() + n_warnings = 0 + for param, value in kwargs.items(): + if param not in default_emscoring: + sys.exit( + f"* ERROR * Parameter {param!r} is not a " + f"valid `emscoring` parameter.{linesep}" + "Valid emscoring parameters are: " + f"{', '.join(sorted(default_emscoring))}" + ) + if value != default_emscoring[param]: + print( + f"* ATTENTION * Value ({value}) of parameter {param} " + f"different from default ({default_emscoring[param]})" + ) + # get the type of default value + default_type = type(default_emscoring[param]) + # convert the value to the same type + if default_type == bool: + if value.lower() not in ["true", "false"]: + sys.exit( + f"* ERROR * Boolean parameter {param} " + "should be True or False" + ) + value = value.lower() == "true" + else: + value = default_type(value) + ems_dict[param] = value + n_warnings += 1 + if n_warnings != 0: + print( + "* ATTENTION * Non-default parameter values were used. " + "They should be properly reported if the output " + "data are used for publication." + ) + print(f"used emscoring parameters: {ems_dict}") + return ems_dict def main( @@ -143,19 +188,16 @@ def main( Any additional arguments that will be passed to the ``emscoring`` module. 
""" - import os import logging import shutil from contextlib import suppress from pathlib import Path from haddock import log + from haddock.core.defaults import DATA_DIRNAME from haddock.gear.haddockmodel import HaddockModel - from haddock.gear.yaml2cfg import read_from_yaml_config - from haddock.gear.zerofill import zero_fill from haddock.libs.libio import working_directory from haddock.libs.libworkflow import WorkflowManager - from haddock.modules.scoring.emscoring import DEFAULT_CONFIG log.setLevel(logging.ERROR) @@ -163,81 +205,46 @@ def main( if not input_pdb.exists(): sys.exit(f"* ERROR * Input PDB file {str(input_pdb)!r} does not exist") - # config all parameters are correctly spelled. - default_emscoring = read_from_yaml_config(DEFAULT_CONFIG) - ems_dict = default_emscoring.copy() - n_warnings = 0 - for param, value in kwargs.items(): - if param not in default_emscoring: - sys.exit( - f"* ERROR * Parameter {param!r} is not a " - f"valid `emscoring` parameter.{os.linesep}" - f"Valid emscoring parameters are: {', '.join(sorted(default_emscoring))}" - ) - if value != default_emscoring[param]: - print( - f"* ATTENTION * Value ({value}) of parameter {param} different from default ({default_emscoring[param]})" - ) # noqa:E501 - # get the type of default value - default_type = type(default_emscoring[param]) - # convert the value to the same type - if default_type == bool: - if value.lower() not in ["true", "false"]: - sys.exit(f"* ERROR * Boolean parameter {param} should be True or False") - value = value.lower() == "true" - else: - value = default_type(value) - ems_dict[param] = value - n_warnings += 1 - if n_warnings != 0: - print( - "* ATTENTION * Non-default parameter values were used. " - "They should be properly reported if the output " - "data are used for publication." 
- ) - print(f"used emscoring parameters: {ems_dict}") + # Get parameters + ems_dict = get_parameters(kwargs) # create run directory run_dir = Path(run_dir) with suppress(FileNotFoundError): shutil.rmtree(run_dir) run_dir.mkdir() - zero_fill.set_zerofill_number(2) - - # create temporary file - with tempfile.NamedTemporaryFile(prefix=input_pdb.stem, suffix=".pdb") as tmp: - - # create a copy of the input pdb - input_pdb_copy = Path(tmp.name) - shutil.copy(input_pdb, input_pdb_copy) - - params = { - "topoaa": {"molecules": [input_pdb_copy]}, - "emscoring": ems_dict, + # create a copy of the input pdb in run directory + input_molecule_dir = Path(run_dir, DATA_DIRNAME, "0_topoaa") + input_molecule_dir.mkdir(parents=True, exist_ok=True) + input_pdb_copy = Path(input_molecule_dir, input_pdb.name) + shutil.copy(input_pdb, input_pdb_copy) + + # Set workflow parameters + params = { + "topoaa": {"molecules": [input_pdb_copy]}, + "emscoring": ems_dict, } - + # run workflow + with working_directory(run_dir): + workflow = WorkflowManager( + workflow_params=params, + start=0, + run_dir=run_dir, + ) print("> starting calculations...") + workflow.run() - # run workflow - with working_directory(run_dir): - workflow = WorkflowManager( - workflow_params=params, - start=0, - run_dir=run_dir, - ) - - workflow.run() - - minimized_mol = Path(run_dir, "1_emscoring", "emscoring_1.pdb") - haddock_score_component_dic = HaddockModel(minimized_mol).energies - + # Point generated structure path + minimized_mol_path = Path(run_dir, "1_emscoring", "emscoring_1.pdb") + haddock_score_component_dic = HaddockModel(minimized_mol_path).energies + # Gather haddock score components vdw = haddock_score_component_dic["vdw"] elec = haddock_score_component_dic["elec"] desolv = haddock_score_component_dic["desolv"] air = haddock_score_component_dic["air"] bsa = haddock_score_component_dic["bsa"] - # emscoring is equivalent to itw + # Weight the components to obtain the HADDOCK score haddock_score_itw = ( 
ems_dict["w_vdw"] * vdw + ems_dict["w_elec"] * elec diff --git a/src/haddock/clis/cli_traceback.py b/src/haddock/clis/cli_traceback.py index b81ee319b..14e332447 100644 --- a/src/haddock/clis/cli_traceback.py +++ b/src/haddock/clis/cli_traceback.py @@ -15,9 +15,9 @@ import numpy as np import pandas as pd -from typing import Any from haddock import log +from haddock.core.typing import FilePath, Any from haddock.libs import libcli from haddock.libs.libontology import ModuleIO, PDBFile from haddock.libs.libplots import make_traceback_plot @@ -94,7 +94,7 @@ def get_ori_names(n: int, pdbfile: PDBFile, max_topo_len: int) -> tuple[list, in def traceback_dataframe( data_dict: dict, rank_dict: dict, sel_step: list, max_topo_len: int -) -> None: +) -> pd.DataFrame: """ Create traceback dataframe by combining together ranks and data. @@ -242,7 +242,7 @@ def maincli(): cli(ap, main) -def main(run_dir): +def main(run_dir: FilePath) -> None: """ Traceback CLI. diff --git a/src/haddock/libs/libcns.py b/src/haddock/libs/libcns.py index ab72fdef2..786e9355f 100644 --- a/src/haddock/libs/libcns.py +++ b/src/haddock/libs/libcns.py @@ -365,9 +365,12 @@ def prepare_cns_input( return inp_file -def prepare_expected_pdb(model_obj: Union[PDBFile, tuple[PDBFile, - ...]], model_nb: int, - path: FilePath, identifier: str) -> PDBFile: +def prepare_expected_pdb( + model_obj: Union[PDBFile, tuple[PDBFile, ...]], + model_nb: int, + path: FilePath, + identifier: str, + ) -> PDBFile: """Prepare a PDBobject.""" expected_pdb_fname = Path(path, f"{identifier}_{model_nb}.pdb") pdb = PDBFile(expected_pdb_fname, path=path) diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index ee5cfe77a..17802bcb5 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -4,6 +4,7 @@ import os import re from enum import Enum +from functools import partial from os import linesep from os.path import getmtime from pathlib import Path @@ -14,12 +15,13 @@ from 
haddock.core.typing import ( Any, FilePath, + Iterable, List, Optional, TypeVar, Union, ) -from haddock.libs import libpdb +from haddock.libs.libpdb import split_ensemble NaN = float("nan") @@ -181,13 +183,15 @@ def retrieve_models( input_dic: dict[int, list[PDBFile]] = {} for i, element in enumerate(self.output): - if isinstance(element, dict): + # Make molecules from elements + molecule = Molecule(element) + if isinstance(molecule.pdb_files, dict): position_list: list[PDBFile] = input_dic.setdefault(i, []) for key in element: position_list.append(element[key]) # type: ignore - - elif element.file_type == Format.PDB: # type: ignore + elif molecule.pdb_files.file_type == Format.PDB: # type: ignore model_list.append(element) # type: ignore + if input_dic and not crossdock and not individualize: # check if all ensembles contain the same number of models sub_lists = iter(input_dic.values()) @@ -198,7 +202,6 @@ def retrieve_models( " cannot prepare pairwise complexes." ) raise Exception(_msg) - # prepare pairwise combinations model_list = [values for values in zip(*input_dic.values())] # type: ignore elif input_dic and crossdock and not individualize: @@ -271,77 +274,148 @@ def load_from_input_molecules( """ # Gather all input molecules input_molecules = list(input_molecules_dir.glob('*.pdb')) + assert input_molecules != [], \ + f"No molecules could be found in `{input_molecules_dir}`" # Sort them by creation date (which is also input order) input_molecules.sort(key=getmtime) # FIXME: getctime ? 
# Set input attribute self.input = input_molecules # Set parsing variables - molecules_dic: dict[int, dict[int, PDBFile]] = {} - # Loop over input molecules - for i, molecule in enumerate(self.input, start=1): - # Split models (these come already sorted) - splited_models = libpdb.split_ensemble( - molecule, - dest=input_molecules_dir, - ) - # get the MD5 hash of each model - md5_dic = self.get_md5(molecule) - origin_names = self.get_ensemble_origin(molecule) - # Initiate with empty list - molecules_dic.setdefault(i, {}) - # Loop over conformers of this ensemble - for j, model in enumerate(splited_models): - processed_model = model - model_name = model.stem - # Search of md5 information - md5_hash = None - try: - model_id = int(model_name.split("_")[-1]) - except ValueError: - model_id = 0 - if model_id in md5_dic: - md5_hash = md5_dic[model_id] - # Check if origin or md5 is available - if md5_hash or model_id in origin_names.keys(): - # Select prefix - if md5_hash: # Prioritize the md5 hash - prefix_name = md5_hash - else: - prefix_name = origin_names[model_id] - # Build new filename - model_new_name = f"{prefix_name}_from_{model_name}" - # Rename file - processed_model = model.rename( - Path( - input_molecules_dir, - f"{model_new_name}.{Format.PDB}", - ) + molecules_list: list[dict[int, PDBFile]] = [ + Molecule(input_file).pdb_files + for input_file in self.input + ] + # And fake them to be the output of the previous io + self.output = molecules_list + + +class Molecule: + """ + Input molecule, usually a PDB file. + + Parameters + ---------- + file_name : :external:py:class:`pathlib.Path` + The path to the molecule file. + + segid : int, optional + The ID of the segment. Defaults to ``None``. + + no_parent : boolean + Whether to add the parent path ``..`` to the + :py:attr:`haddock.libs.libstructure.Molecule.with_parent`. + When set to true, the ``with_parent`` attribute returns the same + as ``file_name``. 
+ """ + + def __init__( + self, + pdb_file: Union[PDBFile, tuple[dict[int, PDBFile]], FilePath], + ) -> None: + self.input_file = pdb_file + self._pdb_files: dict[int, PDBFile] = {} + self.standardize_input_pdbfile() + + def standardize_input_pdbfile(self): + if any([isinstance(self.input_file, ftype) for ftype in (str, Path)]): + self.gen_pdb_object() + else: + self.pdb_files = self.input_file + + @property + def count_models(self) -> int: + self._nb_models = getattr( + self, + "_nb_models", + 1 if isinstance(self.pdb_files, PDBFile) \ + else len(self.pdb_files.keys()), + ) + return self._nb_models + + @property + def pdb_files(self): + return self._pdb_files + + @pdb_files.setter + def pdb_files(self, value: Union[dict[int, PDBFile], PDBFile]) -> None: + self._pdb_files = value + + def __len__(self) -> int: + return self.count_models + + def __repr__(self) -> str: + return f"Molecule {self.input_file}: {len(self)} models" + + def gen_pdb_object(self) -> None: + # Create a Path object form input file + pdb_filepath = self.input_file + if not isinstance(pdb_filepath, Path): + pdb_filepath = Path(pdb_filepath) + # Obtain origin directory + input_molecules_dir = pdb_filepath.parent + # Eventually split models (they come back sorted by order in the file) + splited_models = split_ensemble( + pdb_filepath, + dest=input_molecules_dir, + ) + # get the MD5 hash of each model + md5_dic = self.get_md5(pdb_filepath) + origin_names = self.get_ensemble_origin(pdb_filepath) + # Initiate holding variable + pdb_files: dict[int, PDBFile] = {} + # Loop over conformers of this ensemble + for j, model in enumerate(splited_models): + processed_model = model + model_name = model.stem + # Search of md5 information + md5_hash = None + try: + model_id = int(model_name.split("_")[-1]) + except ValueError: + model_id = 0 + if model_id in md5_dic: + md5_hash = md5_dic[model_id] + # Check if origin or md5 is available + if md5_hash or model_id in origin_names.keys(): + # Select prefix + if 
md5_hash: # Prioritize the md5 hash + prefix_name = md5_hash + else: + prefix_name = origin_names[model_id] + # Build new filename + model_new_name = f"{prefix_name}_from_{model_name}" + # Rename file + processed_model = model.rename( + Path( + input_molecules_dir, + f"{model_new_name}.pdb", ) - # Create a PDBFile object - pdbfile = PDBFile( - processed_model, - md5=md5_hash, ) - # Modify relative path attribute - pdbfile.rel_path = Path( - "..", - input_molecules_dir, - pdbfile.file_name - ) - # Set origin name - pdbfile.ori_name = molecule - # Hold that conformer/model - molecules_dic[i][j] = pdbfile - # And fake them to be the output of the previous io - self.output = list(molecules_dic.values()) + # Create a PDBFile object + pdbfile = PDBFile( + processed_model, + md5=md5_hash, + ) + # Modify relative path attribute + pdbfile.rel_path = Path( + "..", + input_molecules_dir, + pdbfile.file_name + ) + # Set origin name + pdbfile.ori_name = pdb_filepath + # Hold this guy + pdb_files[j] = pdbfile + # Set attribute + self.pdb_files = pdb_files @staticmethod def get_md5(ensemble_f: FilePath) -> dict[int, str]: """Get MD5 hash of a multi-model PDB file.""" md5_dic: dict[int, str] = {} text = Path(ensemble_f).read_text() - lines = text.split(os.linesep) + lines = text.split(linesep) REMARK_lines = (line for line in lines if line.startswith("REMARK")) remd5 = re.compile(r"^[a-f0-9]{32}$") for line in REMARK_lines: @@ -379,7 +453,7 @@ def get_ensemble_origin(ensemble_f: FilePath) -> dict[int, str]: """ origin_dic: dict[int, str] = {} text = Path(ensemble_f).read_text() - lines = text.split(os.linesep) + lines = text.split(linesep) REMARK_lines = (line for line in lines if line.startswith("REMARK")) re_origin = re.compile( r"REMARK\s+MODEL\s+(\d+)\s+(FROM|from|From)\s+(([\w_-]+\.?)+)" @@ -393,6 +467,11 @@ def get_ensemble_origin(ensemble_f: FilePath) -> dict[int, str]: return origin_dic +def make_molecules(paths: Iterable[Path], **kwargs: Any) -> list[Molecule]: + """Get 
input molecules from the data stream.""" + return list(map(partial(Molecule, **kwargs), paths)) + + PDBPath = Union[PDBFile, Path] PDBPathT = TypeVar("PDBPathT", bound=Union[PDBFile, Path]) diff --git a/src/haddock/libs/libpdb.py b/src/haddock/libs/libpdb.py index 7d7e4ec6b..69c173ad6 100644 --- a/src/haddock/libs/libpdb.py +++ b/src/haddock/libs/libpdb.py @@ -144,6 +144,35 @@ def split_ensemble( return pdb_files_path +def count_models(pdb_file_path: FilePath) -> int: + """Count number of models in a pdb file. + + Read filepath and return number of models found in it. + If none (not an ensemble), 1 is returned. + + Parameters + ---------- + pdb_file_path : FilePath (Union[str, Path]) + Path to the pdb file to analyse. + + Returns + ------- + nb_models : int + The number of models found in this pdb file. + """ + models_starts: int = 0 + model_ends: int = 0 + with open(pdb_file_path, 'r') as filin: + for line in filin: + if line.startswith("ENDMDL"): + model_ends += 1 + elif line.startswith("MODEL"): + models_starts += 1 + nb_models = max(1, model_ends) + assert max(1, models_starts) == nb_models + return nb_models + + def split_by_chain(pdb_file_path: FilePath) -> list[Path]: """Split a PDB file into multiple structures for each chain.""" abs_path = Path(pdb_file_path).resolve().parent.absolute() diff --git a/src/haddock/libs/libstructure.py b/src/haddock/libs/libstructure.py deleted file mode 100644 index 46e6d6d62..000000000 --- a/src/haddock/libs/libstructure.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Molecular data structures.""" -from functools import partial -from pathlib import Path -from typing import Any, Iterable, Optional - - -class Molecule: - """ - Input molecule, usually a PDB file. - - Parameters - ---------- - file_name : :external:py:class:`pathlib.Path` - The path to the molecule file. - - segid : int, optional - The ID of the segment. Defaults to ``None``. 
- - no_parent : boolean - Whether to add the parent path ``..`` to the - :py:attr:`haddock.libs.libstructure.Molecule.with_parent`. - When set to true, the ``with_parent`` attribute returns the same - as ``file_name``. - """ - - def __init__( - self, - file_name: Path, - segid: Optional[int] = None, - no_parent: bool = False, - ) -> None: - # the rest of the code is too dependent on the Path API - assert isinstance(file_name, Path), \ - f"`file_name` must be pathlib.Path: {type(file_name)} given" - - self.file_name = file_name - self.segid = segid - if no_parent: - self.with_parent = file_name - else: - self.with_parent = Path('..', file_name) - - -def make_molecules(paths: Iterable[Path], **kwargs: Any) -> list[Molecule]: - """Get input molecules from the data stream.""" - return list(map(partial(Molecule, **kwargs), paths)) diff --git a/src/haddock/libs/libworkflow.py b/src/haddock/libs/libworkflow.py index e3ef4cb65..d972f627c 100644 --- a/src/haddock/libs/libworkflow.py +++ b/src/haddock/libs/libworkflow.py @@ -38,7 +38,8 @@ def __init__( def run(self) -> None: """High level workflow composer.""" - for i, step in enumerate(self.recipe.steps[self.start :], start=self.start): + id_steps = enumerate(self.recipe.steps[self.start:], start=self.start) + for i, step in id_steps: try: step.execute() except HaddockTermination: @@ -75,8 +76,11 @@ def postprocess(self) -> None: if step.module_name == "caprieval": capri_steps.append(step.order) # type: ignore # call cli_analyse (no need for capri_dicts, it's all precalculated) - cli_analyse("./", capri_steps, top_cluster=10, format=None, scale=None, - inter=False, is_cleaned=is_cleaned, offline=offline, mode=mode, ncores=ncores) + cli_analyse( + "./", capri_steps, top_cluster=10, format=None, scale=None, + inter=False, is_cleaned=is_cleaned, offline=offline, mode=mode, + ncores=ncores, + ) # call cli_traceback. 
If it fails, it's not a big deal try: cli_traceback("./") diff --git a/src/haddock/modules/__init__.py b/src/haddock/modules/__init__.py index 2efd25d50..084399d18 100644 --- a/src/haddock/modules/__init__.py +++ b/src/haddock/modules/__init__.py @@ -235,7 +235,7 @@ def _run(self) -> None: def run(self, **params: Any) -> None: """Execute the module.""" - log.info(f"Running [{self.name}] module") + log.info(f"Running [{self.name}] module (step {self.order})") self.update_params(**params) self.add_parent_to_paths() diff --git a/src/haddock/modules/analysis/alascan/__init__.py b/src/haddock/modules/analysis/alascan/__init__.py index 199df61f4..07801c9c6 100644 --- a/src/haddock/modules/analysis/alascan/__init__.py +++ b/src/haddock/modules/analysis/alascan/__init__.py @@ -61,7 +61,7 @@ def _run(self): alascan_jobs = [] for core in range(ncores): - output_name = "alascan_" + str(core) + ".scan" + output_name = f"alascan_{core}.scan" scan_obj = Scan( model_list=models[index_list[core]:index_list[core + 1]], output_name=output_name, diff --git a/src/haddock/modules/analysis/alascan/scan.py b/src/haddock/modules/analysis/alascan/scan.py index 37e72a685..5d2015f34 100644 --- a/src/haddock/modules/analysis/alascan/scan.py +++ b/src/haddock/modules/analysis/alascan/scan.py @@ -156,7 +156,8 @@ def add_delta_to_bfactor(pdb_f, df_scan): os.rename(tmp_pdb_f, pdb_f) return pdb_f -def get_score_string(pdb_f, run_dir): + +def get_score_string(pdb_f: str, run_dir: str) -> list[str]: """Get score output from cli_score.main. Parameters @@ -169,7 +170,7 @@ def get_score_string(pdb_f, run_dir): Returns ------- - out : list + out : list[str] List of strings with the score output. """ f = io.StringIO() @@ -179,7 +180,10 @@ def get_score_string(pdb_f, run_dir): return out -def calc_score(pdb_f, run_dir): +def calc_score( + pdb_f: str, + run_dir: str, + ) -> tuple[float, float, float, float, float]: """Calculate the score of a model. 
Parameters @@ -464,12 +468,13 @@ def run(self): native.rel_path, cutoff=self.int_cutoff ) - + atoms = get_atoms(native.rel_path) - coords, chain_ranges = load_coords(native.rel_path, - atoms, - add_resname=True - ) + coords, _chain_ranges = load_coords( + native.rel_path, + atoms, + add_resname=True, + ) resname_dict = {} for chain, resid, _atom, resname in coords.keys(): key = f"{chain}-{resid}" @@ -490,10 +495,12 @@ def run(self): c_bsa = n_bsa else: try: - mut_pdb_name = mutate(native.rel_path, - chain, - res, - end_resname) + mut_pdb_name = mutate( + native.rel_path, + chain, + res, + end_resname, + ) except KeyError: continue # now we score the mutated model diff --git a/src/haddock/modules/topology/topoaa/__init__.py b/src/haddock/modules/topology/topoaa/__init__.py index d85e90dd6..db09e3d7b 100644 --- a/src/haddock/modules/topology/topoaa/__init__.py +++ b/src/haddock/modules/topology/topoaa/__init__.py @@ -88,22 +88,16 @@ def confirm_installation(cls) -> None: def _run(self) -> None: """Execute module.""" - md5_dic: dict[int, dict[int, str]] = {} self.params.pop("molecules") - molecules: list[list[Path]] = [] + input_molecules: list[list[PDBFile]] = [] if self.order == 0: _molecules = self.previous_io.output - for i, models in enumerate(_molecules, start=1): - molecules.append([model.rel_path for model in models.values()]) - md5_dic[i] = { - j: model.md5 - for j, model in enumerate(models.values()) - } + input_molecules = [list(models.values()) for models in _molecules] else: # in case topoaa is not the first step, the topology is rebuilt for # each retrieved model _molecules = self.previous_io.retrieve_models() - molecules = [[mol.rel_path] for mol in _molecules] + input_molecules = [[mol] for mol in _molecules] # extracts `input` key from params. 
The `input` keyword needs to # be treated separately @@ -125,85 +119,74 @@ def _run(self) -> None: # Pool of jobs to be executed by the CNS engine jobs: list[CNSJob] = [] - models_dic: dict[int, list[Path]] = {} - - for i, models in enumerate(molecules, start=1): + output_molecules: list[dict[int, PDBFile]] = [] + for i, models in enumerate(input_molecules, start=1): self.log(f"Molecule {i}") - models_dic[i] = [] + models_dic: dict[int, PDBFile] = {} # nice variable name, isn't it? :-) # molecule parameters are shared among models of the same molecule parameters_for_this_molecule = mol_params[mol_params_get()] - - for model in models: - self.log(f"Sanitizing model {model.name}") + # Loop over models/conformers of this molecule + for j, model in enumerate(models): + # Point path of this model + model_path = model.rel_path + self.log(f"Sanitizing model {model_path.name}") + # Gather custom topology custom_top: Optional[FilePath] = None if self.params["ligand_top_fname"]: custom_top = self.params["ligand_top_fname"] self.log(f"Using custom topology {custom_top}") libpdb.sanitize( - model, + model_path, overwrite=True, custom_topology=custom_top, ) - models_dic[i].append(model) - # Prepare generation of topologies jobs topology_filename = generate_topology( - model, + model_path, self.recipe_str, self.params, parameters_for_this_molecule, default_params_path=self.toppar_path, ) - self.log( f"Topology CNS input created in {topology_filename.name}" ) - # Add new job to the pool - output_filename = Path(f"{model.stem}.{Format.CNS_OUTPUT}") - + output_filename = Path(f"{model_path.stem}.{Format.CNS_OUTPUT}") job = CNSJob( topology_filename, output_filename, envvars=self.envvars, cns_exec=self.params["cns_exec"], ) - jobs.append(job) - - # Run CNS Jobs - self.log(f"Running CNS Jobs n={len(jobs)}") - Engine = get_engine(self.params["mode"], self.params) - engine = Engine(jobs) - engine.run() - self.log("CNS jobs have finished") - - # Check for generated output, fail if not 
all expected files - # are found - expected: dict[int, dict[int, PDBFile]] = {} - for i, models in models_dic.items(): - expected.setdefault(i, {}) - for j, model in enumerate(models): - model_name = model.stem + # Generate future output files + model_name = model_path.stem processed_pdb = Path(f"{model_name}_haddock.{Format.PDB}") processed_topology = Path( f"{model_name}_haddock.{Format.TOPOLOGY}" ) topology = TopologyFile(processed_topology, path=".") - try: - md5 = md5_dic[i][j] - except KeyError: - md5 = None + # Create new PDBFile object pdb = PDBFile( file_name=processed_pdb, topology=topology, path=".", - md5=md5, + md5=model.md5, ) - pdb.ori_name = model.stem - expected[i][j] = pdb + pdb.ori_name = model_name + # Hold PDBFile into models + models_dic[j] = pdb + output_molecules.append(models_dic) + + # Run CNS Jobs + self.log(f"Running CNS Jobs n={len(jobs)}") + Engine = get_engine(self.params["mode"], self.params) + engine = Engine(jobs) + engine.run() + self.log("CNS jobs have finished") # Save module information - self.output_models = list(expected.values()) # type: ignore + self.output_models = output_molecules # type: ignore self.export_io_models(faulty_tolerance=self.params["tolerance"]) From 55e20d52b2f6037ebf4af970af29c4bd3c550e38 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 15:00:26 +0200 Subject: [PATCH 07/32] fix tests --- tests/test_gear_prepare_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_gear_prepare_run.py b/tests/test_gear_prepare_run.py index 7002e4dae..4c03091a7 100644 --- a/tests/test_gear_prepare_run.py +++ b/tests/test_gear_prepare_run.py @@ -119,7 +119,7 @@ def test_populate_mol_params(): "caprieval.1": {}, } - populate_mol_parameters(params) + populate_mol_parameters(params, params["topoaa.1"]) assert "mol_fix_origin_1" in params["flexref.1"] assert "mol_fix_origin_2" in params["flexref.1"] assert "mol_fix_origin_3" in params["flexref.1"] From 
65242ddb1b72fdd2203ae71d2382bfdda3e403fb Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 15:12:30 +0200 Subject: [PATCH 08/32] fix types and tests for clustfcc --- src/haddock/libs/libontology.py | 2 +- tests/test_module_clustfcc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index 17802bcb5..f799d72f7 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -166,7 +166,7 @@ def save( output_handler.write(jsonpickle.encode(to_save)) # type: ignore return fpath - def load(self, filename: FilePath) -> None: + def load(self, filename: Path) -> None: """Load the content of a given IO filename.""" if filename.is_file(): with open(filename) as json_file: diff --git a/tests/test_module_clustfcc.py b/tests/test_module_clustfcc.py index f83c72635..07349464c 100644 --- a/tests/test_module_clustfcc.py +++ b/tests/test_module_clustfcc.py @@ -115,7 +115,7 @@ def test_io_json(fcc_module, prot_input_list): # check the content of io.json io = ModuleIO() - io.load("io.json") + io.load(Path("io.json")) assert io.input[0].file_name == prot_input_list[0].file_name assert io.output[1].file_name == prot_input_list[1].file_name From 537869b300f26c7fdc0b8e22899578355f1c08d4 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 15:25:13 +0200 Subject: [PATCH 09/32] Convert FilePath to Path in libontology load() --- src/haddock/libs/libontology.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index f799d72f7..189f74585 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -166,9 +166,9 @@ def save( output_handler.write(jsonpickle.encode(to_save)) # type: ignore return fpath - def load(self, filename: Path) -> None: + def load(self, filename: FilePath) -> None: """Load the content of a given IO filename.""" - if filename.is_file(): 
+ if Path(filename).is_file(): with open(filename) as json_file: content = jsonpickle.decode(json_file.read()) self.input = content["input"] # type: ignore From 83793c3e9e3c24c39397f6c4ff7205d5889fa8f2 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 15:54:17 +0200 Subject: [PATCH 10/32] adding tests --- tests/test_libontology.py | 76 +++++++++++++++++++++++++++++++++++++ tests/test_module_topoaa.py | 16 -------- 2 files changed, 76 insertions(+), 16 deletions(-) create mode 100644 tests/test_libontology.py diff --git a/tests/test_libontology.py b/tests/test_libontology.py new file mode 100644 index 000000000..9bb6fc4cb --- /dev/null +++ b/tests/test_libontology.py @@ -0,0 +1,76 @@ +"""Test functions and methods in haddock.libs.libontology.""" +import pytest +from pathlib import Path + +from haddock.libs.libontology import ( + Molecule, + PDBFile, + ) + +from . import golden_data + + +@pytest.fixture +def molecule(): + return Molecule(None) + + +@pytest.fixture +def protein(): + return Path(golden_data, "protein.pdb") + + +@pytest.fixture +def ensemble_header_w_md5(): + return Path(golden_data, "ens_header.pdb") + + +def test_get_md5(molecule, ensemble_header_w_md5, protein): + """Test get_md5 method.""" + observed_md5_dic = molecule.get_md5(ensemble_header_w_md5) + expected_md5_dic = { + 1: '71098743056e0b95fbfafff690703761', + 2: 'f7ab0b7c751adf44de0f25f53cfee50b', + 3: '41e028d8d28b8d97148dc5e548672142', + 4: '761cb5da81d83971c2aae2f0b857ca1e', + 5: '6c438f941cec7c6dc092c8e48e5b1c10', + } + + assert observed_md5_dic == expected_md5_dic + observed_md5_dic = molecule.get_md5(protein) + assert observed_md5_dic == {} + + +def test_get_ensemble_origin(molecule, ensemble_header_w_md5, protein): + """Test get_ensemble_origin method.""" + expected_origin_dic = { + 1: 'T161-hybrid-fit-C2-NCS_complex_100w', + 2: 'T161-hybrid-fit-C2-NCS_complex_101w', + 3: 'T161-hybrid-fit-C2-NCS_complex_102w', + 4: 'T161-hybrid-fit-C2-NCS_complex_103w', + 5: 
'T161-hybrid-fit-C2-NCS_complex_104w', + } + observed_origin = molecule.get_ensemble_origin(ensemble_header_w_md5) + assert observed_origin == expected_origin_dic + observed_origin = molecule.get_ensemble_origin(protein) + assert observed_origin == {} + + +def test_load_single_pdb(molecule, protein): + """Test casting into PDBFile.""" + # Re-initialize with a actual protein + molecule.__init__(protein) + assert isinstance(molecule.pdb_files, dict) + for pdbfile in molecule.pdb_files.values(): + assert isinstance(pdbfile, PDBFile) + assert len(molecule) == 1 + + +def test_load_single_pdb(molecule, protein): + """Test casting into PDBFile.""" + # Re-initialize with a actual protein + molecule.__init__(protein) + assert isinstance(molecule.pdb_files, dict) + for pdbfile in molecule.pdb_files.values(): + assert isinstance(pdbfile, PDBFile) + assert len(molecule) == 1 diff --git a/tests/test_module_topoaa.py b/tests/test_module_topoaa.py index 5ceaa696a..cc7c87319 100644 --- a/tests/test_module_topoaa.py +++ b/tests/test_module_topoaa.py @@ -75,22 +75,6 @@ def test_generate_topology(topoaa, protein): observed_inp_out.unlink() -def test_get_md5(topoaa, ensemble_header_w_md5, protein): - """Test get_md5 method.""" - observed_md5_dic = topoaa.get_md5(ensemble_header_w_md5) - expected_md5_dic = { - 1: '71098743056e0b95fbfafff690703761', - 2: 'f7ab0b7c751adf44de0f25f53cfee50b', - 3: '41e028d8d28b8d97148dc5e548672142', - 4: '761cb5da81d83971c2aae2f0b857ca1e', - 5: '6c438f941cec7c6dc092c8e48e5b1c10'} - - assert observed_md5_dic == expected_md5_dic - - observed_md5_dic = topoaa.get_md5(protein) - assert observed_md5_dic == {} - - @pytest.mark.skip(reason="Cannot test in Github Actions") def test__run(topoaa, protein): """Test _run method.""" From 278433142e929175038ac072315dc2affaa71aca Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 16:07:08 +0200 Subject: [PATCH 11/32] fix tests --- src/haddock/libs/libontology.py | 2 +- tests/test_libontology.py | 43 
++++++++++++++++++--------------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index 189f74585..859495096 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -334,7 +334,7 @@ def count_models(self) -> int: return self._nb_models @property - def pdb_files(self): + def pdb_files(self) -> Union[dict[int, PDBFile], PDBFile]: return self._pdb_files @pdb_files.setter diff --git a/tests/test_libontology.py b/tests/test_libontology.py index 9bb6fc4cb..4bbf8763d 100644 --- a/tests/test_libontology.py +++ b/tests/test_libontology.py @@ -1,5 +1,8 @@ """Test functions and methods in haddock.libs.libontology.""" import pytest +import tempfile +import shutil + from pathlib import Path from haddock.libs.libontology import ( @@ -25,7 +28,11 @@ def ensemble_header_w_md5(): return Path(golden_data, "ens_header.pdb") -def test_get_md5(molecule, ensemble_header_w_md5, protein): +def test_get_md5( + molecule: Molecule, + ensemble_header_w_md5: Path, + protein: Path, + ): """Test get_md5 method.""" observed_md5_dic = molecule.get_md5(ensemble_header_w_md5) expected_md5_dic = { @@ -41,7 +48,11 @@ def test_get_md5(molecule, ensemble_header_w_md5, protein): assert observed_md5_dic == {} -def test_get_ensemble_origin(molecule, ensemble_header_w_md5, protein): +def test_get_ensemble_origin( + molecule: Molecule, + ensemble_header_w_md5: Path, + protein: Path, + ): """Test get_ensemble_origin method.""" expected_origin_dic = { 1: 'T161-hybrid-fit-C2-NCS_complex_100w', @@ -56,21 +67,15 @@ def test_get_ensemble_origin(molecule, ensemble_header_w_md5, protein): assert observed_origin == {} -def test_load_single_pdb(molecule, protein): - """Test casting into PDBFile.""" - # Re-initialize with a actual protein - molecule.__init__(protein) - assert isinstance(molecule.pdb_files, dict) - for pdbfile in molecule.pdb_files.values(): - assert isinstance(pdbfile, PDBFile) - assert 
len(molecule) == 1 - - -def test_load_single_pdb(molecule, protein): +def test_load_single_pdb(molecule: Molecule, protein: Path): """Test casting into PDBFile.""" - # Re-initialize with a actual protein - molecule.__init__(protein) - assert isinstance(molecule.pdb_files, dict) - for pdbfile in molecule.pdb_files.values(): - assert isinstance(pdbfile, PDBFile) - assert len(molecule) == 1 + with tempfile.TemporaryDirectory('.') as tempdir: + tmp_protein = Path(tempdir, protein.name) + shutil.copyfile(protein, tmp_protein) + # Re-initialize with a actual protein + molecule.__init__(tmp_protein) + assert isinstance(molecule.pdb_files, dict) + print(molecule.pdb_files) + for pdbfile in molecule.pdb_files.values(): + assert isinstance(pdbfile, PDBFile) + assert len(molecule) == 1 From 620e6e19d46cb04067fed067f85375b731f1493e Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 16:34:06 +0200 Subject: [PATCH 12/32] tweak intergration tests --- integration_tests/test_contactmap.py | 2 +- integration_tests/test_ilrmsdmatrix.py | 2 +- integration_tests/test_rmsdmatrix.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/integration_tests/test_contactmap.py b/integration_tests/test_contactmap.py index aaabd5a05..26d4d45bb 100644 --- a/integration_tests/test_contactmap.py +++ b/integration_tests/test_contactmap.py @@ -18,7 +18,7 @@ def contactmap(): """Return contmap module.""" with tempfile.TemporaryDirectory() as tmpdir: preset_contactmap = CMapModule( - order=0, + order=1, path=Path(tmpdir), initial_params=CONTMAP_CONF, ) diff --git a/integration_tests/test_ilrmsdmatrix.py b/integration_tests/test_ilrmsdmatrix.py index 65e11db5a..ddfe1b642 100644 --- a/integration_tests/test_ilrmsdmatrix.py +++ b/integration_tests/test_ilrmsdmatrix.py @@ -22,7 +22,7 @@ def ilrmsdmatrix_module(): """Provide a parametrized IL-RMSD matrix module.""" with tempfile.TemporaryDirectory() as tmpdir: ilrmsdmatrix = IlrmsdmatrixModule( - order=0, path=tmpdir, 
initial_params=DEFAULT_ILRMSD_CONFIG + order=1, path=Path(tmpdir), initial_params=DEFAULT_ILRMSD_CONFIG ) yield ilrmsdmatrix diff --git a/integration_tests/test_rmsdmatrix.py b/integration_tests/test_rmsdmatrix.py index ed67b7829..d86e9d047 100644 --- a/integration_tests/test_rmsdmatrix.py +++ b/integration_tests/test_rmsdmatrix.py @@ -13,9 +13,9 @@ @pytest.fixture def rmsdmatrix_module(): - with tempfile.TemporaryDirectory() as tmpdir: + with tempfile.TemporaryDirectory(".") as tmpdir: ilrmsdmatrix = rmsdmatrixModule( - order=0, path=tmpdir, initial_params=DEFAULT_RMSD_CONFIG + order=1, path=Path(tmpdir), initial_params=DEFAULT_RMSD_CONFIG ) yield ilrmsdmatrix From 2c6e722d0a949a5a651f5dc4ae521a3a5f6a553a Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 16:45:57 +0200 Subject: [PATCH 13/32] intergaration of alascan fix --- integration_tests/test_alascan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/test_alascan.py b/integration_tests/test_alascan.py index 01e175b04..fd6ef050d 100644 --- a/integration_tests/test_alascan.py +++ b/integration_tests/test_alascan.py @@ -17,7 +17,7 @@ def alascan_module(): """Return a default alascan module.""" with tempfile.TemporaryDirectory(dir=".") as tmpdir: alascan = AlascanModule( - order=0, path=".", initial_params=DEFAULT_ALASCAN_CONFIG + order=1, path=".", initial_params=DEFAULT_ALASCAN_CONFIG ) alascan.params["int_cutoff"] = 3.5 yield alascan From d9881671d100c39b565f37cc3ee6c607898f80d7 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 17:06:52 +0200 Subject: [PATCH 14/32] fixing integration tests --- integration_tests/test_alascan.py | 2 +- integration_tests/test_topoaa.py | 34 +++++++++++++++++++++---------- tests/test_libontology.py | 1 - 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/integration_tests/test_alascan.py b/integration_tests/test_alascan.py index fd6ef050d..a1e244e33 100644 --- a/integration_tests/test_alascan.py +++ 
b/integration_tests/test_alascan.py @@ -17,7 +17,7 @@ def alascan_module(): """Return a default alascan module.""" with tempfile.TemporaryDirectory(dir=".") as tmpdir: alascan = AlascanModule( - order=1, path=".", initial_params=DEFAULT_ALASCAN_CONFIG + order=1, path=Path("."), initial_params=DEFAULT_ALASCAN_CONFIG ) alascan.params["int_cutoff"] = 3.5 yield alascan diff --git a/integration_tests/test_topoaa.py b/integration_tests/test_topoaa.py index 6853e89f6..cd723da1e 100644 --- a/integration_tests/test_topoaa.py +++ b/integration_tests/test_topoaa.py @@ -1,25 +1,37 @@ import tempfile -from pathlib import Path - import pytest +from pathlib import Path +from shutil import copyfile -from haddock.modules.topology.topoaa import DEFAULT_CONFIG as DEFAULT_TOPOAA_CONFIG -from haddock.modules.topology.topoaa import HaddockModule as TopoaaModule +from haddock.core.defaults import DATA_DIRNAME +from haddock.modules.topology.topoaa import ( + DEFAULT_CONFIG as DEFAULT_TOPOAA_CONFIG, + HaddockModule as TopoaaModule, + ) from . 
import CNS_EXEC, DATA_DIR, has_cns @pytest.fixture -def topoaa_module(): +def molecules(): + return [ + Path(DATA_DIR, "docking-protein-protein/data/e2aP_1F3G.pdb"), + Path(DATA_DIR, "docking-protein-protein/data/hpr_ensemble.pdb"), + ] + + +@pytest.fixture +def topoaa_module(molecules): with tempfile.TemporaryDirectory() as tmpdir: + mol_copies = [ + copyfile(mol, Path(tmpdir, DATA_DIRNAME, "0_topoaa", mol.name)) + for mol in molecules + ] topoaa = TopoaaModule( - order=0, path=tmpdir, initial_params=DEFAULT_TOPOAA_CONFIG + order=0, path=Path(tmpdir), initial_params=DEFAULT_TOPOAA_CONFIG ) - topoaa.__init__(path=tmpdir, order=0) - topoaa.params["molecules"] = [ - Path(DATA_DIR, "docking-protein-protein/data/e2aP_1F3G.pdb"), - Path(DATA_DIR, "docking-protein-protein/data/hpr_ensemble.pdb"), - ] + topoaa.__init__(path=Path(tmpdir), order=0) + topoaa.params["molecules"] = molecules topoaa.params["mol1"] = {"prot_segid": "A"} topoaa.params["mol2"] = {"prot_segid": "B"} diff --git a/tests/test_libontology.py b/tests/test_libontology.py index 4bbf8763d..964f4c02a 100644 --- a/tests/test_libontology.py +++ b/tests/test_libontology.py @@ -75,7 +75,6 @@ def test_load_single_pdb(molecule: Molecule, protein: Path): # Re-initialize with a actual protein molecule.__init__(tmp_protein) assert isinstance(molecule.pdb_files, dict) - print(molecule.pdb_files) for pdbfile in molecule.pdb_files.values(): assert isinstance(pdbfile, PDBFile) assert len(molecule) == 1 From af2acebe630014a66ab80a4d2af20c9c36c6e96a Mon Sep 17 00:00:00 2001 From: VGPReys Date: Thu, 11 Jul 2024 08:23:08 +0200 Subject: [PATCH 15/32] creating directories in topoaa integration tests --- integration_tests/test_topoaa.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/integration_tests/test_topoaa.py b/integration_tests/test_topoaa.py index cd723da1e..a64ed2463 100644 --- a/integration_tests/test_topoaa.py +++ b/integration_tests/test_topoaa.py @@ -23,15 +23,17 @@ def molecules(): 
@pytest.fixture def topoaa_module(molecules): with tempfile.TemporaryDirectory() as tmpdir: + input_dir_path = Path(tmpdir, DATA_DIRNAME, "0_topoaa") + input_dir_path.mkdir(parents=True) mol_copies = [ - copyfile(mol, Path(tmpdir, DATA_DIRNAME, "0_topoaa", mol.name)) + copyfile(mol, Path(input_dir_path, mol.name)) for mol in molecules ] topoaa = TopoaaModule( order=0, path=Path(tmpdir), initial_params=DEFAULT_TOPOAA_CONFIG ) topoaa.__init__(path=Path(tmpdir), order=0) - topoaa.params["molecules"] = molecules + topoaa.params["molecules"] = mol_copies topoaa.params["mol1"] = {"prot_segid": "A"} topoaa.params["mol2"] = {"prot_segid": "B"} From 69a93cec08531c06505a172d123bad93a54d392e Mon Sep 17 00:00:00 2001 From: VGPReys Date: Thu, 11 Jul 2024 08:57:11 +0200 Subject: [PATCH 16/32] tweak path --- integration_tests/test_topoaa.py | 39 ++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/integration_tests/test_topoaa.py b/integration_tests/test_topoaa.py index a64ed2463..134484312 100644 --- a/integration_tests/test_topoaa.py +++ b/integration_tests/test_topoaa.py @@ -4,6 +4,7 @@ from shutil import copyfile from haddock.core.defaults import DATA_DIRNAME +from haddock.libs.libio import working_directory from haddock.modules.topology.topoaa import ( DEFAULT_CONFIG as DEFAULT_TOPOAA_CONFIG, HaddockModule as TopoaaModule, @@ -23,23 +24,27 @@ def molecules(): @pytest.fixture def topoaa_module(molecules): with tempfile.TemporaryDirectory() as tmpdir: - input_dir_path = Path(tmpdir, DATA_DIRNAME, "0_topoaa") - input_dir_path.mkdir(parents=True) - mol_copies = [ - copyfile(mol, Path(input_dir_path, mol.name)) - for mol in molecules - ] - topoaa = TopoaaModule( - order=0, path=Path(tmpdir), initial_params=DEFAULT_TOPOAA_CONFIG - ) - topoaa.__init__(path=Path(tmpdir), order=0) - topoaa.params["molecules"] = mol_copies - topoaa.params["mol1"] = {"prot_segid": "A"} - topoaa.params["mol2"] = {"prot_segid": "B"} - - topoaa.params["cns_exec"] 
= CNS_EXEC - - yield topoaa + with working_directory(tmpdir): + modulename = "0_topoaa" + input_dir_path = Path(DATA_DIRNAME, modulename) + input_dir_path.mkdir(parents=True) + mol_copies = [ + copyfile(mol, Path(input_dir_path, mol.name)) + for mol in molecules + ] + topoaa = TopoaaModule( + order=0, + path=Path(modulename), + initial_params=DEFAULT_TOPOAA_CONFIG, + ) + topoaa.__init__(path=Path(modulename), order=0) + topoaa.params["molecules"] = mol_copies + topoaa.params["mol1"] = {"prot_segid": "A"} + topoaa.params["mol2"] = {"prot_segid": "B"} + + topoaa.params["cns_exec"] = CNS_EXEC + + yield topoaa @has_cns From 9e38b36d322926e6366d7fc95f08050274991822 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Thu, 11 Jul 2024 09:08:36 +0200 Subject: [PATCH 17/32] tweak path --- integration_tests/test_topoaa.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/integration_tests/test_topoaa.py b/integration_tests/test_topoaa.py index 134484312..0543922bd 100644 --- a/integration_tests/test_topoaa.py +++ b/integration_tests/test_topoaa.py @@ -25,8 +25,9 @@ def molecules(): def topoaa_module(molecules): with tempfile.TemporaryDirectory() as tmpdir: with working_directory(tmpdir): - modulename = "0_topoaa" - input_dir_path = Path(DATA_DIRNAME, modulename) + modulename_path = Path("0_topoaa") + modulename_path.mkdir(parents=True) + input_dir_path = Path(DATA_DIRNAME, modulename_path) input_dir_path.mkdir(parents=True) mol_copies = [ copyfile(mol, Path(input_dir_path, mol.name)) @@ -34,10 +35,10 @@ def topoaa_module(molecules): ] topoaa = TopoaaModule( order=0, - path=Path(modulename), + path=modulename_path, initial_params=DEFAULT_TOPOAA_CONFIG, ) - topoaa.__init__(path=Path(modulename), order=0) + topoaa.__init__(path=modulename_path, order=0) topoaa.params["molecules"] = mol_copies topoaa.params["mol1"] = {"prot_segid": "A"} topoaa.params["mol2"] = {"prot_segid": "B"} From b16d20296a8429472b24f2b3483fe03fc3e224d6 Mon Sep 17 00:00:00 2001 From: 
VGPReys Date: Thu, 11 Jul 2024 09:58:12 +0200 Subject: [PATCH 18/32] tweak path --- integration_tests/test_topoaa.py | 34 +++++++++++++++++++------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/integration_tests/test_topoaa.py b/integration_tests/test_topoaa.py index 0543922bd..8679838fa 100644 --- a/integration_tests/test_topoaa.py +++ b/integration_tests/test_topoaa.py @@ -22,7 +22,7 @@ def molecules(): @pytest.fixture -def topoaa_module(molecules): +def prepare_topoaa_run(molecules): with tempfile.TemporaryDirectory() as tmpdir: with working_directory(tmpdir): modulename_path = Path("0_topoaa") @@ -33,19 +33,25 @@ def topoaa_module(molecules): copyfile(mol, Path(input_dir_path, mol.name)) for mol in molecules ] - topoaa = TopoaaModule( - order=0, - path=modulename_path, - initial_params=DEFAULT_TOPOAA_CONFIG, - ) - topoaa.__init__(path=modulename_path, order=0) - topoaa.params["molecules"] = mol_copies - topoaa.params["mol1"] = {"prot_segid": "A"} - topoaa.params["mol2"] = {"prot_segid": "B"} - - topoaa.params["cns_exec"] = CNS_EXEC - - yield topoaa + yield modulename_path, mol_copies + +@pytest.fixture +def topoaa_module(prepare_topoaa_run): + modulename_path = prepare_topoaa_run[0] + mol_copies = prepare_topoaa_run[1] + topoaa = TopoaaModule( + order=0, + path=modulename_path, + initial_params=DEFAULT_TOPOAA_CONFIG, + ) + #topoaa.__init__(path=modulename_path, order=0) + topoaa.params["molecules"] = mol_copies + topoaa.params["mol1"] = {"prot_segid": "A"} + topoaa.params["mol2"] = {"prot_segid": "B"} + + topoaa.params["cns_exec"] = CNS_EXEC + + yield topoaa @has_cns From 5a6be5c8c7442ca5c5a1b54a4debfeed8f458aa4 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Thu, 11 Jul 2024 16:56:28 +0200 Subject: [PATCH 19/32] improved regex for ensemble origin parsing --- integration_tests/test_topoaa.py | 1 + src/haddock/libs/libontology.py | 2 +- src/haddock/modules/topology/topoaa/__init__.py | 2 +- tests/golden_data/ens_header.pdb | 3 ++- 
tests/test_libontology.py | 1 + 5 files changed, 6 insertions(+), 3 deletions(-) diff --git a/integration_tests/test_topoaa.py b/integration_tests/test_topoaa.py index 8679838fa..c3bbccb51 100644 --- a/integration_tests/test_topoaa.py +++ b/integration_tests/test_topoaa.py @@ -35,6 +35,7 @@ def prepare_topoaa_run(molecules): ] yield modulename_path, mol_copies + @pytest.fixture def topoaa_module(prepare_topoaa_run): modulename_path = prepare_topoaa_run[0] diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index 9276435e9..822885c24 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -458,7 +458,7 @@ def get_ensemble_origin(ensemble_f: FilePath) -> dict[int, str]: lines = text.split(linesep) REMARK_lines = (line for line in lines if line.startswith("REMARK")) re_origin = re.compile( - r"REMARK\s+MODEL\s+(\d+)\s+(FROM|from|From)\s+(([\w_-]+\.?)+)" + r"REMARK\s+\d*\s+MODEL\s+(\d+)\s+(FROM|from|From)\s+[\./]{0,2}(([\w_-]+[/]?)+)\.?" # noqa : E501 ) for line in REMARK_lines: if (match := re_origin.search(line)): diff --git a/src/haddock/modules/topology/topoaa/__init__.py b/src/haddock/modules/topology/topoaa/__init__.py index ee8d027ea..ba94217b7 100644 --- a/src/haddock/modules/topology/topoaa/__init__.py +++ b/src/haddock/modules/topology/topoaa/__init__.py @@ -106,7 +106,7 @@ def _run(self) -> None: # extracts `input` key from params. 
The `input` keyword needs to # be treated separately mol_params: ParamDict = {} - for k in list(self.params.keys()): + for k in self.params.keys(): if k.startswith("mol") and k[3:].isdigit(): mol_params[k] = self.params.pop(k) diff --git a/tests/golden_data/ens_header.pdb b/tests/golden_data/ens_header.pdb index 628625cd5..2a568957e 100644 --- a/tests/golden_data/ens_header.pdb +++ b/tests/golden_data/ens_header.pdb @@ -7,4 +7,5 @@ REMARK MODEL 1 FROM T161-hybrid-fit-C2-NCS_complex_100w.pdb REMARK MODEL 2 FROM T161-hybrid-fit-C2-NCS_complex_101w.pdb REMARK MODEL 3 FROM T161-hybrid-fit-C2-NCS_complex_102w.pdb REMARK MODEL 4 FROM T161-hybrid-fit-C2-NCS_complex_103w.pdb -REMARK MODEL 5 FROM T161-hybrid-fit-C2-NCS_complex_104w.pdb \ No newline at end of file +REMARK MODEL 5 FROM T161-hybrid-fit-C2-NCS_complex_104w.pdb +REMARK 4 MODEL 6 FROM ./hguiw/fewjfo/efewfhewiof/73b07fb2ab6b3245_t264_1.pdb diff --git a/tests/test_libontology.py b/tests/test_libontology.py index c99ece04a..f24322108 100644 --- a/tests/test_libontology.py +++ b/tests/test_libontology.py @@ -133,6 +133,7 @@ def test_get_ensemble_origin( 3: 'T161-hybrid-fit-C2-NCS_complex_102w', 4: 'T161-hybrid-fit-C2-NCS_complex_103w', 5: 'T161-hybrid-fit-C2-NCS_complex_104w', + 6: '73b07fb2ab6b3245_t264_1', } observed_origin = molecule.get_ensemble_origin(ensemble_header_w_md5) assert observed_origin == expected_origin_dic From 2bc55956defca8babd33beebe74ffa9ba90b754e Mon Sep 17 00:00:00 2001 From: Victor Reys <132575181+VGPReys@users.noreply.github.com> Date: Mon, 15 Jul 2024 08:46:26 +0200 Subject: [PATCH 20/32] Update libontology.py regex --- src/haddock/libs/libontology.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index 822885c24..601b72d95 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -457,13 +457,15 @@ def get_ensemble_origin(ensemble_f: FilePath) -> dict[int, str]: text = 
Path(ensemble_f).read_text() lines = text.split(linesep) REMARK_lines = (line for line in lines if line.startswith("REMARK")) + # Compile regex to parse filepath + # https://regex101.com/r/fH0J6a/1 re_origin = re.compile( - r"REMARK\s+\d*\s+MODEL\s+(\d+)\s+(FROM|from|From)\s+[\./]{0,2}(([\w_-]+[/]?)+)\.?" # noqa : E501 + r"REMARK\s+\d*\s+MODEL\s+(\d+)\s+(FROM|from|From)\s+[\./]{0,2}(([\w_\.-]+[/]?)+)\.?" # noqa : E501 ) for line in REMARK_lines: if (match := re_origin.search(line)): model_num = int(match.group(1).strip()) - original_path = match.group(3).strip() + original_path = match.group(4).strip() original_name = Path(original_path).stem origin_dic[model_num] = original_name return origin_dic From cdfefd4d1ad6d597c5fa89edc1494925fa6ee9a4 Mon Sep 17 00:00:00 2001 From: Victor Reys <132575181+VGPReys@users.noreply.github.com> Date: Mon, 15 Jul 2024 09:05:54 +0200 Subject: [PATCH 21/32] Update cli_score.py --- src/haddock/clis/cli_score.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/haddock/clis/cli_score.py b/src/haddock/clis/cli_score.py index 0f03d6394..ea9c118e0 100644 --- a/src/haddock/clis/cli_score.py +++ b/src/haddock/clis/cli_score.py @@ -106,7 +106,19 @@ def maincli() -> None: cli(_ap(), main) -def get_parameters(kwargs: Any) -> dict[str, Any]: +def get_parameters(kwargs: dict[str, Any]) -> dict[str, Any]: + """Obtain and validate command line arguments and add default ones. + + Parameters + ---------- + kwargs : dict[str, Any] + Command line arguments (supposed to be emscoring parameters) + + Return + ------ + ems_dict : dict[str, Any] + Default parameters updated by command line arguments.
+ """ from os import linesep from haddock.gear.yaml2cfg import read_from_yaml_config from haddock.modules.scoring.emscoring import DEFAULT_CONFIG From 38bb99b82ddf685e8f56d2da13e007821cbf6638 Mon Sep 17 00:00:00 2001 From: Victor Reys <132575181+VGPReys@users.noreply.github.com> Date: Mon, 15 Jul 2024 09:06:36 +0200 Subject: [PATCH 22/32] Update __init__.py --- src/haddock/modules/topology/topoaa/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/haddock/modules/topology/topoaa/__init__.py b/src/haddock/modules/topology/topoaa/__init__.py index ba94217b7..ee8d027ea 100644 --- a/src/haddock/modules/topology/topoaa/__init__.py +++ b/src/haddock/modules/topology/topoaa/__init__.py @@ -106,7 +106,7 @@ def _run(self) -> None: # extracts `input` key from params. The `input` keyword needs to # be treated separately mol_params: ParamDict = {} - for k in self.params.keys(): + for k in list(self.params.keys()): if k.startswith("mol") and k[3:].isdigit(): mol_params[k] = self.params.pop(k) From 102c8f4606e7f216ff63b63fd3cd7fcf38e0ac34 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 17 Jul 2024 12:50:40 +0200 Subject: [PATCH 23/32] revision v1.1 --- integration_tests/test_topoaa.py | 1 - src/haddock/clis/cli_score.py | 9 +++-- src/haddock/gear/prepare_run.py | 37 +++++++++++++------ src/haddock/libs/libontology.py | 7 +++- .../modules/topology/topoaa/__init__.py | 7 ++-- tests/test_libontology.py | 20 +++++----- 6 files changed, 49 insertions(+), 32 deletions(-) diff --git a/integration_tests/test_topoaa.py b/integration_tests/test_topoaa.py index c3bbccb51..b018b8e36 100644 --- a/integration_tests/test_topoaa.py +++ b/integration_tests/test_topoaa.py @@ -45,7 +45,6 @@ def topoaa_module(prepare_topoaa_run): path=modulename_path, initial_params=DEFAULT_TOPOAA_CONFIG, ) - #topoaa.__init__(path=modulename_path, order=0) topoaa.params["molecules"] = mol_copies topoaa.params["mol1"] = {"prot_segid": "A"} topoaa.params["mol2"] = {"prot_segid": "B"} 
diff --git a/src/haddock/clis/cli_score.py b/src/haddock/clis/cli_score.py index ea9c118e0..44a9663c6 100644 --- a/src/haddock/clis/cli_score.py +++ b/src/haddock/clis/cli_score.py @@ -18,7 +18,6 @@ """ import argparse import sys -import tempfile from haddock.core.typing import ( Any, @@ -139,17 +138,19 @@ def get_parameters(kwargs: dict[str, Any]) -> dict[str, Any]: f"* ATTENTION * Value ({value}) of parameter {param} " f"different from default ({default_emscoring[param]})" ) - # get the type of default value - default_type = type(default_emscoring[param]) # convert the value to the same type - if default_type == bool: + if isinstance(default_emscoring[param], bool): + # In the case of boolean type if value.lower() not in ["true", "false"]: sys.exit( f"* ERROR * Boolean parameter {param} " "should be True or False" ) + # convert into pythonic True or False value = value.lower() == "true" else: + # Cast value into specific python3 type + default_type = type(default_emscoring[param]) value = default_type(value) ems_dict[param] = value n_warnings += 1 diff --git a/src/haddock/gear/prepare_run.py b/src/haddock/gear/prepare_run.py index d4c2ab2af..ca32bf706 100644 --- a/src/haddock/gear/prepare_run.py +++ b/src/haddock/gear/prepare_run.py @@ -316,9 +316,15 @@ def setup_run( ) first_module_id = list(modules_params.keys())[0] - if (topoaa_module_id := "topoaa.1") in modules_params.keys(): - topology_params = modules_params[topoaa_module_id] + # Here we check if topoaa is the first module in the workflow. + # If it is, we gather the parameters of topoaa in the topology_params, + # as equired by the function populate_topology_molecule_params(), + # to map the molX parameters info to input molecules. + # Without this, we loose the information. 
+ if first_module_id == "topoaa.1": + topology_params = modules_params[first_module_id] else: + # If not, just fake an empty set of parameters topology_params = {} if starting_from_copy: @@ -335,10 +341,6 @@ def setup_run( general_params["molecules"], topology_params, ) - # copy_molecules_to_topology( - # general_params["molecules"], - # modules_params[first_module_id], - # ) max_mols = len(topology_params["molecules"]) if max_mols > max_molecules_allowed: @@ -816,6 +818,9 @@ def copy_molecules_to_data_dir( topoaa_params : dict A dictionary containing the topoaa parameters. + + _first_module_name : str + Name of the first module used in the workflow. preprocess : bool Whether to preprocess input molecules. Defaults to ``True``. @@ -823,7 +828,7 @@ def copy_molecules_to_data_dir( """ # Removes digit from module name # Build regex to capture '.' - name_digit_regex = re.compile(r"(\w+)\.\d+") + name_digit_regex = re.compile(r"(\w+)(\.\d+)?") first_module_name: str = "input_molecules" if match := name_digit_regex.search(_first_module_name): first_module_name = match.group(1) @@ -1061,16 +1066,24 @@ def _get_expandable( return allowed_params -def populate_topology_molecule_params(topoaa: ParamMap) -> None: - """Populate topoaa `molX` subdictionaries.""" +def populate_topology_molecule_params(topology_params: ParamMap) -> None: + """Populate topoaa `molX` subdictionaries. + + Parameters + ---------- + topology_params : ParamMap + Dictionary of parameter with their values. + Possibily parameters from topoaa module. 
+ If not, nothing will happen + """ topoaa_dft = _read_defaults("topoaa.1") - for i in range(1, len(topoaa["molecules"]) + 1): + for i in range(1, len(topology_params["molecules"]) + 1): mol = f"mol{i}" - topoaa[mol] = recursive_dict_update( + topology_params[mol] = recursive_dict_update( topoaa_dft["mol1"], - topoaa[mol] if mol in topoaa else {}, + topology_params[mol] if mol in topology_params else {}, ) return diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index 601b72d95..65886ffcb 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -13,6 +13,7 @@ import jsonpickle from haddock.core.defaults import MODULE_IO_FILE +from haddock.core.exceptions import SetupError from haddock.core.typing import ( Any, FilePath, @@ -276,8 +277,10 @@ def load_from_input_molecules( """ # Gather all input molecules input_molecules = list(input_molecules_dir.glob('*.pdb')) - assert input_molecules != [], \ - f"No molecules could be found in `{input_molecules_dir}`" + if input_molecules == []: + raise SetupError( + f"No molecules could be found in `{input_molecules_dir}`" + ) # Sort them by creation date (which is also input order) input_molecules.sort(key=getmtime) # FIXME: getctime ? 
# Set input attribute diff --git a/src/haddock/modules/topology/topoaa/__init__.py b/src/haddock/modules/topology/topoaa/__init__.py index ee8d027ea..7c44af141 100644 --- a/src/haddock/modules/topology/topoaa/__init__.py +++ b/src/haddock/modules/topology/topoaa/__init__.py @@ -154,9 +154,10 @@ def _run(self) -> None: default_params_path=self.toppar_path, write_to_disk=not self.params["less_io"], ) - self.log( - f"Topology CNS input created in {topoaa_input.name}" - ) + if isinstance(topoaa_input, Path): + self.log( + f"Topology CNS input created in {topoaa_input.name}" + ) # Add new job to the pool output_filename = Path(f"{model_path.stem}.{Format.CNS_OUTPUT}") diff --git a/tests/test_libontology.py b/tests/test_libontology.py index f24322108..50306b497 100644 --- a/tests/test_libontology.py +++ b/tests/test_libontology.py @@ -34,7 +34,7 @@ def output_pdbfile() -> Generator[PDBFile, None, None]: @pytest.fixture -def moduleio_with_pdbfile_list(input_pdbfile, output_pdbfile): +def moduleio_with_pdbfile_list(input_pdbfile: PDBFile, output_pdbfile: PDBFile): m = ModuleIO() m.input = [input_pdbfile] m.output = [output_pdbfile, output_pdbfile] @@ -42,7 +42,7 @@ def moduleio_with_pdbfile_list(input_pdbfile, output_pdbfile): @pytest.fixture -def moduleio_with_pdbfile_dict(output_pdbfile): +def moduleio_with_pdbfile_dict(output_pdbfile: PDBFile): m = ModuleIO() m.input = [] m.output = [ @@ -76,7 +76,7 @@ def io_data() -> dict: @pytest.fixture -def io_json_file(io_data) -> Generator[Path, None, None]: +def io_json_file(io_data: dict) -> Generator[Path, None, None]: with tempfile.NamedTemporaryFile(mode="w+") as f: json.dump(io_data, f) @@ -88,7 +88,7 @@ def io_json_file(io_data) -> Generator[Path, None, None]: @pytest.fixture def molecule(): - return Molecule(None) + return Molecule(None) # type: ignore @pytest.fixture @@ -306,7 +306,7 @@ def test_moduleio_add_list(): assert moduleio.output == ["literally", "anything"] -def test_moduleio_save(mocker, 
moduleio_with_pdbfile_list): +def test_moduleio_save(mocker, moduleio_with_pdbfile_list: ModuleIO): with tempfile.NamedTemporaryFile() as temp_module_io_f: mocker.patch("haddock.core.defaults", temp_module_io_f.name) @@ -328,7 +328,7 @@ def test_moduleio_save(mocker, moduleio_with_pdbfile_list): assert isinstance(observed_data, dict) -def test_moduleio_load(io_json_file, io_data): +def test_moduleio_load(io_json_file: Path, io_data: dict): moduleio = ModuleIO() moduleio.load(filename=io_json_file) @@ -337,7 +337,7 @@ def test_moduleio_load(io_json_file, io_data): assert moduleio.output == io_data["output"] -def test_moduleio_retrieve_models_list(moduleio_with_pdbfile_list): +def test_moduleio_retrieve_models_list(moduleio_with_pdbfile_list: ModuleIO): result = moduleio_with_pdbfile_list.retrieve_models() @@ -346,7 +346,7 @@ def test_moduleio_retrieve_models_list(moduleio_with_pdbfile_list): assert isinstance(result[1], PDBFile) -def test_moduleio_retrieve_models_dict(moduleio_with_pdbfile_dict): +def test_moduleio_retrieve_models_dict(moduleio_with_pdbfile_dict: ModuleIO): result = moduleio_with_pdbfile_dict.retrieve_models( crossdock=True, individualize=True @@ -379,7 +379,7 @@ def test_moduleio_retrieve_models_dict(moduleio_with_pdbfile_dict): assert isinstance(result[0][0], PDBFile) -def test_moduleio_check_faulty(mocker, module_io_with_persistent): +def test_moduleio_check_faulty(mocker, module_io_with_persistent: ModuleIO): mocker.patch.object(module_io_with_persistent, "remove_missing", return_value=None) @@ -400,7 +400,7 @@ def test_moduleio_check_faulty(mocker, module_io_with_persistent): assert result == pytest.approx(10.0) -def test_moduleio_remove_missing(module_io_with_persistent): +def test_moduleio_remove_missing(module_io_with_persistent: ModuleIO): # Remove the first file first_file = module_io_with_persistent.output[0].rel_path From a03cdb5cfbc2d920290c70353473b8f38759d021 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 17 Jul 2024 14:36:36 +0200 
Subject: [PATCH 24/32] modify step index of caprieval and rigidbody integration tests --- integration_tests/test_caprieval.py | 2 +- integration_tests/test_rigidbody.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integration_tests/test_caprieval.py b/integration_tests/test_caprieval.py index ac51e39c0..c276d93aa 100644 --- a/integration_tests/test_caprieval.py +++ b/integration_tests/test_caprieval.py @@ -16,7 +16,7 @@ def caprieval_module(): with tempfile.TemporaryDirectory() as tmpdir: yield CaprievalModule( - order=0, + order=1, path=Path(tmpdir), init_params=DEFAULT_CAPRIEVAL_CONFIG, ) diff --git a/integration_tests/test_rigidbody.py b/integration_tests/test_rigidbody.py index fa69f3452..f75c847f1 100644 --- a/integration_tests/test_rigidbody.py +++ b/integration_tests/test_rigidbody.py @@ -16,7 +16,7 @@ def rigidbody_module(): with tempfile.TemporaryDirectory() as tmpdir: rigidbody = RigidbodyModule( - order=0, path=Path(tmpdir), initial_params=DEFAULT_RIGIDBODY_CONFIG + order=1, path=Path(tmpdir), initial_params=DEFAULT_RIGIDBODY_CONFIG ) yield rigidbody From cb8102541366bb79d99417d0819234835bf3e871 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 17 Jul 2024 14:56:59 +0200 Subject: [PATCH 25/32] removing List type import --- src/haddock/core/typing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/haddock/core/typing.py b/src/haddock/core/typing.py index c8155caab..73a523b0d 100644 --- a/src/haddock/core/typing.py +++ b/src/haddock/core/typing.py @@ -30,7 +30,6 @@ Generic, Iterable, Iterator, - List, Literal, Mapping, MutableMapping, From 9add2c104c81d3f488fadac89c523dcec3e78213 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 17 Jul 2024 15:32:32 +0200 Subject: [PATCH 26/32] adding tests --- src/haddock/clis/cli_score.py | 20 +++++++++++++++----- src/haddock/core/typing.py | 1 + 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/haddock/clis/cli_score.py b/src/haddock/clis/cli_score.py index 
44a9663c6..557764bef 100644 --- a/src/haddock/clis/cli_score.py +++ b/src/haddock/clis/cli_score.py @@ -19,6 +19,7 @@ import argparse import sys +from haddock.core.exceptions import ConfigurationError from haddock.core.typing import ( Any, ArgumentParser, @@ -121,13 +122,13 @@ def get_parameters(kwargs: dict[str, Any]) -> dict[str, Any]: from os import linesep from haddock.gear.yaml2cfg import read_from_yaml_config from haddock.modules.scoring.emscoring import DEFAULT_CONFIG - # config all parameters are correctly spelled. + # check all parameters are correctly spelled. default_emscoring = read_from_yaml_config(DEFAULT_CONFIG) ems_dict = default_emscoring.copy() n_warnings = 0 for param, value in kwargs.items(): if param not in default_emscoring: - sys.exit( + raise ConfigurationError( f"* ERROR * Parameter {param!r} is not a " f"valid `emscoring` parameter.{linesep}" "Valid emscoring parameters are: " @@ -142,7 +143,7 @@ def get_parameters(kwargs: dict[str, Any]) -> dict[str, Any]: if isinstance(default_emscoring[param], bool): # In the case of boolean type if value.lower() not in ["true", "false"]: - sys.exit( + raise ConfigurationError( f"* ERROR * Boolean parameter {param} " "should be True or False" ) @@ -151,7 +152,13 @@ def get_parameters(kwargs: dict[str, Any]) -> dict[str, Any]: else: # Cast value into specific python3 type default_type = type(default_emscoring[param]) - value = default_type(value) + try: + value = default_type(value) + except ValueError: + raise ConfigurationError( + f"* ERROR * parameter '{param}' must be of " + f"type '{default_type.__name__}'" + ) ems_dict[param] = value n_warnings += 1 if n_warnings != 0: @@ -219,7 +226,10 @@ def main( sys.exit(f"* ERROR * Input PDB file {str(input_pdb)!r} does not exist") # Get parameters - ems_dict = get_parameters(kwargs) + try: + ems_dict = get_parameters(kwargs) + except ConfigurationError as config_error: + sys.exit(config_error) # create run directory run_dir = Path(run_dir) diff --git 
a/src/haddock/core/typing.py b/src/haddock/core/typing.py index 73a523b0d..e5d048fb2 100644 --- a/src/haddock/core/typing.py +++ b/src/haddock/core/typing.py @@ -31,6 +31,7 @@ Iterable, Iterator, Literal, + List, Mapping, MutableMapping, Optional, From f8d9dd4ce6fb1146b7d6bd4f53d0485dcceb08be Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 17 Jul 2024 15:33:33 +0200 Subject: [PATCH 27/32] adding tests --- tests/test_cli_score.py | 76 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 tests/test_cli_score.py diff --git a/tests/test_cli_score.py b/tests/test_cli_score.py new file mode 100644 index 000000000..4471a9cbb --- /dev/null +++ b/tests/test_cli_score.py @@ -0,0 +1,76 @@ +"""Tests related to haddock.clis.cli_score""" +import pytest + +from haddock.clis.cli_score import get_parameters +from haddock.core.exceptions import ConfigurationError +from haddock.gear.yaml2cfg import read_from_yaml_config +from haddock.modules.scoring.emscoring import ( + DEFAULT_CONFIG as EMSCORING_DEFAULTS_CONFIG_PATH, + ) + + +@pytest.fixture +def empty_params() -> dict: + return {} + + +@pytest.fixture +def v_cmd_line_params() -> dict: + return { + "w_bsa": 10, + "w_desolv": 10, + "w_elec": 10, + "w_vdw": 10, + } + + +@pytest.fixture +def wrong_params() -> dict: + return {"fake": "wrong"} + + +@pytest.fixture +def wrong_params_type() -> dict: + return {"w_bsa": "wrong"} + + +@pytest.fixture +def default_emscoring_params() -> dict: + default_emscoring = read_from_yaml_config(EMSCORING_DEFAULTS_CONFIG_PATH) + return default_emscoring + + +def test_no_input_params(empty_params: dict, default_emscoring_params: dict): + """Test get_parameters without inputs.""" + final_params = get_parameters(empty_params) + assert isinstance(final_params, dict) + for param_name, param_value in default_emscoring_params.items(): + assert final_params[param_name] == param_value + + +def test_input_params( + v_cmd_line_params: dict[str, int], + 
 default_emscoring_params: dict, + ): + """Test get_parameters with inputs.""" + final_params = get_parameters(v_cmd_line_params) + assert isinstance(final_params, dict) + for param_name, param_value in default_emscoring_params.items(): + if param_name in v_cmd_line_params.keys(): + assert final_params[param_name] == v_cmd_line_params[param_name] + else: + assert final_params[param_name] == param_value + + +def test_wrong_params(wrong_params: dict[str, str]): + """Test get_parameters with wrong inputs.""" + with pytest.raises(ConfigurationError): + final_params = get_parameters(wrong_params) + assert final_params is None + + +def test_wrong_param_type(wrong_params_type: dict[str, str]): + """Test get_parameters with wrong inputs.""" + with pytest.raises(ConfigurationError): + final_params = get_parameters(wrong_params_type) + assert final_params is None From 451f942fdc52023082647fa355c5bab75c846b41 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Mon, 29 Jul 2024 16:11:45 +0200 Subject: [PATCH 28/32] adding new exception DependencyError --- src/haddock/core/exceptions.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/haddock/core/exceptions.py b/src/haddock/core/exceptions.py index 44a2c8578..0c20d92fa 100644 --- a/src/haddock/core/exceptions.py +++ b/src/haddock/core/exceptions.py @@ -53,3 +53,25 @@ class HaddockTermination(HaddockError): """Terminates HADDOCK.""" pass + + +class DependencyError(ModuleError): + """Error thrown when required dependency not satisfied.""" + + def __init__( + self, + msg: str = "", + module: str = "", + dependency: str = "", + ): + self.message = msg + self.module = module + self.dependency = dependency + + def __str__(self) -> str: + additions: str = "" + if self.module: + additions += f"Module `{self.module}` -> " + if self.dependency: + additions += f"Required dependency `{self.dependency}`" + return f"{self.message} {additions}" From 3d2a34558cf335814a5bfc1e5df338355c3cc4ad Mon Sep 17 00:00:00 2001 From: 
VGPReys Date: Mon, 29 Jul 2024 16:13:22 +0200 Subject: [PATCH 29/32] adding dependency checks at workflow prepare_run --- src/haddock/core/defaults.py | 20 ++++---- src/haddock/gear/prepare_run.py | 50 ++++++++++++++++++- src/haddock/libs/libworkflow.py | 9 ++-- .../modules/analysis/caprieval/capri.py | 22 ++++---- 4 files changed, 77 insertions(+), 24 deletions(-) diff --git a/src/haddock/core/defaults.py b/src/haddock/core/defaults.py index b8bb70620..47804f464 100644 --- a/src/haddock/core/defaults.py +++ b/src/haddock/core/defaults.py @@ -21,13 +21,13 @@ MODULE_PATH_NAME = "step_" """ Module input and generated data will be stored in folder starting by -this prefix""" +this prefix.""" MODULE_IO_FILE = "io.json" -"""Default name for exchange module information file""" +"""Default name for exchange module information file.""" MAX_NUM_MODULES = 10000 -"""Temptative number of max allowed number of modules to execute""" +"""Temptative number of max allowed number of modules to execute.""" valid_run_dir_chars = string.ascii_letters + string.digits + "._-/\\" @@ -40,12 +40,14 @@ DATA_DIRNAME = "data" """Name given to the directory holding data.""" -CNS_MODULES = ["rigidbody", - "flexref", - "emscoring", - "mdscoring", - "mdref", - "emref"] +CNS_MODULES = ( + "rigidbody", + "flexref", + "emscoring", + "mdscoring", + "mdref", + "emref", + ) """List of CNS modules available in HADDOCK3.""" diff --git a/src/haddock/gear/prepare_run.py b/src/haddock/gear/prepare_run.py index ca32bf706..be6b94982 100644 --- a/src/haddock/gear/prepare_run.py +++ b/src/haddock/gear/prepare_run.py @@ -15,7 +15,11 @@ from haddock import EmptyPath, contact_us, haddock3_source_path, log from haddock.core.defaults import RUNDIR, max_molecules_allowed, DATA_DIRNAME -from haddock.core.exceptions import ConfigurationError, ModuleError +from haddock.core.exceptions import ( + ConfigurationError, + ModuleError, + DependencyError, + ) from haddock.core.typing import ( Any, Callable, @@ -290,6 +294,7 @@ 
def setup_run( if from_scratch: check_run_dir_exists(general_params[RUNDIR]) + check_CNS_usage(modules_params) if scratch_rest0: check_mandatory_argments_are_present(general_params) @@ -930,7 +935,10 @@ def check_run_dir_exists(run_dir: FilePath) -> None: def identify_modules(params: Iterable[str]) -> list[str]: """Identify keys (headings) belonging to HADDOCK3 modules.""" - modules_keys = [k for k in params if get_module_name(k) in modules_category] + modules_keys = [ + param_name for param_name in params + if get_module_name(param_name) in modules_category + ] return modules_keys @@ -1295,3 +1303,41 @@ def update_step_names_in_file( text = text.replace(s1, s2) file_.write_text(text) return + + +def check_CNS_usage(modules_params: ParamMap) -> None: + """Check that a topology module is run prior to modules requiring CNS. + + Parameters + ---------- + modules_params : ParamMap + Dict of modules parameters. + Only used to obtain ordered list of modules. + + Raises + ------ + DependencyError + Error thrown if topology not run before a CNS module. 
+            # We can stop here as either error raised or check passed
+            break
-1,13 +1,14 @@ """CAPRI module.""" import copy +import json import os import shutil import tempfile + from itertools import combinations from pathlib import Path - os.environ["OPENBLAS_NUM_THREADS"] = "1" import numpy as np @@ -26,8 +27,8 @@ ParamDict, ParamMap, Union, - Type, ) +from haddock.gear.config import load as read_config from haddock.libs.libalign import ( ALIGNError, calc_rmsd, @@ -44,19 +45,21 @@ WEIGHTS = ["w_elec", "w_vdw", "w_desolv", "w_bsa", "w_air"] -import json - -from haddock.gear.config import load as read_config -def get_previous_cns_step(sel_steps: list, st_order: int) -> Union[str, None]: +def get_previous_cns_step( + sel_steps: list[str], + step_order: int, + ) -> Optional[str]: """ Get the previous CNS step. Parameters ---------- - run_path : Path - Path to the run folder. + sel_steps : list[str] + Selected steps. + step_order : int + Index of the step. Returns ------- @@ -67,7 +70,7 @@ def get_previous_cns_step(sel_steps: list, st_order: int) -> Union[str, None]: cns_step = None # just to be careful, remove steps with more than one underscore sel_steps = [step for step in sel_steps if step.count("_") == 1] - mod = min(st_order - 1, len(sel_steps) - 1) + mod = min(step_order - 1, len(sel_steps) - 1) # loop while mod > -1: st_name = sel_steps[mod].split("_")[1] @@ -75,7 +78,6 @@ def get_previous_cns_step(sel_steps: list, st_order: int) -> Union[str, None]: cns_step = sel_steps[mod] break mod -= 1 - return cns_step From 6e121fa84688eec2268fe9ee99066a6a472678fc Mon Sep 17 00:00:00 2001 From: VGPReys Date: Mon, 29 Jul 2024 16:13:50 +0200 Subject: [PATCH 30/32] test to the DependencyError during prepare_run --- tests/test_gear_prepare_run.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/test_gear_prepare_run.py b/tests/test_gear_prepare_run.py index 4c03091a7..123671903 100644 --- a/tests/test_gear_prepare_run.py +++ b/tests/test_gear_prepare_run.py @@ -5,8 +5,10 @@ import pytest -from 
+def test_check_CNS_usage():
+    """Test if check_CNS_usage is functional."""
+    # Case where topology is run before
+    check = check_CNS_usage({"topoaa": {}, "mdref": {}})
+    assert check is None
+    # Loop over CNS modules requiring topology to be accessible
+    for cns_module in CNS_MODULES:
+        # Case where topology is run after
+        with pytest.raises(DependencyError):
+            check_exception1 = check_CNS_usage({cns_module: {}, "topoaa": {}})
+            assert check_exception1 is None
+        # Case where topology not run at all
+        with pytest.raises(DependencyError):
+            check_exception2 = check_CNS_usage({cns_module: {}})
+            assert check_exception2 is None
save_scoring_weights(cns_step) From 9f9a2d0764009ef9837d714ca1ae64183b87d2c5 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Tue, 30 Jul 2024 15:29:26 +0200 Subject: [PATCH 32/32] remove useless import --- src/haddock/modules/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/haddock/modules/__init__.py b/src/haddock/modules/__init__.py index 084399d18..4c7d17d56 100644 --- a/src/haddock/modules/__init__.py +++ b/src/haddock/modules/__init__.py @@ -1,5 +1,6 @@ """HADDOCK3 modules.""" import re + from abc import ABC, abstractmethod from contextlib import contextmanager, suppress from copy import deepcopy