From 5a5a4394fdd73ff0cae828e63363493c0b703bf7 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 26 Jun 2024 15:40:07 +0200 Subject: [PATCH 01/32] adding checks for duplicated parameters in defaults.yaml --- tests/test_gear_yaml2cfg.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/test_gear_yaml2cfg.py b/tests/test_gear_yaml2cfg.py index dbfc28cf5..b0dff15b4 100644 --- a/tests/test_gear_yaml2cfg.py +++ b/tests/test_gear_yaml2cfg.py @@ -1,6 +1,10 @@ """Test yaml2cfg gear.""" import filecmp +import re +import os +import pytest from pathlib import Path +from fnmatch import fnmatch from haddock.gear.yaml2cfg import flat_yaml_cfg, yaml2cfg_text from haddock.libs.libio import read_from_yaml @@ -11,6 +15,17 @@ haddock3_yaml_converted_no_header, ) +@pytest.fixture +def default_yaml_files(): + """Return list of defaults.yaml file withing the haddock src directory.""" + all_defaults_yaml: list[str] = [] + default_yaml_fname = "defaults.yaml" + for path, _subdirs, files in os.walk('../src/haddock/'): + for name in files: + if fnmatch(name, default_yaml_fname): + all_defaults_yaml.append(f"{path}/{default_yaml_fname}") + return all_defaults_yaml + complex_cfg = { "param1": { @@ -97,3 +112,19 @@ def test_yaml2cfg_test_no_header(): shallow=False, ) p.unlink() + + +def test_yaml_duplicated_params(default_yaml_files): + """Make sure no duplicated parameters are present in a ymal file.""" + # Build regular expression + yaml_param_regex = re.compile("^(([A-Za-z0-9]_?)+):") + for yaml_fpath in default_yaml_files: + # Loop over default yaml files + parsed_param_names: dict[str, int] = {} + with open(yaml_fpath, 'r') as filin: + yaml_content = filin.readlines() + for i, line in enumerate(yaml_content, start=1): + if (match := yaml_param_regex.search(line)): + param_name = match.group(1) + assert param_name not in parsed_param_names.keys(), f"Parameter '{param_name}' in {yaml_fpath} has duplicates: l.{parsed_param_names[param_name]} and l.{i}" # 
noqa : E501 + parsed_param_names[param_name] = i From 5b109b5806a45f5c42aa99cb050bc614b738fff6 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 3 Jul 2024 15:47:55 +0200 Subject: [PATCH 02/32] trial1 --- src/haddock/clis/__init__.py | 6 +- src/haddock/core/__init__.py | 4 +- src/haddock/core/defaults.py | 3 + src/haddock/core/typing.py | 1 + src/haddock/gear/prepare_run.py | 99 +++++--- src/haddock/gear/preprocessing.py | 11 +- src/haddock/libs/libio.py | 72 +++--- src/haddock/libs/libontology.py | 228 +++++++++++++++--- src/haddock/libs/libpdb.py | 46 ++-- src/haddock/libs/libstructure.py | 10 +- src/haddock/libs/libutil.py | 13 + src/haddock/libs/libworkflow.py | 21 +- src/haddock/modules/__init__.py | 47 ++-- .../modules/topology/topoaa/__init__.py | 198 ++++----------- .../modules/topology/topoaa/defaults.yaml | 1 - tests/test_libworkflow.py | 22 +- 16 files changed, 463 insertions(+), 319 deletions(-) diff --git a/src/haddock/clis/__init__.py b/src/haddock/clis/__init__.py index 768d257c0..33cf0707f 100644 --- a/src/haddock/clis/__init__.py +++ b/src/haddock/clis/__init__.py @@ -1,8 +1,8 @@ """ Command-line interfaces. -HADDOCK3 has a series of command-line interfaces (CLIs), from which the most important -one is ``haddock3``. You can ask help to the CLIs with the ``-h`` flag:: - +HADDOCK3 has a series of command-line interfaces (CLIs), from which the most +important one is ``haddock3``. +You can ask help to the CLIs with the ``-h`` flag: haddock3 -h """ diff --git a/src/haddock/core/__init__.py b/src/haddock/core/__init__.py index 5fbd962b5..114481b4a 100644 --- a/src/haddock/core/__init__.py +++ b/src/haddock/core/__init__.py @@ -1,3 +1 @@ -""" -Core haddock3 functionalities. 
-""" +"""Core haddock3 functionalities.""" diff --git a/src/haddock/core/defaults.py b/src/haddock/core/defaults.py index 20a705292..b8bb70620 100644 --- a/src/haddock/core/defaults.py +++ b/src/haddock/core/defaults.py @@ -37,6 +37,9 @@ INTERACTIVE_RE_SUFFIX = "interactive" """Suffix added to interactive haddock3-re runs.""" +DATA_DIRNAME = "data" +"""Name given to the directory holding data.""" + CNS_MODULES = ["rigidbody", "flexref", "emscoring", diff --git a/src/haddock/core/typing.py b/src/haddock/core/typing.py index 73a523b0d..c8155caab 100644 --- a/src/haddock/core/typing.py +++ b/src/haddock/core/typing.py @@ -30,6 +30,7 @@ Generic, Iterable, Iterator, + List, Literal, Mapping, MutableMapping, diff --git a/src/haddock/gear/prepare_run.py b/src/haddock/gear/prepare_run.py index 075d2e548..d4c2ab2af 100644 --- a/src/haddock/gear/prepare_run.py +++ b/src/haddock/gear/prepare_run.py @@ -4,8 +4,8 @@ import itertools as it import json import os +import re import shutil -import string import sys import tarfile from contextlib import contextmanager, suppress @@ -14,7 +14,7 @@ from pathlib import Path, PosixPath from haddock import EmptyPath, contact_us, haddock3_source_path, log -from haddock.core.defaults import RUNDIR, max_molecules_allowed +from haddock.core.defaults import RUNDIR, max_molecules_allowed, DATA_DIRNAME from haddock.core.exceptions import ConfigurationError, ModuleError from haddock.core.typing import ( Any, @@ -296,7 +296,7 @@ def setup_run( if restarting_from: remove_folders_after_number(general_params[RUNDIR], restart_from) - _data_dir = Path(general_params[RUNDIR], "data") + _data_dir = Path(general_params[RUNDIR], DATA_DIRNAME) remove_folders_after_number(_data_dir, restart_from) if restarting_from or starting_from_copy: @@ -315,6 +315,12 @@ def setup_run( dec_all=True, ) + first_module_id = list(modules_params.keys())[0] + if (topoaa_module_id := "topoaa.1") in modules_params.keys(): + topology_params = modules_params[topoaa_module_id] + 
else: + topology_params = {} + if starting_from_copy: num_steps = len(step_folders) _num_modules = len(modules_params) @@ -327,24 +333,27 @@ def setup_run( else: copy_molecules_to_topology( general_params["molecules"], - modules_params["topoaa.1"], + topology_params, ) + # copy_molecules_to_topology( + # general_params["molecules"], + # modules_params[first_module_id], + # ) - if len(modules_params["topoaa.1"]["molecules"]) > max_molecules_allowed: + max_mols = len(topology_params["molecules"]) + if max_mols > max_molecules_allowed: raise ConfigurationError( f"Too many molecules defined, max is {max_molecules_allowed}." - ) # noqa: E501 + ) zero_fill.read(modules_params) - populate_topology_molecule_params(modules_params["topoaa.1"]) - populate_mol_parameters(modules_params) - - max_mols = len(modules_params["topoaa.1"]["molecules"]) + populate_topology_molecule_params(topology_params) + populate_mol_parameters(modules_params, topology_params) if not from_scratch: _prev, _new = renum_step_folders(general_params[RUNDIR]) - renum_step_folders(Path(general_params[RUNDIR], "data")) + renum_step_folders(Path(general_params[RUNDIR], DATA_DIRNAME)) if UNPACK_FOLDERS: # only if there was any folder unpacked update_unpacked_names(_prev, _new, UNPACK_FOLDERS) update_step_contents_to_step_names( @@ -367,7 +376,8 @@ def setup_run( if scratch_rest0: copy_molecules_to_data_dir( data_dir, - modules_params["topoaa.1"], + topology_params, + first_module_id, preprocess=general_params["preprocess"], ) @@ -418,8 +428,9 @@ def save_configuration_files(configs: dict, datadir: Union[str, Path]) -> dict: # Initiate files data infofile = { "raw_input": ( - "An untouched copy of the raw input file, " "as provided by the user." - ), + "An untouched copy of the raw input file, " + "as provided by the user." 
+ ), "cleaned_input": ( "Pre-parsed input file where (eventually) " "some indexing and modifications were " @@ -427,8 +438,8 @@ def save_configuration_files(configs: dict, datadir: Union[str, Path]) -> dict: ), "enhanced_haddock_params": ( "Final input file with detailed default parameters." - ), - } + ), + } added_files = {} # Set list of configurations that wish to be saved list_save_conf = [ @@ -775,7 +786,7 @@ def create_data_dir(run_dir: FilePath) -> Path: pathlib.Path A path referring only to 'data'. """ - data_dir = Path(run_dir, "data") + data_dir = Path(run_dir, DATA_DIRNAME) data_dir.mkdir(parents=True, exist_ok=True) return data_dir @@ -789,8 +800,11 @@ def copy_molecules_to_topology( def copy_molecules_to_data_dir( - data_dir: Path, topoaa_params: ParamMap, preprocess: bool = True -) -> None: + data_dir: Path, + topoaa_params: ParamMap, + _first_module_name: str, + preprocess: bool = True, + ) -> None: """ Copy molecules to data directory and to topoaa parameters. @@ -807,7 +821,13 @@ def copy_molecules_to_data_dir( Whether to preprocess input molecules. Defaults to ``True``. See :py:mod:`haddock.gear.preprocessing`. """ - topoaa_dir = zero_fill.fill("topoaa", 0) + # Removes digit from module name + # Build regex to capture '.' 
+ name_digit_regex = re.compile(r"(\w+)\.\d+") + first_module_name: str = "input_molecules" + if match := name_digit_regex.search(_first_module_name): + first_module_name = match.group(1) + topoaa_dir = zero_fill.fill(first_module_name, 0) # define paths data_topoaa_dir = Path(data_dir, topoaa_dir) @@ -815,32 +835,34 @@ def copy_molecules_to_data_dir( rel_data_topoaa_dir = Path(data_dir.name, topoaa_dir) original_mol_dir = Path(data_dir, "original_molecules") + # Init new molecule holder to be filled with relative paths new_molecules: list[Path] = [] + # Loop over input molecules for molecule in copy(topoaa_params["molecules"]): check_if_path_exists(molecule) - mol_name = Path(molecule).name - - if preprocess: # preprocess PDB files - top_fname = topoaa_params.get("ligand_top_fname", False) - new_residues = read_additional_residues(top_fname) if top_fname else None - - new_pdbs = process_pdbs(molecule, user_supported_residues=new_residues) - - # copy the original molecule + # preprocess PDB files + if preprocess: + # copy the un-processed molecule (for later checks) original_mol_dir.mkdir(parents=True, exist_ok=True) original_mol = Path(original_mol_dir, mol_name) shutil.copy(molecule, original_mol) - + # Gather potential user-provided topology file + top_fname = topoaa_params.get("ligand_top_fname", False) + new_residues = read_additional_residues(top_fname) if top_fname else None + # Do the pre-processing of file + new_pdbs = process_pdbs(molecule, user_supported_residues=new_residues) # write the new processed molecule new_pdb = os.linesep.join(new_pdbs[0]) Path(data_topoaa_dir, mol_name).write_text(new_pdb) - + # Do not preprocess else: + # Create a copy of input molecules into `data/0_firstmodule` shutil.copy(molecule, Path(data_topoaa_dir, mol_name)) - - new_molecules.append(Path(rel_data_topoaa_dir, mol_name)) - + # Create relative path of the molecule + data_dir_molecule_relpath = Path(rel_data_topoaa_dir, mol_name) + 
new_molecules.append(data_dir_molecule_relpath) + # Modify molecules parameters to point relative path of copied files topoaa_params["molecules"] = copy(new_molecules) @@ -984,7 +1006,7 @@ def get_expandable_parameters( # the topoaa module is an exception because it has subdictionaries # for the `mol` parameter. Instead of defining a general recursive # function, I decided to add a simple if/else exception. - # no other module should have subdictionaries has parameters + # no other module should have subdictionaries as parameters if get_module_name(module_name) == "topoaa": ap: set[str] = set() # allowed_parameters ap.update(_get_expandable(user_config, defaults, module_name, max_mols)) @@ -1053,7 +1075,10 @@ def populate_topology_molecule_params(topoaa: ParamMap) -> None: return -def populate_mol_parameters(modules_params: ParamMap) -> None: +def populate_mol_parameters( + modules_params: ParamMap, + topology_params: ParamMap, + ) -> None: """ Populate modules subdictionaries with the needed molecule `mol_` parameters. @@ -1079,7 +1104,7 @@ def populate_mol_parameters(modules_params: ParamMap) -> None: Alter the dictionary in place. """ # the starting number of the `mol_` parameters is 1 by CNS definition. 
- num_mols = range(1, len(modules_params["topoaa.1"]["molecules"]) + 1) + num_mols = range(1, len(topology_params["molecules"]) + 1) for module_name, _ in modules_params.items(): # read the modules default parameters defaults = _read_defaults(module_name) diff --git a/src/haddock/gear/preprocessing.py b/src/haddock/gear/preprocessing.py index 1df1db513..ecaabe2ca 100644 --- a/src/haddock/gear/preprocessing.py +++ b/src/haddock/gear/preprocessing.py @@ -386,14 +386,15 @@ def process_pdbs( residues=set.union( supported_HETATM, user_supported_residues or set(), + ), ), - ), convert_HETATM_to_ATOM, partial(wrep_pdb_fixinsert, option_list=[]), ##### partial( - remove_unsupported_hetatm, user_defined=user_supported_residues - ), # noqa: E501 + remove_unsupported_hetatm, + user_defined=user_supported_residues, + ), partial(remove_unsupported_atom), #### # partial(wrep_pdb_shiftres, shifting_factor=0), @@ -401,7 +402,7 @@ def process_pdbs( wrep_pdb_tidy, ### wrep_rstrip, - ] + ] # these functions take the whole PDB content, evaluate it, and # modify it if needed. @@ -455,7 +456,7 @@ def process_pdbs( wrep_pdb_tidy_strict = _report("pdb_tidy")(partial(pdb_tidy.run, strict=True)) wrep_rstrip = _report("str.rstrip")( partial(map, lambda x: x.rstrip(linesep)) -) # noqa: E501 + ) @_report("Replacing HETATM to ATOM for residue {!r}") diff --git a/src/haddock/libs/libio.py b/src/haddock/libs/libio.py index 5b32abeec..948cddb4f 100644 --- a/src/haddock/libs/libio.py +++ b/src/haddock/libs/libio.py @@ -20,7 +20,7 @@ Iterable, Mapping, Optional, -) + ) from haddock.libs.libontology import PDBFile from haddock.libs.libutil import sort_numbered_paths @@ -130,8 +130,8 @@ def open_files_to_lines(*files: FilePath) -> list[list[str]]: def save_lines_to_files( - files: Iterable[FilePath], lines: Iterable[Iterable[str]] -) -> None: + files: Iterable[FilePath], lines: Iterable[Iterable[str]] + ) -> None: """ Save a list of list of lines to files. 
@@ -156,8 +156,8 @@ def save_lines_to_files( def add_suffix_to_files( - files: Iterable[FilePath], suffix: str -) -> Generator[Path, None, None]: + files: Iterable[FilePath], suffix: str + ) -> Generator[Path, None, None]: """ Add a suffix to file paths. @@ -176,11 +176,11 @@ def add_suffix_to_files( def write_dic_to_file( - data_dict: Mapping[Any, Any], - output_fname: FilePath, - info_header: str = "", - sep: str = "\t", -) -> None: + data_dict: Mapping[Any, Any], + output_fname: FilePath, + info_header: str = "", + sep: str = "\t", + ) -> None: """ Create a table from a dictionary. @@ -219,11 +219,11 @@ def write_dic_to_file( def write_nested_dic_to_file( - data_dict: Mapping[Any, Any], - output_fname: FilePath, - info_header: str = "", - sep: str = "\t", -) -> None: + data_dict: Mapping[Any, Any], + output_fname: FilePath, + info_header: str = "", + sep: str = "\t", + ) -> None: """ Create a table from a nested dictionary. @@ -279,8 +279,8 @@ def working_directory(path: FilePath) -> Generator[None, None, None]: def compress_files_ext( - path: FilePath, ext: str, ncores: int = 1, **kwargs: Any -) -> bool: + path: FilePath, ext: str, ncores: int = 1, **kwargs: Any + ) -> bool: """ Compress all files with same extension in folder to `.gz`. @@ -318,11 +318,11 @@ def compress_files_ext( def gzip_files( - file_: FilePath, - block_size: Optional[int] = None, - compresslevel: int = 9, - remove_original: bool = False, -) -> None: + file_: FilePath, + block_size: Optional[int] = None, + compresslevel: int = 9, + remove_original: bool = False, + ) -> None: """ Gzip a file. 
@@ -343,8 +343,8 @@ def gzip_files( gfile = str(file_) + ".gz" with open(file_, "rb") as fin, gzip.open( - gfile, mode="wb", compresslevel=compresslevel - ) as gout: + gfile, mode="wb", compresslevel=compresslevel + ) as gout: content = fin.read(block_size) # read the first while content: gout.write(content) @@ -382,10 +382,10 @@ def archive_files_ext(path: FilePath, ext: str, compresslevel: int = 9) -> bool: if files: with tarfile.open( - Path(path, f"{ext}.tgz"), - mode="w:gz", - compresslevel=compresslevel, - ) as tarout: + Path(path, f"{ext}.tgz"), + mode="w:gz", + compresslevel=compresslevel, + ) as tarout: for file_ in files: tarout.add(file_, arcname=file_.name) @@ -439,10 +439,10 @@ def remove_files_with_ext(folder: FilePath, ext: str) -> None: def folder_exists( - path: FilePath, - exception: type[Exception] = ValueError, - emsg: str = "The folder {!r} does not exist or is not a folder.", -) -> Path: + path: FilePath, + exception: type[Exception] = ValueError, + emsg: str = "The folder {!r} does not exist or is not a folder.", + ) -> Path: """ Assert if a folder exist. @@ -482,10 +482,10 @@ def folder_exists( def file_exists( - path: FilePath, - exception: type[Exception] = ValueError, - emsg: str = "`path` is not a file or does not exist", -) -> Path: + path: FilePath, + exception: type[Exception] = ValueError, + emsg: str = "`path` is not a file or does not exist", + ) -> Path: """ Assert if file exist. 
diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index 0dc267765..3c4317406 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -1,16 +1,25 @@ """Describe the Haddock3 ontology used for communicating between modules.""" import datetime import itertools +import os +import re from enum import Enum from os import linesep +from os.path import getmtime from pathlib import Path - import jsonpickle from haddock.core.defaults import MODULE_IO_FILE -from haddock.core.typing import FilePath, Literal, Optional, TypeVar, Union -from typing import List, Any +from haddock.core.typing import ( + Any, + FilePath, + List, + Optional, + TypeVar, + Union, + ) +from haddock.libs import libpdb NaN = float("nan") @@ -34,13 +43,13 @@ class Persistent: """Any persistent file generated by this framework.""" def __init__( - self, - file_name: FilePath, - file_type: Format, - path: FilePath = ".", - md5: Optional[str] = None, - restr_fname: Optional[FilePath] = None, - ) -> None: + self, + file_name: FilePath, + file_type: Format, + path: FilePath = ".", + md5: Optional[str] = None, + restr_fname: Optional[FilePath] = None, + ) -> None: self.created = datetime.datetime.now().isoformat(" ", "seconds") self.file_name = Path(file_name).name self.file_type = file_type @@ -52,8 +61,9 @@ def __init__( def __repr__(self) -> str: rep = ( - f"[{self.file_type}|{self.created}] " f"{Path(self.path) / self.file_name}" - ) + f"[{self.file_type}|{self.created}] " + f"{Path(self.path) / self.file_name}" + ) return rep def is_present(self) -> bool: @@ -65,15 +75,15 @@ class PDBFile(Persistent): """Represent a PDB file.""" def __init__( - self, - file_name: Union[Path, str], - topology: Optional[Any] = None, - path: Union[Path, str] = ".", - score: float = NaN, - md5: Optional[str] = None, - restr_fname: Optional[Union[Path, str]] = None, - unw_energies: Optional[dict[str, float]] = None, - ) -> None: + self, + file_name: Union[Path, str], + 
topology: Optional[Any] = None, + path: Union[Path, str] = ".", + score: float = NaN, + md5: Optional[str] = None, + restr_fname: Optional[Union[Path, str]] = None, + unw_energies: Optional[dict[str, float]] = None, + ) -> None: super().__init__(file_name, Format.PDB, path, md5, restr_fname) self.topology = topology @@ -101,7 +111,12 @@ def __hash__(self) -> int: class RMSDFile(Persistent): """Represents a RMSD matrix file.""" - def __init__(self, file_name: FilePath, npairs: int, path: FilePath = ".") -> None: + def __init__( + self, + file_name: FilePath, + npairs: int, + path: FilePath = ".", + ) -> None: super().__init__(file_name, Format.MATRIX, path) self.npairs = npairs @@ -136,7 +151,11 @@ def add(self, persistent, mode="i"): else: self.output.append(persistent) - def save(self, path: FilePath = ".", filename: FilePath = MODULE_IO_FILE) -> Path: + def save( + self, + path: FilePath = ".", + filename: FilePath = MODULE_IO_FILE, + ) -> Path: """Save Input/Output needed files by this module to disk.""" fpath = Path(path, filename) with open(fpath, "w") as output_handler: @@ -147,14 +166,15 @@ def save(self, path: FilePath = ".", filename: FilePath = MODULE_IO_FILE) -> Pat def load(self, filename: FilePath) -> None: """Load the content of a given IO filename.""" - with open(filename) as json_file: - content = jsonpickle.decode(json_file.read()) - self.input = content["input"] # type: ignore - self.output = content["output"] # type: ignore + if filename.is_file(): + with open(filename) as json_file: + content = jsonpickle.decode(json_file.read()) + self.input = content["input"] # type: ignore + self.output = content["output"] # type: ignore def retrieve_models( - self, crossdock: bool = False, individualize: bool = False - ) -> list[Union[PDBFile, list[PDBFile]]]: + self, crossdock: bool = False, individualize: bool = False + ) -> list[Union[PDBFile, list[PDBFile]]]: """Retrieve the PDBobjects to be used in the module.""" # Get the models generated in previous 
step model_list: list[PDBFile] = [] @@ -168,7 +188,6 @@ def retrieve_models( elif element.file_type == Format.PDB: # type: ignore model_list.append(element) # type: ignore - if input_dic and not crossdock and not individualize: # check if all ensembles contain the same number of models sub_lists = iter(input_dic.values()) @@ -177,13 +196,17 @@ def retrieve_models( _msg = ( "Different number of models in molecules," " cannot prepare pairwise complexes." - ) + ) raise Exception(_msg) # prepare pairwise combinations - model_list = [values for values in zip(*input_dic.values())] # type: ignore + model_list = [ + values for values in zip(*input_dic.values()) + ] # type: ignore elif input_dic and crossdock and not individualize: - model_list = [values for values in itertools.product(*input_dic.values())] # type: ignore + model_list = [ + values for values in itertools.product(*input_dic.values()) + ] # type: ignore elif input_dic and individualize: model_list = list(itertools.chain(*input_dic.values())) @@ -231,11 +254,148 @@ def remove_missing(self) -> None: if not element.is_present(): idxs.append(idx) - self.output = [value for i, value in enumerate(self.output) if i not in idxs] + self.output = [ + value for i, value in enumerate(self.output) + if i not in idxs + ] def __repr__(self) -> str: return f"Input: {self.input}{linesep}Output: {self.output}" + def load_from_input_molecules( + self, + input_molecules_dir: Path, + ) -> None: + """Load first molecules at the stat of the workflow. + + Parameters + ---------- + input_molecules_dir : Path + Directory where the input molecules are stored. + """ + # Gather all input molecules + input_molecules = list(input_molecules_dir.glob('*.pdb')) + # Sort them by creation date (which is also input order) + input_molecules.sort(key=getmtime) # FIXME: getctime ? 
+ # Set input attribute + self.input = input_molecules + + # Set parsing variables + molecules_dic: dict[int, dict[int, PDBFile]] = {} + # Loop over input molecules + for i, molecule in enumerate(self.input, start=1): + # Split models (these come already sorted) + splited_models = libpdb.split_ensemble( + molecule, + dest=input_molecules_dir, + ) + # get the MD5 hash of each model + md5_dic = self.get_md5(molecule) + origin_names = self.get_ensemble_origin(molecule) + # Initiate with empty list + molecules_dic.setdefault(i, {}) + # Loop over conformers of this ensemble + for j, model in enumerate(splited_models): + processed_model = model + model_name = model.stem + # Search of md5 information + md5_hash = None + try: + model_id = int(model_name.split("_")[-1]) + except ValueError: + model_id = 0 + if model_id in md5_dic: + md5_hash = md5_dic[model_id] + # Check if origin or md5 is available + if md5_hash or model_id in origin_names.keys(): + # Select prefix + if md5_hash: # Prioritize the md5 hash + prefix_name = md5_hash + else: + prefix_name = origin_names[model_id] + # Build new filename + model_new_name = f"{prefix_name}_from_{model_name}" + # Rename file + processed_model = model.rename( + Path( + input_molecules_dir, + f"{model_new_name}.{Format.PDB}", + ) + ) + # Create a PDBFile object + pdbfile = PDBFile( + processed_model, + md5=md5_hash, + ) + # Modify relative path attribute + pdbfile.rel_path = Path( + "..", + input_molecules_dir, + pdbfile.file_name + ) + # Set origin name + pdbfile.ori_name = molecule + # Hold that conformer/model + molecules_dic[i][j] = pdbfile + # And fake them to be the output of the previous io + self.output = list(molecules_dic.values()) + + @staticmethod + def get_md5(ensemble_f: FilePath) -> dict[int, str]: + """Get MD5 hash of a multi-model PDB file.""" + md5_dic: dict[int, str] = {} + text = Path(ensemble_f).read_text() + lines = text.split(os.linesep) + REMARK_lines = (line for line in lines if line.startswith("REMARK")) + 
remd5 = re.compile(r"^[a-f0-9]{32}$") + for line in REMARK_lines: + parts = line.strip().split() + + try: + idx = parts.index("MODEL") + except ValueError: # MODEL not in parts, this line can be ignored + continue + + # check if there's a md5 hash in line + for part in parts: + group = remd5.fullmatch(part) + if group: + # the model num comes after the MODEL + model_num = int(parts[idx + 1]) + md5_dic[model_num] = group.string # md5 hash + break + + return md5_dic + + @staticmethod + def get_ensemble_origin(ensemble_f: FilePath) -> dict[int, str]: + """Try to find origin for each model in ensemble. + + Parameters + ---------- + ensemble_f : FilePath + Path to a pdb file containing an ensemble. + + Returns + ------- + origin_dic : dict[int, str] + Dictionary holding as keys the modelID and values its origin. + """ + origin_dic: dict[int, str] = {} + text = Path(ensemble_f).read_text() + lines = text.split(os.linesep) + REMARK_lines = (line for line in lines if line.startswith("REMARK")) + re_origin = re.compile( + r"REMARK\s+MODEL\s+(\d+)\s+(FROM|from|From)\s+(([\w_-]+\.?)+)" + ) + for line in REMARK_lines: + if (match := re_origin.search(line)): + model_num = int(match.group(1).strip()) + original_path = match.group(3).strip() + original_name = Path(original_path).stem + origin_dic[model_num] = original_name + return origin_dic + PDBPath = Union[PDBFile, Path] diff --git a/src/haddock/libs/libpdb.py b/src/haddock/libs/libpdb.py index 1ced78d8a..7d7e4ec6b 100644 --- a/src/haddock/libs/libpdb.py +++ b/src/haddock/libs/libpdb.py @@ -17,8 +17,11 @@ Optional, Union, ) -from haddock.libs.libio import working_directory -from haddock.libs.libutil import get_result_or_same_in_list, sort_numbered_paths +from haddock.libs.libutil import ( + get_result_or_same_in_list, + sort_numbered_paths, + working_directory, + ) slc_record = slice(0, 6) @@ -110,8 +113,10 @@ def get_supported_residues(haddock_topology: FilePath) -> list[str]: _to_keep = list(supported_residues) -def 
split_ensemble(pdb_file_path: Path, - dest: Optional[FilePath] = None) -> list[Path]: +def split_ensemble( + pdb_file_path: Path, + dest: Optional[FilePath] = None, + ) -> list[Path]: """ Split a multimodel PDB file into different structures. @@ -119,15 +124,24 @@ def split_ensemble(pdb_file_path: Path, ---------- dest : str or pathlib.Path Destination folder. + + Returns + ------- + pdb_files_list : list[Path] + List of pdb file(s). """ if dest is None: dest = Path.cwd() - assert pdb_file_path.is_file(), pdb_file_path + assert pdb_file_path.is_file(), \ + f"File '{pdb_file_path}' could not be found in file system." with open(pdb_file_path) as input_handler: with working_directory(dest): split_model(input_handler) - - return sort_numbered_paths(*get_new_models(pdb_file_path)) + pdb_files_list = sort_numbered_paths( + *get_new_models(pdb_file_path) + ) + pdb_files_path = [Path(dest, pdb_fname) for pdb_fname in pdb_files_list] + return pdb_files_path def split_by_chain(pdb_file_path: FilePath) -> list[Path]: @@ -164,7 +178,8 @@ def swap_segid_chain(pdb_file_path: FilePath, def sanitize( pdb_file_path: FilePathT, overwrite: bool = True, - custom_topology: Optional[FilePath] = None) -> Union[FilePathT, Path]: + custom_topology: Optional[FilePath] = None, + ) -> Union[FilePathT, Path]: """Sanitize a PDB file.""" if custom_topology: custom_res_to_keep = get_supported_residues(custom_topology) @@ -179,7 +194,7 @@ def sanitize( for tag, new_tag in _to_rename.items(): line = line.replace(tag, new_tag) # check if this residue is known - res = line[17:20].strip() + res = line[slc_resname].strip() if res and res in _to_keep: good_lines.append(line) if len(good_lines) > 0 and good_lines[-1] != "END": @@ -206,11 +221,11 @@ def identify_chainseg(pdb_file_path: FilePath, for line in input_handler: if line.startswith(("ATOM ", "HETATM")): try: - segid = line[72:76].strip()[:1] + segid = line[slc_segid].strip()[:1] except IndexError: segid = "" try: - chainid = line[21].strip() + 
chainid = line[slc_chainid].strip() except IndexError: chainid = "" @@ -221,7 +236,8 @@ def identify_chainseg(pdb_file_path: FilePath, if not segid and not chainid: raise ValueError( - f"Could not identify chainID or segID in pdb {pdb_file_path}, line {line}" + "Could not identify chainID or segID" + f" in pdb {pdb_file_path}, line {line}" ) if sort: @@ -246,8 +262,10 @@ def get_new_models(pdb_file_path: FilePath) -> list[Path]: return new_models -def get_pdb_file_suffix_variations(file_name: FilePath, - sep: str = "_") -> list[Path]: +def get_pdb_file_suffix_variations( + file_name: FilePath, + sep: str = "_", + ) -> list[Path]: """ List suffix variations of a PDB file in the current path. diff --git a/src/haddock/libs/libstructure.py b/src/haddock/libs/libstructure.py index 9f8164436..46e6d6d62 100644 --- a/src/haddock/libs/libstructure.py +++ b/src/haddock/libs/libstructure.py @@ -23,10 +23,12 @@ class Molecule: as ``file_name``. """ - def __init__(self, - file_name: Path, - segid: Optional[int] = None, - no_parent: bool = False) -> None: + def __init__( + self, + file_name: Path, + segid: Optional[int] = None, + no_parent: bool = False, + ) -> None: # the rest of the code is too dependent on the Path API assert isinstance(file_name, Path), \ f"`file_name` must be pathlib.Path: {type(file_name)} given" diff --git a/src/haddock/libs/libutil.py b/src/haddock/libs/libutil.py index 60a07a52c..2fdbce576 100644 --- a/src/haddock/libs/libutil.py +++ b/src/haddock/libs/libutil.py @@ -1,6 +1,7 @@ """General utilities.""" import collections.abc import contextlib +import os import re import shutil import subprocess @@ -379,3 +380,15 @@ def recursive_convert_paths_to_strings(params: ParamMapT) -> ParamMapT: params[param] = value return params + + +# thanks to @brianjimenez +@contextlib.contextmanager +def working_directory(path: FilePath) -> Generator[None, None, None]: + """Change working directory and returns to previous on exit.""" + prev_cwd = Path.cwd() + 
os.chdir(path) + try: + yield + finally: + os.chdir(prev_cwd) diff --git a/src/haddock/libs/libworkflow.py b/src/haddock/libs/libworkflow.py index 41b4fe324..e3ef4cb65 100644 --- a/src/haddock/libs/libworkflow.py +++ b/src/haddock/libs/libworkflow.py @@ -17,18 +17,18 @@ from haddock.modules import ( modules_category, non_mandatory_general_parameters_defaults, -) + ) class WorkflowManager: """Read and execute workflows.""" def __init__( - self, - workflow_params: ModuleParams, - start: Optional[int] = 0, - **other_params: Any, - ) -> None: + self, + workflow_params: ModuleParams, + start: Optional[int] = 0, + **other_params: Any, + ) -> None: self.start = 0 if start is None else start self.recipe = Workflow(workflow_params, start=0, **other_params) # terminate is used to synchronize the `clean` option with the @@ -149,8 +149,13 @@ def execute(self) -> None: # Import the module given by the mode or default module_name = ".".join( - ["haddock", "modules", modules_category[self.module_name], self.module_name] - ) + [ + "haddock", + "modules", + modules_category[self.module_name], + self.module_name + ] + ) module_lib = importlib.import_module(module_name) self.module = module_lib.HaddockModule(order=self.order, path=self.working_path) diff --git a/src/haddock/modules/__init__.py b/src/haddock/modules/__init__.py index 65f5ab833..2efd25d50 100644 --- a/src/haddock/modules/__init__.py +++ b/src/haddock/modules/__init__.py @@ -7,7 +7,11 @@ from pathlib import Path from haddock import EmptyPath, log, modules_defaults_path -from haddock.core.defaults import MODULE_IO_FILE, INTERACTIVE_RE_SUFFIX +from haddock.core.defaults import ( + DATA_DIRNAME, + INTERACTIVE_RE_SUFFIX, + MODULE_IO_FILE, + ) from haddock.core.exceptions import ConfigurationError from haddock.core.typing import ( Any, @@ -122,7 +126,6 @@ def __init__(self, order: int, path: Path, params_fname: FilePath) -> None: """ self.order = order self.path = path - self.previous_io = self._load_previous_io() # 
instantiate module's parameters self._origignal_config_file = params_fname @@ -132,6 +135,7 @@ def __init__(self, order: int, path: Path, params_fname: FilePath) -> None: self._params: ParamDict = {} self.update_params(update_from_cfg_file=params_fname) + self.previous_io = self._load_previous_io() @property def params(self) -> ParamDict: @@ -207,7 +211,7 @@ def save_config(self, path: FilePath) -> None: # ... ignore = config_mandatory_general_parameters.union( non_mandatory_general_parameters_defaults - ) # noqa: 501 + ) params = deepcopy(self.params) with suppress(KeyError): @@ -262,7 +266,7 @@ def confirm_installation(cls) -> None: """ return - def export_io_models(self, faulty_tolerance=0): + def export_io_models(self, faulty_tolerance: int = 0) -> None: """ Export input/output to the ModuleIO interface. @@ -296,7 +300,6 @@ def export_io_models(self, faulty_tolerance=0): f"and tolerance was set to {faulty_tolerance:.2f}%." ) self.finish_with_error(_msg) - def finish_with_error(self, reason: object = "Module has failed.") -> None: """Finish with error message.""" @@ -310,32 +313,44 @@ def _load_previous_io( self, filename: FilePath = MODULE_IO_FILE, ) -> ModuleIO: - if self.order == 0: - self._num_of_input_molecules = 0 - return ModuleIO() - io = ModuleIO() - previous_io = Path(self.previous_path(), filename) - - if previous_io.is_file(): + # In case of the first step in the workflow + if self.order == 0: + self._load_first_io(io) + else: + previous_io = Path(self.previous_path(), filename) io.load(previous_io) - + # Count number of molecules self._num_of_input_molecules = len(io.output) - return io + + def _load_first_io(self, io: ModuleIO) -> None: + """Provide the first ModuleIO. + + Parameters + ---------- + io : ModuleIO + The content of the step -1 moduleIO object. 
+ """ + # Point input molecules path + input_molecules_dir = Path(DATA_DIRNAME, self.path) + # Generate pdb files + io.load_from_input_molecules(input_molecules_dir) def previous_path(self) -> Path: """Give the path from the previous calculation.""" previous = get_module_steps_folders(self.path.resolve().parent) try: - # return Path(previous[self.order - 1]) return self.last_step_folder(previous, self.order - 1) except IndexError: return self.path @staticmethod - def last_step_folder(folders, index): + def last_step_folder( + folders: list[str], + index: int, + ) -> Optional[str]: """Retrieve last step folder.""" with_ind = [ folder for folder in folders diff --git a/src/haddock/modules/topology/topoaa/__init__.py b/src/haddock/modules/topology/topoaa/__init__.py index 2322df408..d85e90dd6 100644 --- a/src/haddock/modules/topology/topoaa/__init__.py +++ b/src/haddock/modules/topology/topoaa/__init__.py @@ -1,7 +1,5 @@ """Create and manage CNS all-atom topology.""" import operator -import os -import re from functools import partial from pathlib import Path @@ -12,9 +10,8 @@ load_workflow_params, prepare_output, prepare_single_input, -) + ) from haddock.libs.libontology import Format, PDBFile, TopologyFile -from haddock.libs.libstructure import make_molecules from haddock.libs.libsubprocess import CNSJob from haddock.modules import get_engine from haddock.modules.base_cns_module import BaseCNSModule @@ -25,12 +22,12 @@ def generate_topology( - input_pdb: Path, - recipe_str: str, - defaults: ParamMap, - mol_params: ParamMap, - default_params_path: Optional[FilePath] = None, -) -> Path: + input_pdb: Path, + recipe_str: str, + defaults: ParamMap, + mol_params: ParamMap, + default_params_path: Optional[FilePath] = None, + ) -> Path: """Generate a HADDOCK topology file from input_pdb.""" # generate params headers general_param = load_workflow_params(**defaults) @@ -40,12 +37,12 @@ def generate_topology( # generate default headers link, trans_vec, tensor, scatter, axis, 
water_box = generate_default_header( path=default_params_path - ) + ) output = prepare_output( output_pdb_filename=f"{input_pdb.stem}_haddock{input_pdb.suffix}", output_psf_filename=f"{input_pdb.stem}_haddock.{Format.TOPOLOGY}", - ) + ) input_str = prepare_single_input(str(input_pdb)) @@ -60,7 +57,7 @@ def generate_topology( axis, water_box, recipe_str, - ) + ) inp = "".join(inp_parts) @@ -76,8 +73,11 @@ class HaddockModule(BaseCNSModule): name = RECIPE_PATH.name def __init__( - self, order: int, path: Path, initial_params: FilePath = DEFAULT_CONFIG - ) -> None: + self, + order: int, + path: Path, + initial_params: FilePath = DEFAULT_CONFIG, + ) -> None: cns_script = RECIPE_PATH / "cns" / "generate-topology.cns" super().__init__(order, path, initial_params, cns_script=cns_script) @@ -86,73 +86,24 @@ def confirm_installation(cls) -> None: """Confirm if module is installed.""" return - @staticmethod - def get_md5(ensemble_f: FilePath) -> dict[int, str]: - """Get MD5 hash of a multi-model PDB file.""" - md5_dic: dict[int, str] = {} - text = Path(ensemble_f).read_text() - lines = text.split(os.linesep) - REMARK_lines = (line for line in lines if line.startswith("REMARK")) - remd5 = re.compile(r"^[a-f0-9]{32}$") - for line in REMARK_lines: - parts = line.strip().split() - - try: - idx = parts.index("MODEL") - except ValueError: # MODEL not in parts, this line can be ignored - continue - - # check if there's a md5 hash in line - for part in parts: - group = remd5.fullmatch(part) - if group: - # the model num comes after the MODEL - model_num = int(parts[idx + 1]) - md5_dic[model_num] = group.string # md5 hash - break - - return md5_dic - - @staticmethod - def get_ensemble_origin(ensemble_f: FilePath) -> dict[int, str]: - """Try to find origin for each model in ensemble. - - Parameters - ---------- - ensemble_f : FilePath - Path to a pdb file containing an ensemble. 
- - Returns - ------- - origin_dic : dict[int, str] - Dictionary holding as keys the modelID and values its origin. - """ - origin_dic: dict[int, str] = {} - text = Path(ensemble_f).read_text() - lines = text.split(os.linesep) - REMARK_lines = (line for line in lines if line.startswith("REMARK")) - re_origin = re.compile("REMARK\s+MODEL\s+(\d+)\s+(FROM|from|From)\s+(([\w_-]+\.?)+)") # noqa : E501 - for line in REMARK_lines: - if (match := re_origin.search(line)): - model_num = int(match.group(1).strip()) - original_path = match.group(3).strip() - original_name = Path(original_path).stem - origin_dic[model_num] = original_name - return origin_dic - - def _run(self) -> None: """Execute module.""" + md5_dic: dict[int, dict[int, str]] = {} + self.params.pop("molecules") + molecules: list[list[Path]] = [] if self.order == 0: - # topoaa is the first step in the workflow - molecules = make_molecules(self.params.pop("molecules")) - + _molecules = self.previous_io.output + for i, models in enumerate(_molecules, start=1): + molecules.append([model.rel_path for model in models.values()]) + md5_dic[i] = { + j: model.md5 + for j, model in enumerate(models.values()) + } else: # in case topoaa is not the first step, the topology is rebuilt for # each retrieved model _molecules = self.previous_io.retrieve_models() - molecules_paths: list[Path] = [mol.rel_path for mol in _molecules] # type: ignore - molecules = make_molecules(molecules_paths, no_parent=True) + molecules = [[mol.rel_path] for mol in _molecules] # extracts `input` key from params. 
The `input` keyword needs to # be treated separately @@ -161,62 +112,40 @@ def _run(self) -> None: if k.startswith("mol") and k[3:].isdigit(): mol_params[k] = self.params.pop(k) - # to facilitate the for loop down the line, we create a list with the + # to facilitate the for loop down the line, we create a list with the # keys of `mol_params` with inverted order (we will use .pop) mol_params_keys = list(mol_params.keys())[::-1] # limit is only useful when order == 0 if self.order == 0 and self.params["limit"]: mol_params_get = mol_params_keys.pop - # `else` is used in any case where limit is False. else: mol_params_get = partial(operator.getitem, mol_params_keys, -1) # Pool of jobs to be executed by the CNS engine jobs: list[CNSJob] = [] - models_dic: dict[int, list[Path]] = {} - ens_dic: dict[int, dict[int, str]] = {} - origi_ens_dic: dict[int, dict[int, str]] = {} - for i, molecule in enumerate(molecules, start=1): - self.log(f"Molecule {i}: {molecule.file_name.name}") + + for i, models in enumerate(molecules, start=1): + self.log(f"Molecule {i}") models_dic[i] = [] - # Copy the molecule to the step folder - - # Split models - self.log( - f"Split models if needed for {molecule.with_parent}", - level="debug", - ) - # these come already sorted - splited_models = libpdb.split_ensemble( - molecule.with_parent, - dest=Path.cwd(), - ) - - # get the MD5 hash of each model - ens_dic[i] = self.get_md5(molecule.with_parent) - origi_ens_dic[i] = self.get_ensemble_origin(molecule.with_parent) # nice variable name, isn't it? 
:-) # molecule parameters are shared among models of the same molecule parameters_for_this_molecule = mol_params[mol_params_get()] - for model in splited_models: - self.log(f"Sanitizing molecule {model.name}") - models_dic[i].append(model) - + for model in models: + self.log(f"Sanitizing model {model.name}") + custom_top: Optional[FilePath] = None if self.params["ligand_top_fname"]: custom_top = self.params["ligand_top_fname"] self.log(f"Using custom topology {custom_top}") - libpdb.sanitize( - model, - overwrite=True, - custom_topology=custom_top, - ) - - else: - libpdb.sanitize(model, overwrite=True) + libpdb.sanitize( + model, + overwrite=True, + custom_topology=custom_top, + ) + models_dic[i].append(model) # Prepare generation of topologies jobs topology_filename = generate_topology( @@ -225,7 +154,7 @@ def _run(self) -> None: self.params, parameters_for_this_molecule, default_params_path=self.toppar_path, - ) + ) self.log( f"Topology CNS input created in {topology_filename.name}" @@ -239,7 +168,7 @@ def _run(self) -> None: output_filename, envvars=self.envvars, cns_exec=self.params["cns_exec"], - ) + ) jobs.append(job) @@ -250,54 +179,27 @@ def _run(self) -> None: engine.run() self.log("CNS jobs have finished") - # Check for generated output, fail it not all expected files + # Check for generated output, fail if not all expected files # are found expected: dict[int, dict[int, PDBFile]] = {} - for i in models_dic: - expected[i] = {} - md5_dic = ens_dic[i] - origin_names = origi_ens_dic[i] - for j, model in enumerate(models_dic[i]): - md5_hash = None - try: - model_id = int(model.stem.split("_")[-1]) - except ValueError: - model_id = 0 - - if model_id in md5_dic: - md5_hash = md5_dic[model_id] - + for i, models in models_dic.items(): + expected.setdefault(i, {}) + for j, model in enumerate(models): model_name = model.stem processed_pdb = Path(f"{model_name}_haddock.{Format.PDB}") processed_topology = Path( f"{model_name}_haddock.{Format.TOPOLOGY}" ) - - # Check 
if origin or md5 is available - if md5_hash or model_id in origin_names.keys(): - # Select prefix - if md5_hash: - prefix_name = md5_hash - else: - prefix_name = origin_names[model_id] - # Check if topology and file created - if processed_pdb.exists() and processed_topology.exists(): - # Build new filename - model_name = f"{prefix_name}_from_{model_name}" - # Rename files - processed_pdb = processed_pdb.rename( - f"{model_name}_haddock.{Format.PDB}" - ) - processed_topology = processed_topology.rename( - f"{model_name}_haddock.{Format.TOPOLOGY}" - ) - topology = TopologyFile(processed_topology, path=".") + try: + md5 = md5_dic[i][j] + except KeyError: + md5 = None pdb = PDBFile( file_name=processed_pdb, topology=topology, path=".", - md5=md5_hash, + md5=md5, ) pdb.ori_name = model.stem expected[i][j] = pdb diff --git a/src/haddock/modules/topology/topoaa/defaults.yaml b/src/haddock/modules/topology/topoaa/defaults.yaml index 237089432..c2eebea16 100644 --- a/src/haddock/modules/topology/topoaa/defaults.yaml +++ b/src/haddock/modules/topology/topoaa/defaults.yaml @@ -166,4 +166,3 @@ molecules: and type values of this parameter are meaningless. 
explevel: hidden group: molecules - diff --git a/tests/test_libworkflow.py b/tests/test_libworkflow.py index 88e52724c..46e2aac9a 100644 --- a/tests/test_libworkflow.py +++ b/tests/test_libworkflow.py @@ -1,6 +1,7 @@ """Uni-test functions for the Workflow Manager.""" import tempfile +from haddock.libs.libutil import working_directory from haddock.libs.libworkflow import WorkflowManager from haddock.core.typing import Any @@ -19,13 +20,14 @@ def test_WorkflowManager(caplog): } } with tempfile.TemporaryDirectory(dir=".") as _tmpdir: - workflow = WorkflowManager( - ParamDict, - start=0, - other_params=Any, - ) - workflow.postprocess() - first_log_line = str(caplog.records[0].message) - second_log_line = str(caplog.records[1].message) - assert first_log_line == "Reading instructions step 0_topoaa" - assert second_log_line == "Running haddock3-analyse on ./, modules [], with top_cluster = 10" # noqa : E501 + with working_directory(_tmpdir): + workflow = WorkflowManager( + ParamDict, + start=0, + other_params=Any, + ) + workflow.postprocess() + first_log_line = str(caplog.records[0].message) + second_log_line = str(caplog.records[1].message) + assert first_log_line == "Reading instructions step 0_topoaa" + assert second_log_line == "Running haddock3-analyse on ./, modules [], with top_cluster = 10" # noqa : E501 From 727c23fc9843481a7f467c3da79b296961278269 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 3 Jul 2024 17:07:24 +0200 Subject: [PATCH 03/32] fix types --- src/haddock/libs/libontology.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index 3c4317406..ee5cfe77a 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -200,13 +200,9 @@ def retrieve_models( raise Exception(_msg) # prepare pairwise combinations - model_list = [ - values for values in zip(*input_dic.values()) - ] # type: ignore + model_list = [values for values in 
zip(*input_dic.values())] # type: ignore elif input_dic and crossdock and not individualize: - model_list = [ - values for values in itertools.product(*input_dic.values()) - ] # type: ignore + model_list = [values for values in itertools.product(*input_dic.values())] # type: ignore elif input_dic and individualize: model_list = list(itertools.chain(*input_dic.values())) From c19b2cdd5153ac1c99545fad93e4bffd1be57fd1 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 14:35:16 +0200 Subject: [PATCH 04/32] remove E203 and E501 lint checks from examples/run_tests.py --- examples/run_tests.py | 64 +++++++++++++++++++++---------------------- tox.ini | 3 +- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/examples/run_tests.py b/examples/run_tests.py index 938a0165a..6ec8ddaa2 100644 --- a/examples/run_tests.py +++ b/examples/run_tests.py @@ -47,34 +47,34 @@ # keys are the examples folder, and values are the configuration files # the whitespaces below are anti-pythonic but facilitate reading :-) examples = ( - ("docking-antibody-antigen" , "docking-antibody-antigen-ranairCDR-test.cfg"), # noqa: E203, E501 - ("docking-antibody-antigen" , "docking-antibody-antigen-ranairCDR-clt-test.cfg"), # noqa: E203, E501 - ("docking-antibody-antigen" , "docking-antibody-antigen-CDR-accessible-test.cfg"), # noqa: E203, E501 - ("docking-antibody-antigen" , "docking-antibody-antigen-CDR-accessible-clt-test.cfg"), # noqa: E203, E501 - ("docking-antibody-antigen" , "docking-antibody-antigen-CDR-NMR-CSP-test.cfg"), # noqa: E203, E501 - ("docking-protein-DNA" , "docking-protein-DNA-test.cfg"), # noqa: E203, E501 - ("docking-protein-DNA" , "docking-protein-DNA-mdref-test.cfg"), # noqa: E203, E501 - ("docking-protein-homotrimer" , "docking-protein-homotrimer-test.cfg"), # noqa: E203, E501 - ("docking-protein-glycan" , "docking-protein-glycan-test.cfg"), # noqa: E203, E501 - ("docking-protein-glycan" , "docking-protein-glycan-ilrmsd-test.cfg"), # noqa: E203, E501 - 
("docking-protein-glycan" , "docking-flexref-protein-glycan-test.cfg"), # noqa: E203, E501 - ("docking-protein-ligand-shape", "docking-protein-ligand-shape-test.cfg"), # noqa: E203, E501 - ("docking-protein-ligand" , "docking-protein-ligand-test.cfg"), # noqa: E203, E501 - ("docking-protein-peptide" , "docking-protein-peptide-test.cfg"), # noqa: E203, E501 - ("docking-protein-peptide" , "docking-protein-peptide-mdref-test.cfg"), # noqa: E203, E501 - ("docking-protein-protein" , "docking-protein-protein-test.cfg"), # noqa: E203, E501 - ("docking-protein-protein" , "docking-protein-protein-cltsel-test.cfg"), # noqa: E203, E501 - ("docking-protein-protein" , "docking-protein-protein-mdref-test.cfg"), # noqa: E203, E501 - ("docking-multiple-ambig" , "docking-multiple-tbls-test.cfg"), # noqa: E203, E501 - ("docking-protein-protein" , "docking-exit-test.cfg"), # noqa: E203, E501 - ("refine-complex" , "refine-complex-test.cfg"), # noqa: E203, E501 - ("scoring" , "emscoring-test.cfg"), # noqa: E203, E501 - ("scoring" , "mdscoring-test.cfg"), # noqa: E203, E501 - ("scoring" , "emscoring-mdscoring-test.cfg"), # noqa: E203, E501 - ("analysis" , "topoaa-caprieval-test.cfg"), # noqa: E203, E501 - ("analysis" , "topoaa-ilrmsdmatrix-clustrmsd-test.cfg"), # noqa: E203, E501 - ("analysis" , "alascan-test.cfg"), # noqa: E203, E501 - ("analysis" , "contmap-test.cfg"), # noqa: E203, E501 + ("docking-antibody-antigen" , "docking-antibody-antigen-ranairCDR-test.cfg"), + ("docking-antibody-antigen" , "docking-antibody-antigen-ranairCDR-clt-test.cfg"), + ("docking-antibody-antigen" , "docking-antibody-antigen-CDR-accessible-test.cfg"), + ("docking-antibody-antigen" , "docking-antibody-antigen-CDR-accessible-clt-test.cfg"), + ("docking-antibody-antigen" , "docking-antibody-antigen-CDR-NMR-CSP-test.cfg"), + ("docking-protein-DNA" , "docking-protein-DNA-test.cfg"), + ("docking-protein-DNA" , "docking-protein-DNA-mdref-test.cfg"), + ("docking-protein-homotrimer" , 
"docking-protein-homotrimer-test.cfg"), + ("docking-protein-glycan" , "docking-protein-glycan-test.cfg"), + ("docking-protein-glycan" , "docking-protein-glycan-ilrmsd-test.cfg"), + ("docking-protein-glycan" , "docking-flexref-protein-glycan-test.cfg"), + ("docking-protein-ligand-shape", "docking-protein-ligand-shape-test.cfg"), + ("docking-protein-ligand" , "docking-protein-ligand-test.cfg"), + ("docking-protein-peptide" , "docking-protein-peptide-test.cfg"), + ("docking-protein-peptide" , "docking-protein-peptide-mdref-test.cfg"), + ("docking-protein-protein" , "docking-protein-protein-test.cfg"), + ("docking-protein-protein" , "docking-protein-protein-cltsel-test.cfg"), + ("docking-protein-protein" , "docking-protein-protein-mdref-test.cfg"), + ("docking-multiple-ambig" , "docking-multiple-tbls-test.cfg"), + ("docking-protein-protein" , "docking-exit-test.cfg"), + ("refine-complex" , "refine-complex-test.cfg"), + ("scoring" , "emscoring-test.cfg"), + ("scoring" , "mdscoring-test.cfg"), + ("scoring" , "emscoring-mdscoring-test.cfg"), + ("analysis" , "topoaa-caprieval-test.cfg"), + ("analysis" , "topoaa-ilrmsdmatrix-clustrmsd-test.cfg"), + ("analysis" , "alascan-test.cfg"), + ("analysis" , "contmap-test.cfg"), ) @@ -160,7 +160,7 @@ def main(examples, break_on_errors=True): rmtree("run2", ignore_errors=True) run_subprocess_cmd("haddock3-copy -r run1-test -m 0 4 -o run2") run_subprocess_cmd( - "haddock3 docking-extend-run-exit-test.cfg --extend-run run2", # noqa: E501 + "haddock3 docking-extend-run-exit-test.cfg --extend-run run2", ) # test exit with --restart @@ -178,12 +178,12 @@ def main(examples, break_on_errors=True): # perform a haddock3 re-scoring command run_subprocess_cmd( - "haddock3-re score -e 1.1 -w 1 -d 0.3 -b 1 -a 1 run1-re/2_caprieval", # noqa : E501 + "haddock3-re score -e 1.1 -w 1 -d 0.3 -b 1 -a 1 run1-re/2_caprieval", ) # perform a haddock3 re-clustfcc command run_subprocess_cmd( - "haddock3-re clustfcc -f 0.5 -s 0.7 -t 2 run1-re/1_clustfcc", # 
noqa : E501 + "haddock3-re clustfcc -f 0.5 -s 0.7 -t 2 run1-re/1_clustfcc", ) # FIXME: Make this runs properly function @@ -191,7 +191,7 @@ def main(examples, break_on_errors=True): # perform haddock3 --extend-run on re-run # run_subprocess_cmd( - # "haddock3 docking-re-extend-run-test.cfg --extend-run run1-re", # noqa : E501 + # "haddock3 docking-re-extend-run-test.cfg --extend-run run1-re", # ) # perform haddock3 --restart on re-run diff --git a/tox.ini b/tox.ini index 0ab20e6ff..c10be67a7 100644 --- a/tox.ini +++ b/tox.ini @@ -165,10 +165,11 @@ per-file-ignores = src/haddock/clis/cli_dmn.py:T201 src/haddock/clis/cli_score.py:T201 src/haddock/core/typing.py:F401 + src/haddock/modules/*/*/__init__.py:D205,D400 tests/*.py:D103 tests/test_gear_preprocessing.py:E501,D103,W291 tests/test_module_flexref.py:B017 - src/haddock/modules/*/*/__init__.py:D205,D400 + examples/run_tests.py:E203,E501 exclude = src/haddock/modules/_template_cat/_template_mod/__init__.py docstring-convention = numpy From 08c7fcb91e53c12ce1b1c7e5ece3a9e3d37ad6bc Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 14:36:57 +0200 Subject: [PATCH 05/32] add docstring in HaddockModel method --- src/haddock/gear/haddockmodel.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/haddock/gear/haddockmodel.py b/src/haddock/gear/haddockmodel.py index 9e669d2a7..a8960d3da 100644 --- a/src/haddock/gear/haddockmodel.py +++ b/src/haddock/gear/haddockmodel.py @@ -5,13 +5,25 @@ class HaddockModel: - """Represent HADDOCK model.""" + """Represent HADDOCK CNS model.""" def __init__(self, pdb_f: FilePath) -> None: self.energies = self._load_energies(pdb_f) @staticmethod def _load_energies(pdb_f: FilePath) -> dict[str, float]: + """Parse pdb file generated by CNS in search for scores. + + Parameters + ---------- + pdb_f : FilePath + Path to the pdb file + + Returns + ------- + dict[str, float] + Dictionary of the components with their unweighted values. 
+ """ energy_dic: dict[str, float] = {} with open(pdb_f) as fh: for line in fh.readlines(): @@ -38,13 +50,13 @@ def _load_energies(pdb_f: FilePath) -> dict[str, float]: energy_dic['dani'] = dani energy_dic['xpcs'] = xpcs energy_dic['rg'] = rg - if 'buried surface area' in line: + elif 'buried surface area' in line: bsa = float(line.rstrip().split(':')[-1]) energy_dic['bsa'] = bsa - if 'Desolvation energy' in line: + elif 'Desolvation energy' in line: desolv = float(line.rstrip().split(':')[-1]) energy_dic['desolv'] = desolv - if 'Symmetry energy' in line: + elif 'Symmetry energy' in line: sym = float(line.rstrip().split(':')[-1]) energy_dic['sym'] = sym From 4285a18d5033d39d5dd15b1e070fb8e66d8236ba Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 14:38:16 +0200 Subject: [PATCH 06/32] workaround topoaa --- src/haddock/clis/cli_score.py | 137 ++++++------ src/haddock/clis/cli_traceback.py | 6 +- src/haddock/libs/libcns.py | 9 +- src/haddock/libs/libontology.py | 205 ++++++++++++------ src/haddock/libs/libpdb.py | 29 +++ src/haddock/libs/libstructure.py | 46 ---- src/haddock/libs/libworkflow.py | 10 +- src/haddock/modules/__init__.py | 2 +- .../modules/analysis/alascan/__init__.py | 2 +- src/haddock/modules/analysis/alascan/scan.py | 31 ++- .../modules/topology/topoaa/__init__.py | 79 +++---- 11 files changed, 311 insertions(+), 245 deletions(-) delete mode 100644 src/haddock/libs/libstructure.py diff --git a/src/haddock/clis/cli_score.py b/src/haddock/clis/cli_score.py index 5abb0a1e5..0f03d6394 100644 --- a/src/haddock/clis/cli_score.py +++ b/src/haddock/clis/cli_score.py @@ -103,7 +103,52 @@ def cli(ap: ArgumentParser, main: Callable[..., None]) -> None: def maincli() -> None: """Execute main client.""" - cli(ap, main) + cli(_ap(), main) + + +def get_parameters(kwargs: Any) -> dict[str, Any]: + from os import linesep + from haddock.gear.yaml2cfg import read_from_yaml_config + from haddock.modules.scoring.emscoring import DEFAULT_CONFIG + # config all 
parameters are correctly spelled. + default_emscoring = read_from_yaml_config(DEFAULT_CONFIG) + ems_dict = default_emscoring.copy() + n_warnings = 0 + for param, value in kwargs.items(): + if param not in default_emscoring: + sys.exit( + f"* ERROR * Parameter {param!r} is not a " + f"valid `emscoring` parameter.{linesep}" + "Valid emscoring parameters are: " + f"{', '.join(sorted(default_emscoring))}" + ) + if value != default_emscoring[param]: + print( + f"* ATTENTION * Value ({value}) of parameter {param} " + f"different from default ({default_emscoring[param]})" + ) + # get the type of default value + default_type = type(default_emscoring[param]) + # convert the value to the same type + if default_type == bool: + if value.lower() not in ["true", "false"]: + sys.exit( + f"* ERROR * Boolean parameter {param} " + "should be True or False" + ) + value = value.lower() == "true" + else: + value = default_type(value) + ems_dict[param] = value + n_warnings += 1 + if n_warnings != 0: + print( + "* ATTENTION * Non-default parameter values were used. " + "They should be properly reported if the output " + "data are used for publication." + ) + print(f"used emscoring parameters: {ems_dict}") + return ems_dict def main( @@ -143,19 +188,16 @@ def main( Any additional arguments that will be passed to the ``emscoring`` module. 
""" - import os import logging import shutil from contextlib import suppress from pathlib import Path from haddock import log + from haddock.core.defaults import DATA_DIRNAME from haddock.gear.haddockmodel import HaddockModel - from haddock.gear.yaml2cfg import read_from_yaml_config - from haddock.gear.zerofill import zero_fill from haddock.libs.libio import working_directory from haddock.libs.libworkflow import WorkflowManager - from haddock.modules.scoring.emscoring import DEFAULT_CONFIG log.setLevel(logging.ERROR) @@ -163,81 +205,46 @@ def main( if not input_pdb.exists(): sys.exit(f"* ERROR * Input PDB file {str(input_pdb)!r} does not exist") - # config all parameters are correctly spelled. - default_emscoring = read_from_yaml_config(DEFAULT_CONFIG) - ems_dict = default_emscoring.copy() - n_warnings = 0 - for param, value in kwargs.items(): - if param not in default_emscoring: - sys.exit( - f"* ERROR * Parameter {param!r} is not a " - f"valid `emscoring` parameter.{os.linesep}" - f"Valid emscoring parameters are: {', '.join(sorted(default_emscoring))}" - ) - if value != default_emscoring[param]: - print( - f"* ATTENTION * Value ({value}) of parameter {param} different from default ({default_emscoring[param]})" - ) # noqa:E501 - # get the type of default value - default_type = type(default_emscoring[param]) - # convert the value to the same type - if default_type == bool: - if value.lower() not in ["true", "false"]: - sys.exit(f"* ERROR * Boolean parameter {param} should be True or False") - value = value.lower() == "true" - else: - value = default_type(value) - ems_dict[param] = value - n_warnings += 1 - if n_warnings != 0: - print( - "* ATTENTION * Non-default parameter values were used. " - "They should be properly reported if the output " - "data are used for publication." 
- ) - print(f"used emscoring parameters: {ems_dict}") + # Get parameters + ems_dict = get_parameters(kwargs) # create run directory run_dir = Path(run_dir) with suppress(FileNotFoundError): shutil.rmtree(run_dir) run_dir.mkdir() - zero_fill.set_zerofill_number(2) - - # create temporary file - with tempfile.NamedTemporaryFile(prefix=input_pdb.stem, suffix=".pdb") as tmp: - - # create a copy of the input pdb - input_pdb_copy = Path(tmp.name) - shutil.copy(input_pdb, input_pdb_copy) - - params = { - "topoaa": {"molecules": [input_pdb_copy]}, - "emscoring": ems_dict, + # create a copy of the input pdb in run directory + input_molecule_dir = Path(run_dir, DATA_DIRNAME, "0_topoaa") + input_molecule_dir.mkdir(parents=True, exist_ok=True) + input_pdb_copy = Path(input_molecule_dir, input_pdb.name) + shutil.copy(input_pdb, input_pdb_copy) + + # Set workflow parameters + params = { + "topoaa": {"molecules": [input_pdb_copy]}, + "emscoring": ems_dict, } - + # run workflow + with working_directory(run_dir): + workflow = WorkflowManager( + workflow_params=params, + start=0, + run_dir=run_dir, + ) print("> starting calculations...") + workflow.run() - # run workflow - with working_directory(run_dir): - workflow = WorkflowManager( - workflow_params=params, - start=0, - run_dir=run_dir, - ) - - workflow.run() - - minimized_mol = Path(run_dir, "1_emscoring", "emscoring_1.pdb") - haddock_score_component_dic = HaddockModel(minimized_mol).energies - + # Point generated structure path + minimized_mol_path = Path(run_dir, "1_emscoring", "emscoring_1.pdb") + haddock_score_component_dic = HaddockModel(minimized_mol_path).energies + # Gather haddock score components vdw = haddock_score_component_dic["vdw"] elec = haddock_score_component_dic["elec"] desolv = haddock_score_component_dic["desolv"] air = haddock_score_component_dic["air"] bsa = haddock_score_component_dic["bsa"] - # emscoring is equivalent to itw + # Weight the components to obtain the HADDOCK score haddock_score_itw = ( 
ems_dict["w_vdw"] * vdw + ems_dict["w_elec"] * elec diff --git a/src/haddock/clis/cli_traceback.py b/src/haddock/clis/cli_traceback.py index b81ee319b..14e332447 100644 --- a/src/haddock/clis/cli_traceback.py +++ b/src/haddock/clis/cli_traceback.py @@ -15,9 +15,9 @@ import numpy as np import pandas as pd -from typing import Any from haddock import log +from haddock.core.typing import FilePath, Any from haddock.libs import libcli from haddock.libs.libontology import ModuleIO, PDBFile from haddock.libs.libplots import make_traceback_plot @@ -94,7 +94,7 @@ def get_ori_names(n: int, pdbfile: PDBFile, max_topo_len: int) -> tuple[list, in def traceback_dataframe( data_dict: dict, rank_dict: dict, sel_step: list, max_topo_len: int -) -> None: +) -> pd.DataFrame: """ Create traceback dataframe by combining together ranks and data. @@ -242,7 +242,7 @@ def maincli(): cli(ap, main) -def main(run_dir): +def main(run_dir: FilePath) -> None: """ Traceback CLI. diff --git a/src/haddock/libs/libcns.py b/src/haddock/libs/libcns.py index ab72fdef2..786e9355f 100644 --- a/src/haddock/libs/libcns.py +++ b/src/haddock/libs/libcns.py @@ -365,9 +365,12 @@ def prepare_cns_input( return inp_file -def prepare_expected_pdb(model_obj: Union[PDBFile, tuple[PDBFile, - ...]], model_nb: int, - path: FilePath, identifier: str) -> PDBFile: +def prepare_expected_pdb( + model_obj: Union[PDBFile, tuple[PDBFile, ...]], + model_nb: int, + path: FilePath, + identifier: str, + ) -> PDBFile: """Prepare a PDBobject.""" expected_pdb_fname = Path(path, f"{identifier}_{model_nb}.pdb") pdb = PDBFile(expected_pdb_fname, path=path) diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index ee5cfe77a..17802bcb5 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -4,6 +4,7 @@ import os import re from enum import Enum +from functools import partial from os import linesep from os.path import getmtime from pathlib import Path @@ -14,12 +15,13 @@ from 
haddock.core.typing import ( Any, FilePath, + Iterable, List, Optional, TypeVar, Union, ) -from haddock.libs import libpdb +from haddock.libs.libpdb import split_ensemble NaN = float("nan") @@ -181,13 +183,15 @@ def retrieve_models( input_dic: dict[int, list[PDBFile]] = {} for i, element in enumerate(self.output): - if isinstance(element, dict): + # Make molecules from elements + molecule = Molecule(element) + if isinstance(molecule.pdb_files, dict): position_list: list[PDBFile] = input_dic.setdefault(i, []) for key in element: position_list.append(element[key]) # type: ignore - - elif element.file_type == Format.PDB: # type: ignore + elif molecule.pdb_files.file_type == Format.PDB: # type: ignore model_list.append(element) # type: ignore + if input_dic and not crossdock and not individualize: # check if all ensembles contain the same number of models sub_lists = iter(input_dic.values()) @@ -198,7 +202,6 @@ def retrieve_models( " cannot prepare pairwise complexes." ) raise Exception(_msg) - # prepare pairwise combinations model_list = [values for values in zip(*input_dic.values())] # type: ignore elif input_dic and crossdock and not individualize: @@ -271,77 +274,148 @@ def load_from_input_molecules( """ # Gather all input molecules input_molecules = list(input_molecules_dir.glob('*.pdb')) + assert input_molecules != [], \ + f"No molecules could be found in `{input_molecules_dir}`" # Sort them by creation date (which is also input order) input_molecules.sort(key=getmtime) # FIXME: getctime ? 
# Set input attribute self.input = input_molecules # Set parsing variables - molecules_dic: dict[int, dict[int, PDBFile]] = {} - # Loop over input molecules - for i, molecule in enumerate(self.input, start=1): - # Split models (these come already sorted) - splited_models = libpdb.split_ensemble( - molecule, - dest=input_molecules_dir, - ) - # get the MD5 hash of each model - md5_dic = self.get_md5(molecule) - origin_names = self.get_ensemble_origin(molecule) - # Initiate with empty list - molecules_dic.setdefault(i, {}) - # Loop over conformers of this ensemble - for j, model in enumerate(splited_models): - processed_model = model - model_name = model.stem - # Search of md5 information - md5_hash = None - try: - model_id = int(model_name.split("_")[-1]) - except ValueError: - model_id = 0 - if model_id in md5_dic: - md5_hash = md5_dic[model_id] - # Check if origin or md5 is available - if md5_hash or model_id in origin_names.keys(): - # Select prefix - if md5_hash: # Prioritize the md5 hash - prefix_name = md5_hash - else: - prefix_name = origin_names[model_id] - # Build new filename - model_new_name = f"{prefix_name}_from_{model_name}" - # Rename file - processed_model = model.rename( - Path( - input_molecules_dir, - f"{model_new_name}.{Format.PDB}", - ) + molecules_list: list[dict[int, PDBFile]] = [ + Molecule(input_file).pdb_files + for input_file in self.input + ] + # And fake them to be the output of the previous io + self.output = molecules_list + + +class Molecule: + """ + Input molecule, usually a PDB file. + + Parameters + ---------- + file_name : :external:py:class:`pathlib.Path` + The path to the molecule file. + + segid : int, optional + The ID of the segment. Defaults to ``None``. + + no_parent : boolean + Whether to add the parent path ``..`` to the + :py:attr:`haddock.libs.libstructure.Molecule.with_parent`. + When set to true, the ``with_parent`` attribute returns the same + as ``file_name``. 
+ """ + + def __init__( + self, + pdb_file: Union[PDBFile, tuple[dict[int, PDBFile]], FilePath], + ) -> None: + self.input_file = pdb_file + self._pdb_files: dict[int, PDBFile] = {} + self.standardize_input_pdbfile() + + def standardize_input_pdbfile(self): + if any([isinstance(self.input_file, ftype) for ftype in (str, Path)]): + self.gen_pdb_object() + else: + self.pdb_files = self.input_file + + @property + def count_models(self) -> int: + self._nb_models = getattr( + self, + "_nb_models", + 1 if isinstance(self.pdb_files, PDBFile) \ + else len(self.pdb_files.keys()), + ) + return self._nb_models + + @property + def pdb_files(self): + return self._pdb_files + + @pdb_files.setter + def pdb_files(self, value: Union[dict[int, PDBFile], PDBFile]) -> None: + self._pdb_files = value + + def __len__(self) -> int: + return self.count_models + + def __repr__(self) -> str: + return f"Molecule {self.input_file}: {len(self)} models" + + def gen_pdb_object(self) -> None: + # Create a Path object form input file + pdb_filepath = self.input_file + if not isinstance(pdb_filepath, Path): + pdb_filepath = Path(pdb_filepath) + # Obtain origin directory + input_molecules_dir = pdb_filepath.parent + # Eventually split models (they come back sorted by order in the file) + splited_models = split_ensemble( + pdb_filepath, + dest=input_molecules_dir, + ) + # get the MD5 hash of each model + md5_dic = self.get_md5(pdb_filepath) + origin_names = self.get_ensemble_origin(pdb_filepath) + # Initiate holding variable + pdb_files: dict[int, PDBFile] = {} + # Loop over conformers of this ensemble + for j, model in enumerate(splited_models): + processed_model = model + model_name = model.stem + # Search of md5 information + md5_hash = None + try: + model_id = int(model_name.split("_")[-1]) + except ValueError: + model_id = 0 + if model_id in md5_dic: + md5_hash = md5_dic[model_id] + # Check if origin or md5 is available + if md5_hash or model_id in origin_names.keys(): + # Select prefix + if 
md5_hash: # Prioritize the md5 hash + prefix_name = md5_hash + else: + prefix_name = origin_names[model_id] + # Build new filename + model_new_name = f"{prefix_name}_from_{model_name}" + # Rename file + processed_model = model.rename( + Path( + input_molecules_dir, + f"{model_new_name}.pdb", ) - # Create a PDBFile object - pdbfile = PDBFile( - processed_model, - md5=md5_hash, ) - # Modify relative path attribute - pdbfile.rel_path = Path( - "..", - input_molecules_dir, - pdbfile.file_name - ) - # Set origin name - pdbfile.ori_name = molecule - # Hold that conformer/model - molecules_dic[i][j] = pdbfile - # And fake them to be the output of the previous io - self.output = list(molecules_dic.values()) + # Create a PDBFile object + pdbfile = PDBFile( + processed_model, + md5=md5_hash, + ) + # Modify relative path attribute + pdbfile.rel_path = Path( + "..", + input_molecules_dir, + pdbfile.file_name + ) + # Set origin name + pdbfile.ori_name = pdb_filepath + # Hold this guy + pdb_files[j] = pdbfile + # Set attribute + self.pdb_files = pdb_files @staticmethod def get_md5(ensemble_f: FilePath) -> dict[int, str]: """Get MD5 hash of a multi-model PDB file.""" md5_dic: dict[int, str] = {} text = Path(ensemble_f).read_text() - lines = text.split(os.linesep) + lines = text.split(linesep) REMARK_lines = (line for line in lines if line.startswith("REMARK")) remd5 = re.compile(r"^[a-f0-9]{32}$") for line in REMARK_lines: @@ -379,7 +453,7 @@ def get_ensemble_origin(ensemble_f: FilePath) -> dict[int, str]: """ origin_dic: dict[int, str] = {} text = Path(ensemble_f).read_text() - lines = text.split(os.linesep) + lines = text.split(linesep) REMARK_lines = (line for line in lines if line.startswith("REMARK")) re_origin = re.compile( r"REMARK\s+MODEL\s+(\d+)\s+(FROM|from|From)\s+(([\w_-]+\.?)+)" @@ -393,6 +467,11 @@ def get_ensemble_origin(ensemble_f: FilePath) -> dict[int, str]: return origin_dic +def make_molecules(paths: Iterable[Path], **kwargs: Any) -> list[Molecule]: + """Get 
input molecules from the data stream.""" + return list(map(partial(Molecule, **kwargs), paths)) + + PDBPath = Union[PDBFile, Path] PDBPathT = TypeVar("PDBPathT", bound=Union[PDBFile, Path]) diff --git a/src/haddock/libs/libpdb.py b/src/haddock/libs/libpdb.py index 7d7e4ec6b..69c173ad6 100644 --- a/src/haddock/libs/libpdb.py +++ b/src/haddock/libs/libpdb.py @@ -144,6 +144,35 @@ def split_ensemble( return pdb_files_path +def count_models(pdb_file_path: FilePath) -> int: + """Count number of models in a pdb file. + + Read filepath and return number of models found in it. + If none (not an ensemble), 1 is returned. + + Parameters + ---------- + pdb_file_path : FilePath (Union[str, Path]) + Path to the pdb file to analyse. + + Returns + ------- + nb_models : int + The number of models found in this pdb file. + """ + models_starts: int = 0 + model_ends: int = 0 + with open(pdb_file_path, 'r') as filin: + for line in filin: + if line.startswith("ENDMDL"): + model_ends += 1 + elif line.startswith("MODEL"): + models_starts += 1 + nb_models = max(1, model_ends) + assert max(1, models_starts) == nb_models + return nb_models + + def split_by_chain(pdb_file_path: FilePath) -> list[Path]: """Split a PDB file into multiple structures for each chain.""" abs_path = Path(pdb_file_path).resolve().parent.absolute() diff --git a/src/haddock/libs/libstructure.py b/src/haddock/libs/libstructure.py deleted file mode 100644 index 46e6d6d62..000000000 --- a/src/haddock/libs/libstructure.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Molecular data structures.""" -from functools import partial -from pathlib import Path -from typing import Any, Iterable, Optional - - -class Molecule: - """ - Input molecule, usually a PDB file. - - Parameters - ---------- - file_name : :external:py:class:`pathlib.Path` - The path to the molecule file. - - segid : int, optional - The ID of the segment. Defaults to ``None``. 
- - no_parent : boolean - Whether to add the parent path ``..`` to the - :py:attr:`haddock.libs.libstructure.Molecule.with_parent`. - When set to true, the ``with_parent`` attribute returns the same - as ``file_name``. - """ - - def __init__( - self, - file_name: Path, - segid: Optional[int] = None, - no_parent: bool = False, - ) -> None: - # the rest of the code is too dependent on the Path API - assert isinstance(file_name, Path), \ - f"`file_name` must be pathlib.Path: {type(file_name)} given" - - self.file_name = file_name - self.segid = segid - if no_parent: - self.with_parent = file_name - else: - self.with_parent = Path('..', file_name) - - -def make_molecules(paths: Iterable[Path], **kwargs: Any) -> list[Molecule]: - """Get input molecules from the data stream.""" - return list(map(partial(Molecule, **kwargs), paths)) diff --git a/src/haddock/libs/libworkflow.py b/src/haddock/libs/libworkflow.py index e3ef4cb65..d972f627c 100644 --- a/src/haddock/libs/libworkflow.py +++ b/src/haddock/libs/libworkflow.py @@ -38,7 +38,8 @@ def __init__( def run(self) -> None: """High level workflow composer.""" - for i, step in enumerate(self.recipe.steps[self.start :], start=self.start): + id_steps = enumerate(self.recipe.steps[self.start:], start=self.start) + for i, step in id_steps: try: step.execute() except HaddockTermination: @@ -75,8 +76,11 @@ def postprocess(self) -> None: if step.module_name == "caprieval": capri_steps.append(step.order) # type: ignore # call cli_analyse (no need for capri_dicts, it's all precalculated) - cli_analyse("./", capri_steps, top_cluster=10, format=None, scale=None, - inter=False, is_cleaned=is_cleaned, offline=offline, mode=mode, ncores=ncores) + cli_analyse( + "./", capri_steps, top_cluster=10, format=None, scale=None, + inter=False, is_cleaned=is_cleaned, offline=offline, mode=mode, + ncores=ncores, + ) # call cli_traceback. 
If it fails, it's not a big deal try: cli_traceback("./") diff --git a/src/haddock/modules/__init__.py b/src/haddock/modules/__init__.py index 2efd25d50..084399d18 100644 --- a/src/haddock/modules/__init__.py +++ b/src/haddock/modules/__init__.py @@ -235,7 +235,7 @@ def _run(self) -> None: def run(self, **params: Any) -> None: """Execute the module.""" - log.info(f"Running [{self.name}] module") + log.info(f"Running [{self.name}] module (step {self.order})") self.update_params(**params) self.add_parent_to_paths() diff --git a/src/haddock/modules/analysis/alascan/__init__.py b/src/haddock/modules/analysis/alascan/__init__.py index 199df61f4..07801c9c6 100644 --- a/src/haddock/modules/analysis/alascan/__init__.py +++ b/src/haddock/modules/analysis/alascan/__init__.py @@ -61,7 +61,7 @@ def _run(self): alascan_jobs = [] for core in range(ncores): - output_name = "alascan_" + str(core) + ".scan" + output_name = f"alascan_{core}.scan" scan_obj = Scan( model_list=models[index_list[core]:index_list[core + 1]], output_name=output_name, diff --git a/src/haddock/modules/analysis/alascan/scan.py b/src/haddock/modules/analysis/alascan/scan.py index 37e72a685..5d2015f34 100644 --- a/src/haddock/modules/analysis/alascan/scan.py +++ b/src/haddock/modules/analysis/alascan/scan.py @@ -156,7 +156,8 @@ def add_delta_to_bfactor(pdb_f, df_scan): os.rename(tmp_pdb_f, pdb_f) return pdb_f -def get_score_string(pdb_f, run_dir): + +def get_score_string(pdb_f: str, run_dir: str) -> list[str]: """Get score output from cli_score.main. Parameters @@ -169,7 +170,7 @@ def get_score_string(pdb_f, run_dir): Returns ------- - out : list + out : list[str] List of strings with the score output. """ f = io.StringIO() @@ -179,7 +180,10 @@ def get_score_string(pdb_f, run_dir): return out -def calc_score(pdb_f, run_dir): +def calc_score( + pdb_f: str, + run_dir: str, + ) -> tuple[float, float, float, float, float]: """Calculate the score of a model. 
Parameters @@ -464,12 +468,13 @@ def run(self): native.rel_path, cutoff=self.int_cutoff ) - + atoms = get_atoms(native.rel_path) - coords, chain_ranges = load_coords(native.rel_path, - atoms, - add_resname=True - ) + coords, _chain_ranges = load_coords( + native.rel_path, + atoms, + add_resname=True, + ) resname_dict = {} for chain, resid, _atom, resname in coords.keys(): key = f"{chain}-{resid}" @@ -490,10 +495,12 @@ def run(self): c_bsa = n_bsa else: try: - mut_pdb_name = mutate(native.rel_path, - chain, - res, - end_resname) + mut_pdb_name = mutate( + native.rel_path, + chain, + res, + end_resname, + ) except KeyError: continue # now we score the mutated model diff --git a/src/haddock/modules/topology/topoaa/__init__.py b/src/haddock/modules/topology/topoaa/__init__.py index d85e90dd6..db09e3d7b 100644 --- a/src/haddock/modules/topology/topoaa/__init__.py +++ b/src/haddock/modules/topology/topoaa/__init__.py @@ -88,22 +88,16 @@ def confirm_installation(cls) -> None: def _run(self) -> None: """Execute module.""" - md5_dic: dict[int, dict[int, str]] = {} self.params.pop("molecules") - molecules: list[list[Path]] = [] + input_molecules: list[list[PDBFile]] = [] if self.order == 0: _molecules = self.previous_io.output - for i, models in enumerate(_molecules, start=1): - molecules.append([model.rel_path for model in models.values()]) - md5_dic[i] = { - j: model.md5 - for j, model in enumerate(models.values()) - } + input_molecules = [list(models.values()) for models in _molecules] else: # in case topoaa is not the first step, the topology is rebuilt for # each retrieved model _molecules = self.previous_io.retrieve_models() - molecules = [[mol.rel_path] for mol in _molecules] + input_molecules = [[mol] for mol in _molecules] # extracts `input` key from params. 
The `input` keyword needs to # be treated separately @@ -125,85 +119,74 @@ def _run(self) -> None: # Pool of jobs to be executed by the CNS engine jobs: list[CNSJob] = [] - models_dic: dict[int, list[Path]] = {} - - for i, models in enumerate(molecules, start=1): + output_molecules: list[dict[int, PDBFile]] = [] + for i, models in enumerate(input_molecules, start=1): self.log(f"Molecule {i}") - models_dic[i] = [] + models_dic: dict[int, PDBFile] = {} # nice variable name, isn't it? :-) # molecule parameters are shared among models of the same molecule parameters_for_this_molecule = mol_params[mol_params_get()] - - for model in models: - self.log(f"Sanitizing model {model.name}") + # Loop over models/conformers of this molecule + for j, model in enumerate(models): + # Point path of this model + model_path = model.rel_path + self.log(f"Sanitizing model {model_path.name}") + # Gather custom topology custom_top: Optional[FilePath] = None if self.params["ligand_top_fname"]: custom_top = self.params["ligand_top_fname"] self.log(f"Using custom topology {custom_top}") libpdb.sanitize( - model, + model_path, overwrite=True, custom_topology=custom_top, ) - models_dic[i].append(model) - # Prepare generation of topologies jobs topology_filename = generate_topology( - model, + model_path, self.recipe_str, self.params, parameters_for_this_molecule, default_params_path=self.toppar_path, ) - self.log( f"Topology CNS input created in {topology_filename.name}" ) - # Add new job to the pool - output_filename = Path(f"{model.stem}.{Format.CNS_OUTPUT}") - + output_filename = Path(f"{model_path.stem}.{Format.CNS_OUTPUT}") job = CNSJob( topology_filename, output_filename, envvars=self.envvars, cns_exec=self.params["cns_exec"], ) - jobs.append(job) - - # Run CNS Jobs - self.log(f"Running CNS Jobs n={len(jobs)}") - Engine = get_engine(self.params["mode"], self.params) - engine = Engine(jobs) - engine.run() - self.log("CNS jobs have finished") - - # Check for generated output, fail if not 
all expected files - # are found - expected: dict[int, dict[int, PDBFile]] = {} - for i, models in models_dic.items(): - expected.setdefault(i, {}) - for j, model in enumerate(models): - model_name = model.stem + # Generate future output files + model_name = model_path.stem processed_pdb = Path(f"{model_name}_haddock.{Format.PDB}") processed_topology = Path( f"{model_name}_haddock.{Format.TOPOLOGY}" ) topology = TopologyFile(processed_topology, path=".") - try: - md5 = md5_dic[i][j] - except KeyError: - md5 = None + # Create new PDBFile object pdb = PDBFile( file_name=processed_pdb, topology=topology, path=".", - md5=md5, + md5=model.md5, ) - pdb.ori_name = model.stem - expected[i][j] = pdb + pdb.ori_name = model_name + # Hold PDBFile into models + models_dic[j] = pdb + output_molecules.append(models_dic) + + # Run CNS Jobs + self.log(f"Running CNS Jobs n={len(jobs)}") + Engine = get_engine(self.params["mode"], self.params) + engine = Engine(jobs) + engine.run() + self.log("CNS jobs have finished") # Save module information - self.output_models = list(expected.values()) # type: ignore + self.output_models = output_molecules # type: ignore self.export_io_models(faulty_tolerance=self.params["tolerance"]) From 55e20d52b2f6037ebf4af970af29c4bd3c550e38 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 15:00:26 +0200 Subject: [PATCH 07/32] fix tests --- tests/test_gear_prepare_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_gear_prepare_run.py b/tests/test_gear_prepare_run.py index 7002e4dae..4c03091a7 100644 --- a/tests/test_gear_prepare_run.py +++ b/tests/test_gear_prepare_run.py @@ -119,7 +119,7 @@ def test_populate_mol_params(): "caprieval.1": {}, } - populate_mol_parameters(params) + populate_mol_parameters(params, params["topoaa.1"]) assert "mol_fix_origin_1" in params["flexref.1"] assert "mol_fix_origin_2" in params["flexref.1"] assert "mol_fix_origin_3" in params["flexref.1"] From 
65242ddb1b72fdd2203ae71d2382bfdda3e403fb Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 15:12:30 +0200 Subject: [PATCH 08/32] fix types and tests for clustfcc --- src/haddock/libs/libontology.py | 2 +- tests/test_module_clustfcc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index 17802bcb5..f799d72f7 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -166,7 +166,7 @@ def save( output_handler.write(jsonpickle.encode(to_save)) # type: ignore return fpath - def load(self, filename: FilePath) -> None: + def load(self, filename: Path) -> None: """Load the content of a given IO filename.""" if filename.is_file(): with open(filename) as json_file: diff --git a/tests/test_module_clustfcc.py b/tests/test_module_clustfcc.py index f83c72635..07349464c 100644 --- a/tests/test_module_clustfcc.py +++ b/tests/test_module_clustfcc.py @@ -115,7 +115,7 @@ def test_io_json(fcc_module, prot_input_list): # check the content of io.json io = ModuleIO() - io.load("io.json") + io.load(Path("io.json")) assert io.input[0].file_name == prot_input_list[0].file_name assert io.output[1].file_name == prot_input_list[1].file_name From 537869b300f26c7fdc0b8e22899578355f1c08d4 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 15:25:13 +0200 Subject: [PATCH 09/32] Convert FilePath to Path in libontology load() --- src/haddock/libs/libontology.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index f799d72f7..189f74585 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -166,9 +166,9 @@ def save( output_handler.write(jsonpickle.encode(to_save)) # type: ignore return fpath - def load(self, filename: Path) -> None: + def load(self, filename: FilePath) -> None: """Load the content of a given IO filename.""" - if filename.is_file(): 
+ if Path(filename).is_file(): with open(filename) as json_file: content = jsonpickle.decode(json_file.read()) self.input = content["input"] # type: ignore From 83793c3e9e3c24c39397f6c4ff7205d5889fa8f2 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 15:54:17 +0200 Subject: [PATCH 10/32] adding tests --- tests/test_libontology.py | 76 +++++++++++++++++++++++++++++++++++++ tests/test_module_topoaa.py | 16 -------- 2 files changed, 76 insertions(+), 16 deletions(-) create mode 100644 tests/test_libontology.py diff --git a/tests/test_libontology.py b/tests/test_libontology.py new file mode 100644 index 000000000..9bb6fc4cb --- /dev/null +++ b/tests/test_libontology.py @@ -0,0 +1,76 @@ +"""Test functions and methods in haddock.libs.libontology.""" +import pytest +from pathlib import Path + +from haddock.libs.libontology import ( + Molecule, + PDBFile, + ) + +from . import golden_data + + +@pytest.fixture +def molecule(): + return Molecule(None) + + +@pytest.fixture +def protein(): + return Path(golden_data, "protein.pdb") + + +@pytest.fixture +def ensemble_header_w_md5(): + return Path(golden_data, "ens_header.pdb") + + +def test_get_md5(molecule, ensemble_header_w_md5, protein): + """Test get_md5 method.""" + observed_md5_dic = molecule.get_md5(ensemble_header_w_md5) + expected_md5_dic = { + 1: '71098743056e0b95fbfafff690703761', + 2: 'f7ab0b7c751adf44de0f25f53cfee50b', + 3: '41e028d8d28b8d97148dc5e548672142', + 4: '761cb5da81d83971c2aae2f0b857ca1e', + 5: '6c438f941cec7c6dc092c8e48e5b1c10', + } + + assert observed_md5_dic == expected_md5_dic + observed_md5_dic = molecule.get_md5(protein) + assert observed_md5_dic == {} + + +def test_get_ensemble_origin(molecule, ensemble_header_w_md5, protein): + """Test get_ensemble_origin method.""" + expected_origin_dic = { + 1: 'T161-hybrid-fit-C2-NCS_complex_100w', + 2: 'T161-hybrid-fit-C2-NCS_complex_101w', + 3: 'T161-hybrid-fit-C2-NCS_complex_102w', + 4: 'T161-hybrid-fit-C2-NCS_complex_103w', + 5: 
'T161-hybrid-fit-C2-NCS_complex_104w', + } + observed_origin = molecule.get_ensemble_origin(ensemble_header_w_md5) + assert observed_origin == expected_origin_dic + observed_origin = molecule.get_ensemble_origin(protein) + assert observed_origin == {} + + +def test_load_single_pdb(molecule, protein): + """Test casting into PDBFile.""" + # Re-initialize with a actual protein + molecule.__init__(protein) + assert isinstance(molecule.pdb_files, dict) + for pdbfile in molecule.pdb_files.values(): + assert isinstance(pdbfile, PDBFile) + assert len(molecule) == 1 + + +def test_load_single_pdb(molecule, protein): + """Test casting into PDBFile.""" + # Re-initialize with a actual protein + molecule.__init__(protein) + assert isinstance(molecule.pdb_files, dict) + for pdbfile in molecule.pdb_files.values(): + assert isinstance(pdbfile, PDBFile) + assert len(molecule) == 1 diff --git a/tests/test_module_topoaa.py b/tests/test_module_topoaa.py index 5ceaa696a..cc7c87319 100644 --- a/tests/test_module_topoaa.py +++ b/tests/test_module_topoaa.py @@ -75,22 +75,6 @@ def test_generate_topology(topoaa, protein): observed_inp_out.unlink() -def test_get_md5(topoaa, ensemble_header_w_md5, protein): - """Test get_md5 method.""" - observed_md5_dic = topoaa.get_md5(ensemble_header_w_md5) - expected_md5_dic = { - 1: '71098743056e0b95fbfafff690703761', - 2: 'f7ab0b7c751adf44de0f25f53cfee50b', - 3: '41e028d8d28b8d97148dc5e548672142', - 4: '761cb5da81d83971c2aae2f0b857ca1e', - 5: '6c438f941cec7c6dc092c8e48e5b1c10'} - - assert observed_md5_dic == expected_md5_dic - - observed_md5_dic = topoaa.get_md5(protein) - assert observed_md5_dic == {} - - @pytest.mark.skip(reason="Cannot test in Github Actions") def test__run(topoaa, protein): """Test _run method.""" From 278433142e929175038ac072315dc2affaa71aca Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 16:07:08 +0200 Subject: [PATCH 11/32] fix tests --- src/haddock/libs/libontology.py | 2 +- tests/test_libontology.py | 43 
++++++++++++++++++--------------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index 189f74585..859495096 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -334,7 +334,7 @@ def count_models(self) -> int: return self._nb_models @property - def pdb_files(self): + def pdb_files(self) -> Union[dict[int, PDBFile], PDBFile]: return self._pdb_files @pdb_files.setter diff --git a/tests/test_libontology.py b/tests/test_libontology.py index 9bb6fc4cb..4bbf8763d 100644 --- a/tests/test_libontology.py +++ b/tests/test_libontology.py @@ -1,5 +1,8 @@ """Test functions and methods in haddock.libs.libontology.""" import pytest +import tempfile +import shutil + from pathlib import Path from haddock.libs.libontology import ( @@ -25,7 +28,11 @@ def ensemble_header_w_md5(): return Path(golden_data, "ens_header.pdb") -def test_get_md5(molecule, ensemble_header_w_md5, protein): +def test_get_md5( + molecule: Molecule, + ensemble_header_w_md5: Path, + protein: Path, + ): """Test get_md5 method.""" observed_md5_dic = molecule.get_md5(ensemble_header_w_md5) expected_md5_dic = { @@ -41,7 +48,11 @@ def test_get_md5(molecule, ensemble_header_w_md5, protein): assert observed_md5_dic == {} -def test_get_ensemble_origin(molecule, ensemble_header_w_md5, protein): +def test_get_ensemble_origin( + molecule: Molecule, + ensemble_header_w_md5: Path, + protein: Path, + ): """Test get_ensemble_origin method.""" expected_origin_dic = { 1: 'T161-hybrid-fit-C2-NCS_complex_100w', @@ -56,21 +67,15 @@ def test_get_ensemble_origin(molecule, ensemble_header_w_md5, protein): assert observed_origin == {} -def test_load_single_pdb(molecule, protein): - """Test casting into PDBFile.""" - # Re-initialize with a actual protein - molecule.__init__(protein) - assert isinstance(molecule.pdb_files, dict) - for pdbfile in molecule.pdb_files.values(): - assert isinstance(pdbfile, PDBFile) - assert 
len(molecule) == 1 - - -def test_load_single_pdb(molecule, protein): +def test_load_single_pdb(molecule: Molecule, protein: Path): """Test casting into PDBFile.""" - # Re-initialize with a actual protein - molecule.__init__(protein) - assert isinstance(molecule.pdb_files, dict) - for pdbfile in molecule.pdb_files.values(): - assert isinstance(pdbfile, PDBFile) - assert len(molecule) == 1 + with tempfile.TemporaryDirectory('.') as tempdir: + tmp_protein = Path(tempdir, protein.name) + shutil.copyfile(protein, tmp_protein) + # Re-initialize with a actual protein + molecule.__init__(tmp_protein) + assert isinstance(molecule.pdb_files, dict) + print(molecule.pdb_files) + for pdbfile in molecule.pdb_files.values(): + assert isinstance(pdbfile, PDBFile) + assert len(molecule) == 1 From 620e6e19d46cb04067fed067f85375b731f1493e Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 16:34:06 +0200 Subject: [PATCH 12/32] tweak intergration tests --- integration_tests/test_contactmap.py | 2 +- integration_tests/test_ilrmsdmatrix.py | 2 +- integration_tests/test_rmsdmatrix.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/integration_tests/test_contactmap.py b/integration_tests/test_contactmap.py index aaabd5a05..26d4d45bb 100644 --- a/integration_tests/test_contactmap.py +++ b/integration_tests/test_contactmap.py @@ -18,7 +18,7 @@ def contactmap(): """Return contmap module.""" with tempfile.TemporaryDirectory() as tmpdir: preset_contactmap = CMapModule( - order=0, + order=1, path=Path(tmpdir), initial_params=CONTMAP_CONF, ) diff --git a/integration_tests/test_ilrmsdmatrix.py b/integration_tests/test_ilrmsdmatrix.py index 65e11db5a..ddfe1b642 100644 --- a/integration_tests/test_ilrmsdmatrix.py +++ b/integration_tests/test_ilrmsdmatrix.py @@ -22,7 +22,7 @@ def ilrmsdmatrix_module(): """Provide a parametrized IL-RMSD matrix module.""" with tempfile.TemporaryDirectory() as tmpdir: ilrmsdmatrix = IlrmsdmatrixModule( - order=0, path=tmpdir, 
initial_params=DEFAULT_ILRMSD_CONFIG + order=1, path=Path(tmpdir), initial_params=DEFAULT_ILRMSD_CONFIG ) yield ilrmsdmatrix diff --git a/integration_tests/test_rmsdmatrix.py b/integration_tests/test_rmsdmatrix.py index ed67b7829..d86e9d047 100644 --- a/integration_tests/test_rmsdmatrix.py +++ b/integration_tests/test_rmsdmatrix.py @@ -13,9 +13,9 @@ @pytest.fixture def rmsdmatrix_module(): - with tempfile.TemporaryDirectory() as tmpdir: + with tempfile.TemporaryDirectory(".") as tmpdir: ilrmsdmatrix = rmsdmatrixModule( - order=0, path=tmpdir, initial_params=DEFAULT_RMSD_CONFIG + order=1, path=Path(tmpdir), initial_params=DEFAULT_RMSD_CONFIG ) yield ilrmsdmatrix From 2c6e722d0a949a5a651f5dc4ae521a3a5f6a553a Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 16:45:57 +0200 Subject: [PATCH 13/32] intergaration of alascan fix --- integration_tests/test_alascan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/test_alascan.py b/integration_tests/test_alascan.py index 01e175b04..fd6ef050d 100644 --- a/integration_tests/test_alascan.py +++ b/integration_tests/test_alascan.py @@ -17,7 +17,7 @@ def alascan_module(): """Return a default alascan module.""" with tempfile.TemporaryDirectory(dir=".") as tmpdir: alascan = AlascanModule( - order=0, path=".", initial_params=DEFAULT_ALASCAN_CONFIG + order=1, path=".", initial_params=DEFAULT_ALASCAN_CONFIG ) alascan.params["int_cutoff"] = 3.5 yield alascan From d9881671d100c39b565f37cc3ee6c607898f80d7 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 10 Jul 2024 17:06:52 +0200 Subject: [PATCH 14/32] fixing integration tests --- integration_tests/test_alascan.py | 2 +- integration_tests/test_topoaa.py | 34 +++++++++++++++++++++---------- tests/test_libontology.py | 1 - 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/integration_tests/test_alascan.py b/integration_tests/test_alascan.py index fd6ef050d..a1e244e33 100644 --- a/integration_tests/test_alascan.py +++ 
b/integration_tests/test_alascan.py @@ -17,7 +17,7 @@ def alascan_module(): """Return a default alascan module.""" with tempfile.TemporaryDirectory(dir=".") as tmpdir: alascan = AlascanModule( - order=1, path=".", initial_params=DEFAULT_ALASCAN_CONFIG + order=1, path=Path("."), initial_params=DEFAULT_ALASCAN_CONFIG ) alascan.params["int_cutoff"] = 3.5 yield alascan diff --git a/integration_tests/test_topoaa.py b/integration_tests/test_topoaa.py index 6853e89f6..cd723da1e 100644 --- a/integration_tests/test_topoaa.py +++ b/integration_tests/test_topoaa.py @@ -1,25 +1,37 @@ import tempfile -from pathlib import Path - import pytest +from pathlib import Path +from shutil import copyfile -from haddock.modules.topology.topoaa import DEFAULT_CONFIG as DEFAULT_TOPOAA_CONFIG -from haddock.modules.topology.topoaa import HaddockModule as TopoaaModule +from haddock.core.defaults import DATA_DIRNAME +from haddock.modules.topology.topoaa import ( + DEFAULT_CONFIG as DEFAULT_TOPOAA_CONFIG, + HaddockModule as TopoaaModule, + ) from . 
import CNS_EXEC, DATA_DIR, has_cns @pytest.fixture -def topoaa_module(): +def molecules(): + return [ + Path(DATA_DIR, "docking-protein-protein/data/e2aP_1F3G.pdb"), + Path(DATA_DIR, "docking-protein-protein/data/hpr_ensemble.pdb"), + ] + + +@pytest.fixture +def topoaa_module(molecules): with tempfile.TemporaryDirectory() as tmpdir: + mol_copies = [ + copyfile(mol, Path(tmpdir, DATA_DIRNAME, "0_topoaa", mol.name)) + for mol in molecules + ] topoaa = TopoaaModule( - order=0, path=tmpdir, initial_params=DEFAULT_TOPOAA_CONFIG + order=0, path=Path(tmpdir), initial_params=DEFAULT_TOPOAA_CONFIG ) - topoaa.__init__(path=tmpdir, order=0) - topoaa.params["molecules"] = [ - Path(DATA_DIR, "docking-protein-protein/data/e2aP_1F3G.pdb"), - Path(DATA_DIR, "docking-protein-protein/data/hpr_ensemble.pdb"), - ] + topoaa.__init__(path=Path(tmpdir), order=0) + topoaa.params["molecules"] = molecules topoaa.params["mol1"] = {"prot_segid": "A"} topoaa.params["mol2"] = {"prot_segid": "B"} diff --git a/tests/test_libontology.py b/tests/test_libontology.py index 4bbf8763d..964f4c02a 100644 --- a/tests/test_libontology.py +++ b/tests/test_libontology.py @@ -75,7 +75,6 @@ def test_load_single_pdb(molecule: Molecule, protein: Path): # Re-initialize with a actual protein molecule.__init__(tmp_protein) assert isinstance(molecule.pdb_files, dict) - print(molecule.pdb_files) for pdbfile in molecule.pdb_files.values(): assert isinstance(pdbfile, PDBFile) assert len(molecule) == 1 From af2acebe630014a66ab80a4d2af20c9c36c6e96a Mon Sep 17 00:00:00 2001 From: VGPReys Date: Thu, 11 Jul 2024 08:23:08 +0200 Subject: [PATCH 15/32] creating directories in topoaa integration tests --- integration_tests/test_topoaa.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/integration_tests/test_topoaa.py b/integration_tests/test_topoaa.py index cd723da1e..a64ed2463 100644 --- a/integration_tests/test_topoaa.py +++ b/integration_tests/test_topoaa.py @@ -23,15 +23,17 @@ def molecules(): 
@pytest.fixture def topoaa_module(molecules): with tempfile.TemporaryDirectory() as tmpdir: + input_dir_path = Path(tmpdir, DATA_DIRNAME, "0_topoaa") + input_dir_path.mkdir(parents=True) mol_copies = [ - copyfile(mol, Path(tmpdir, DATA_DIRNAME, "0_topoaa", mol.name)) + copyfile(mol, Path(input_dir_path, mol.name)) for mol in molecules ] topoaa = TopoaaModule( order=0, path=Path(tmpdir), initial_params=DEFAULT_TOPOAA_CONFIG ) topoaa.__init__(path=Path(tmpdir), order=0) - topoaa.params["molecules"] = molecules + topoaa.params["molecules"] = mol_copies topoaa.params["mol1"] = {"prot_segid": "A"} topoaa.params["mol2"] = {"prot_segid": "B"} From 69a93cec08531c06505a172d123bad93a54d392e Mon Sep 17 00:00:00 2001 From: VGPReys Date: Thu, 11 Jul 2024 08:57:11 +0200 Subject: [PATCH 16/32] tweak path --- integration_tests/test_topoaa.py | 39 ++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/integration_tests/test_topoaa.py b/integration_tests/test_topoaa.py index a64ed2463..134484312 100644 --- a/integration_tests/test_topoaa.py +++ b/integration_tests/test_topoaa.py @@ -4,6 +4,7 @@ from shutil import copyfile from haddock.core.defaults import DATA_DIRNAME +from haddock.libs.libio import working_directory from haddock.modules.topology.topoaa import ( DEFAULT_CONFIG as DEFAULT_TOPOAA_CONFIG, HaddockModule as TopoaaModule, @@ -23,23 +24,27 @@ def molecules(): @pytest.fixture def topoaa_module(molecules): with tempfile.TemporaryDirectory() as tmpdir: - input_dir_path = Path(tmpdir, DATA_DIRNAME, "0_topoaa") - input_dir_path.mkdir(parents=True) - mol_copies = [ - copyfile(mol, Path(input_dir_path, mol.name)) - for mol in molecules - ] - topoaa = TopoaaModule( - order=0, path=Path(tmpdir), initial_params=DEFAULT_TOPOAA_CONFIG - ) - topoaa.__init__(path=Path(tmpdir), order=0) - topoaa.params["molecules"] = mol_copies - topoaa.params["mol1"] = {"prot_segid": "A"} - topoaa.params["mol2"] = {"prot_segid": "B"} - - topoaa.params["cns_exec"] 
= CNS_EXEC - - yield topoaa + with working_directory(tmpdir): + modulename = "0_topoaa" + input_dir_path = Path(DATA_DIRNAME, modulename) + input_dir_path.mkdir(parents=True) + mol_copies = [ + copyfile(mol, Path(input_dir_path, mol.name)) + for mol in molecules + ] + topoaa = TopoaaModule( + order=0, + path=Path(modulename), + initial_params=DEFAULT_TOPOAA_CONFIG, + ) + topoaa.__init__(path=Path(modulename), order=0) + topoaa.params["molecules"] = mol_copies + topoaa.params["mol1"] = {"prot_segid": "A"} + topoaa.params["mol2"] = {"prot_segid": "B"} + + topoaa.params["cns_exec"] = CNS_EXEC + + yield topoaa @has_cns From 9e38b36d322926e6366d7fc95f08050274991822 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Thu, 11 Jul 2024 09:08:36 +0200 Subject: [PATCH 17/32] tweak path --- integration_tests/test_topoaa.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/integration_tests/test_topoaa.py b/integration_tests/test_topoaa.py index 134484312..0543922bd 100644 --- a/integration_tests/test_topoaa.py +++ b/integration_tests/test_topoaa.py @@ -25,8 +25,9 @@ def molecules(): def topoaa_module(molecules): with tempfile.TemporaryDirectory() as tmpdir: with working_directory(tmpdir): - modulename = "0_topoaa" - input_dir_path = Path(DATA_DIRNAME, modulename) + modulename_path = Path("0_topoaa") + modulename_path.mkdir(parents=True) + input_dir_path = Path(DATA_DIRNAME, modulename_path) input_dir_path.mkdir(parents=True) mol_copies = [ copyfile(mol, Path(input_dir_path, mol.name)) @@ -34,10 +35,10 @@ def topoaa_module(molecules): ] topoaa = TopoaaModule( order=0, - path=Path(modulename), + path=modulename_path, initial_params=DEFAULT_TOPOAA_CONFIG, ) - topoaa.__init__(path=Path(modulename), order=0) + topoaa.__init__(path=modulename_path, order=0) topoaa.params["molecules"] = mol_copies topoaa.params["mol1"] = {"prot_segid": "A"} topoaa.params["mol2"] = {"prot_segid": "B"} From b16d20296a8429472b24f2b3483fe03fc3e224d6 Mon Sep 17 00:00:00 2001 From: 
VGPReys Date: Thu, 11 Jul 2024 09:58:12 +0200 Subject: [PATCH 18/32] tweak path --- integration_tests/test_topoaa.py | 34 +++++++++++++++++++------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/integration_tests/test_topoaa.py b/integration_tests/test_topoaa.py index 0543922bd..8679838fa 100644 --- a/integration_tests/test_topoaa.py +++ b/integration_tests/test_topoaa.py @@ -22,7 +22,7 @@ def molecules(): @pytest.fixture -def topoaa_module(molecules): +def prepare_topoaa_run(molecules): with tempfile.TemporaryDirectory() as tmpdir: with working_directory(tmpdir): modulename_path = Path("0_topoaa") @@ -33,19 +33,25 @@ def topoaa_module(molecules): copyfile(mol, Path(input_dir_path, mol.name)) for mol in molecules ] - topoaa = TopoaaModule( - order=0, - path=modulename_path, - initial_params=DEFAULT_TOPOAA_CONFIG, - ) - topoaa.__init__(path=modulename_path, order=0) - topoaa.params["molecules"] = mol_copies - topoaa.params["mol1"] = {"prot_segid": "A"} - topoaa.params["mol2"] = {"prot_segid": "B"} - - topoaa.params["cns_exec"] = CNS_EXEC - - yield topoaa + yield modulename_path, mol_copies + +@pytest.fixture +def topoaa_module(prepare_topoaa_run): + modulename_path = prepare_topoaa_run[0] + mol_copies = prepare_topoaa_run[1] + topoaa = TopoaaModule( + order=0, + path=modulename_path, + initial_params=DEFAULT_TOPOAA_CONFIG, + ) + #topoaa.__init__(path=modulename_path, order=0) + topoaa.params["molecules"] = mol_copies + topoaa.params["mol1"] = {"prot_segid": "A"} + topoaa.params["mol2"] = {"prot_segid": "B"} + + topoaa.params["cns_exec"] = CNS_EXEC + + yield topoaa @has_cns From 5a6be5c8c7442ca5c5a1b54a4debfeed8f458aa4 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Thu, 11 Jul 2024 16:56:28 +0200 Subject: [PATCH 19/32] improved regex for ensemble origin parsing --- integration_tests/test_topoaa.py | 1 + src/haddock/libs/libontology.py | 2 +- src/haddock/modules/topology/topoaa/__init__.py | 2 +- tests/golden_data/ens_header.pdb | 3 ++- 
tests/test_libontology.py | 1 + 5 files changed, 6 insertions(+), 3 deletions(-) diff --git a/integration_tests/test_topoaa.py b/integration_tests/test_topoaa.py index 8679838fa..c3bbccb51 100644 --- a/integration_tests/test_topoaa.py +++ b/integration_tests/test_topoaa.py @@ -35,6 +35,7 @@ def prepare_topoaa_run(molecules): ] yield modulename_path, mol_copies + @pytest.fixture def topoaa_module(prepare_topoaa_run): modulename_path = prepare_topoaa_run[0] diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index 9276435e9..822885c24 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -458,7 +458,7 @@ def get_ensemble_origin(ensemble_f: FilePath) -> dict[int, str]: lines = text.split(linesep) REMARK_lines = (line for line in lines if line.startswith("REMARK")) re_origin = re.compile( - r"REMARK\s+MODEL\s+(\d+)\s+(FROM|from|From)\s+(([\w_-]+\.?)+)" + r"REMARK\s+\d*\s+MODEL\s+(\d+)\s+(FROM|from|From)\s+[\./]{0,2}(([\w_-]+[/]?)+)\.?" # noqa : E501 ) for line in REMARK_lines: if (match := re_origin.search(line)): diff --git a/src/haddock/modules/topology/topoaa/__init__.py b/src/haddock/modules/topology/topoaa/__init__.py index ee8d027ea..ba94217b7 100644 --- a/src/haddock/modules/topology/topoaa/__init__.py +++ b/src/haddock/modules/topology/topoaa/__init__.py @@ -106,7 +106,7 @@ def _run(self) -> None: # extracts `input` key from params. 
The `input` keyword needs to # be treated separately mol_params: ParamDict = {} - for k in list(self.params.keys()): + for k in self.params.keys(): if k.startswith("mol") and k[3:].isdigit(): mol_params[k] = self.params.pop(k) diff --git a/tests/golden_data/ens_header.pdb b/tests/golden_data/ens_header.pdb index 628625cd5..2a568957e 100644 --- a/tests/golden_data/ens_header.pdb +++ b/tests/golden_data/ens_header.pdb @@ -7,4 +7,5 @@ REMARK MODEL 1 FROM T161-hybrid-fit-C2-NCS_complex_100w.pdb REMARK MODEL 2 FROM T161-hybrid-fit-C2-NCS_complex_101w.pdb REMARK MODEL 3 FROM T161-hybrid-fit-C2-NCS_complex_102w.pdb REMARK MODEL 4 FROM T161-hybrid-fit-C2-NCS_complex_103w.pdb -REMARK MODEL 5 FROM T161-hybrid-fit-C2-NCS_complex_104w.pdb \ No newline at end of file +REMARK MODEL 5 FROM T161-hybrid-fit-C2-NCS_complex_104w.pdb +REMARK 4 MODEL 6 FROM ./hguiw/fewjfo/efewfhewiof/73b07fb2ab6b3245_t264_1.pdb diff --git a/tests/test_libontology.py b/tests/test_libontology.py index c99ece04a..f24322108 100644 --- a/tests/test_libontology.py +++ b/tests/test_libontology.py @@ -133,6 +133,7 @@ def test_get_ensemble_origin( 3: 'T161-hybrid-fit-C2-NCS_complex_102w', 4: 'T161-hybrid-fit-C2-NCS_complex_103w', 5: 'T161-hybrid-fit-C2-NCS_complex_104w', + 6: '73b07fb2ab6b3245_t264_1', } observed_origin = molecule.get_ensemble_origin(ensemble_header_w_md5) assert observed_origin == expected_origin_dic From 2bc55956defca8babd33beebe74ffa9ba90b754e Mon Sep 17 00:00:00 2001 From: Victor Reys <132575181+VGPReys@users.noreply.github.com> Date: Mon, 15 Jul 2024 08:46:26 +0200 Subject: [PATCH 20/32] Update libontology.py regex --- src/haddock/libs/libontology.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index 822885c24..601b72d95 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -457,13 +457,15 @@ def get_ensemble_origin(ensemble_f: FilePath) -> dict[int, str]: text = 
Path(ensemble_f).read_text() lines = text.split(linesep) REMARK_lines = (line for line in lines if line.startswith("REMARK")) + # Compile regex to parse filepath + # https://regex101.com/r/fH0J6a/1 re_origin = re.compile( - r"REMARK\s+\d*\s+MODEL\s+(\d+)\s+(FROM|from|From)\s+[\./]{0,2}(([\w_-]+[/]?)+)\.?" # noqa : E501 + r"REMARK\s+\d*\s+MODEL\s+(\d+)\s+(FROM|from|From)\s+[\./]{0,2}(([\w_\.-]+[/]?)+)\.?" # noqa : E501 ) for line in REMARK_lines: if (match := re_origin.search(line)): model_num = int(match.group(1).strip()) - original_path = match.group(3).strip() + original_path = match.group(4).strip() original_name = Path(original_path).stem origin_dic[model_num] = original_name return origin_dic From cdfefd4d1ad6d597c5fa89edc1494925fa6ee9a4 Mon Sep 17 00:00:00 2001 From: Victor Reys <132575181+VGPReys@users.noreply.github.com> Date: Mon, 15 Jul 2024 09:05:54 +0200 Subject: [PATCH 21/32] Update cli_score.py --- src/haddock/clis/cli_score.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/haddock/clis/cli_score.py b/src/haddock/clis/cli_score.py index 0f03d6394..ea9c118e0 100644 --- a/src/haddock/clis/cli_score.py +++ b/src/haddock/clis/cli_score.py @@ -106,7 +106,19 @@ def maincli() -> None: cli(_ap(), main) -def get_parameters(kwargs: Any) -> dict[str, Any]: +def get_parameters(kwargs: dict[str, Any]) -> dict[str, Any]: + """Obtain and validate command line arguments and add default ones. + + Parameters + ---------- + kwargs : dict[str, Any] + Command line arguments (supposed to be emscoring parameters) + + Return + ------ + ems_dict : dict[str, Any] + Default parameters updated by command line arguments.
+ """ from os import linesep from haddock.gear.yaml2cfg import read_from_yaml_config from haddock.modules.scoring.emscoring import DEFAULT_CONFIG From 38bb99b82ddf685e8f56d2da13e007821cbf6638 Mon Sep 17 00:00:00 2001 From: Victor Reys <132575181+VGPReys@users.noreply.github.com> Date: Mon, 15 Jul 2024 09:06:36 +0200 Subject: [PATCH 22/32] Update __init__.py --- src/haddock/modules/topology/topoaa/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/haddock/modules/topology/topoaa/__init__.py b/src/haddock/modules/topology/topoaa/__init__.py index ba94217b7..ee8d027ea 100644 --- a/src/haddock/modules/topology/topoaa/__init__.py +++ b/src/haddock/modules/topology/topoaa/__init__.py @@ -106,7 +106,7 @@ def _run(self) -> None: # extracts `input` key from params. The `input` keyword needs to # be treated separately mol_params: ParamDict = {} - for k in self.params.keys(): + for k in list(self.params.keys()): if k.startswith("mol") and k[3:].isdigit(): mol_params[k] = self.params.pop(k) From 102c8f4606e7f216ff63b63fd3cd7fcf38e0ac34 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 17 Jul 2024 12:50:40 +0200 Subject: [PATCH 23/32] revision v1.1 --- integration_tests/test_topoaa.py | 1 - src/haddock/clis/cli_score.py | 9 +++-- src/haddock/gear/prepare_run.py | 37 +++++++++++++------ src/haddock/libs/libontology.py | 7 +++- .../modules/topology/topoaa/__init__.py | 7 ++-- tests/test_libontology.py | 20 +++++----- 6 files changed, 49 insertions(+), 32 deletions(-) diff --git a/integration_tests/test_topoaa.py b/integration_tests/test_topoaa.py index c3bbccb51..b018b8e36 100644 --- a/integration_tests/test_topoaa.py +++ b/integration_tests/test_topoaa.py @@ -45,7 +45,6 @@ def topoaa_module(prepare_topoaa_run): path=modulename_path, initial_params=DEFAULT_TOPOAA_CONFIG, ) - #topoaa.__init__(path=modulename_path, order=0) topoaa.params["molecules"] = mol_copies topoaa.params["mol1"] = {"prot_segid": "A"} topoaa.params["mol2"] = {"prot_segid": "B"} 
diff --git a/src/haddock/clis/cli_score.py b/src/haddock/clis/cli_score.py index ea9c118e0..44a9663c6 100644 --- a/src/haddock/clis/cli_score.py +++ b/src/haddock/clis/cli_score.py @@ -18,7 +18,6 @@ """ import argparse import sys -import tempfile from haddock.core.typing import ( Any, @@ -139,17 +138,19 @@ def get_parameters(kwargs: dict[str, Any]) -> dict[str, Any]: f"* ATTENTION * Value ({value}) of parameter {param} " f"different from default ({default_emscoring[param]})" ) - # get the type of default value - default_type = type(default_emscoring[param]) # convert the value to the same type - if default_type == bool: + if isinstance(default_emscoring[param], bool): + # In the case of boolean type if value.lower() not in ["true", "false"]: sys.exit( f"* ERROR * Boolean parameter {param} " "should be True or False" ) + # convert into pythonic True or False value = value.lower() == "true" else: + # Cast value into specific python3 type + default_type = type(default_emscoring[param]) value = default_type(value) ems_dict[param] = value n_warnings += 1 diff --git a/src/haddock/gear/prepare_run.py b/src/haddock/gear/prepare_run.py index d4c2ab2af..ca32bf706 100644 --- a/src/haddock/gear/prepare_run.py +++ b/src/haddock/gear/prepare_run.py @@ -316,9 +316,15 @@ def setup_run( ) first_module_id = list(modules_params.keys())[0] - if (topoaa_module_id := "topoaa.1") in modules_params.keys(): - topology_params = modules_params[topoaa_module_id] + # Here we check if topoaa is the first module in the workflow. + # If it is, we gather the parameters of topoaa in the topology_params, + # as equired by the function populate_topology_molecule_params(), + # to map the molX parameters info to input molecules. + # Without this, we loose the information. 
+ if first_module_id == "topoaa.1": + topology_params = modules_params[first_module_id] else: + # If not, just fake an empty set of parameters topology_params = {} if starting_from_copy: @@ -335,10 +341,6 @@ def setup_run( general_params["molecules"], topology_params, ) - # copy_molecules_to_topology( - # general_params["molecules"], - # modules_params[first_module_id], - # ) max_mols = len(topology_params["molecules"]) if max_mols > max_molecules_allowed: @@ -816,6 +818,9 @@ def copy_molecules_to_data_dir( topoaa_params : dict A dictionary containing the topoaa parameters. + + _first_module_name : str + Name of the first module used in the workflow. preprocess : bool Whether to preprocess input molecules. Defaults to ``True``. @@ -823,7 +828,7 @@ def copy_molecules_to_data_dir( """ # Removes digit from module name # Build regex to capture '.' - name_digit_regex = re.compile(r"(\w+)\.\d+") + name_digit_regex = re.compile(r"(\w+)(\.\d+)?") first_module_name: str = "input_molecules" if match := name_digit_regex.search(_first_module_name): first_module_name = match.group(1) @@ -1061,16 +1066,24 @@ def _get_expandable( return allowed_params -def populate_topology_molecule_params(topoaa: ParamMap) -> None: - """Populate topoaa `molX` subdictionaries.""" +def populate_topology_molecule_params(topology_params: ParamMap) -> None: + """Populate topoaa `molX` subdictionaries. + + Parameters + ---------- + topology_params : ParamMap + Dictionary of parameter with their values. + Possibily parameters from topoaa module. 
+ If not, nothing will happen + """ topoaa_dft = _read_defaults("topoaa.1") - for i in range(1, len(topoaa["molecules"]) + 1): + for i in range(1, len(topology_params["molecules"]) + 1): mol = f"mol{i}" - topoaa[mol] = recursive_dict_update( + topology_params[mol] = recursive_dict_update( topoaa_dft["mol1"], - topoaa[mol] if mol in topoaa else {}, + topology_params[mol] if mol in topology_params else {}, ) return diff --git a/src/haddock/libs/libontology.py b/src/haddock/libs/libontology.py index 601b72d95..65886ffcb 100644 --- a/src/haddock/libs/libontology.py +++ b/src/haddock/libs/libontology.py @@ -13,6 +13,7 @@ import jsonpickle from haddock.core.defaults import MODULE_IO_FILE +from haddock.core.exceptions import SetupError from haddock.core.typing import ( Any, FilePath, @@ -276,8 +277,10 @@ def load_from_input_molecules( """ # Gather all input molecules input_molecules = list(input_molecules_dir.glob('*.pdb')) - assert input_molecules != [], \ - f"No molecules could be found in `{input_molecules_dir}`" + if input_molecules == []: + raise SetupError( + f"No molecules could be found in `{input_molecules_dir}`" + ) # Sort them by creation date (which is also input order) input_molecules.sort(key=getmtime) # FIXME: getctime ? 
# Set input attribute diff --git a/src/haddock/modules/topology/topoaa/__init__.py b/src/haddock/modules/topology/topoaa/__init__.py index ee8d027ea..7c44af141 100644 --- a/src/haddock/modules/topology/topoaa/__init__.py +++ b/src/haddock/modules/topology/topoaa/__init__.py @@ -154,9 +154,10 @@ def _run(self) -> None: default_params_path=self.toppar_path, write_to_disk=not self.params["less_io"], ) - self.log( - f"Topology CNS input created in {topoaa_input.name}" - ) + if isinstance(topoaa_input, Path): + self.log( + f"Topology CNS input created in {topoaa_input.name}" + ) # Add new job to the pool output_filename = Path(f"{model_path.stem}.{Format.CNS_OUTPUT}") diff --git a/tests/test_libontology.py b/tests/test_libontology.py index f24322108..50306b497 100644 --- a/tests/test_libontology.py +++ b/tests/test_libontology.py @@ -34,7 +34,7 @@ def output_pdbfile() -> Generator[PDBFile, None, None]: @pytest.fixture -def moduleio_with_pdbfile_list(input_pdbfile, output_pdbfile): +def moduleio_with_pdbfile_list(input_pdbfile: PDBFile, output_pdbfile: PDBFile): m = ModuleIO() m.input = [input_pdbfile] m.output = [output_pdbfile, output_pdbfile] @@ -42,7 +42,7 @@ def moduleio_with_pdbfile_list(input_pdbfile, output_pdbfile): @pytest.fixture -def moduleio_with_pdbfile_dict(output_pdbfile): +def moduleio_with_pdbfile_dict(output_pdbfile: PDBFile): m = ModuleIO() m.input = [] m.output = [ @@ -76,7 +76,7 @@ def io_data() -> dict: @pytest.fixture -def io_json_file(io_data) -> Generator[Path, None, None]: +def io_json_file(io_data: dict) -> Generator[Path, None, None]: with tempfile.NamedTemporaryFile(mode="w+") as f: json.dump(io_data, f) @@ -88,7 +88,7 @@ def io_json_file(io_data) -> Generator[Path, None, None]: @pytest.fixture def molecule(): - return Molecule(None) + return Molecule(None) # type: ignore @pytest.fixture @@ -306,7 +306,7 @@ def test_moduleio_add_list(): assert moduleio.output == ["literally", "anything"] -def test_moduleio_save(mocker, 
moduleio_with_pdbfile_list): +def test_moduleio_save(mocker, moduleio_with_pdbfile_list: ModuleIO): with tempfile.NamedTemporaryFile() as temp_module_io_f: mocker.patch("haddock.core.defaults", temp_module_io_f.name) @@ -328,7 +328,7 @@ def test_moduleio_save(mocker, moduleio_with_pdbfile_list): assert isinstance(observed_data, dict) -def test_moduleio_load(io_json_file, io_data): +def test_moduleio_load(io_json_file: Path, io_data: dict): moduleio = ModuleIO() moduleio.load(filename=io_json_file) @@ -337,7 +337,7 @@ def test_moduleio_load(io_json_file, io_data): assert moduleio.output == io_data["output"] -def test_moduleio_retrieve_models_list(moduleio_with_pdbfile_list): +def test_moduleio_retrieve_models_list(moduleio_with_pdbfile_list: ModuleIO): result = moduleio_with_pdbfile_list.retrieve_models() @@ -346,7 +346,7 @@ def test_moduleio_retrieve_models_list(moduleio_with_pdbfile_list): assert isinstance(result[1], PDBFile) -def test_moduleio_retrieve_models_dict(moduleio_with_pdbfile_dict): +def test_moduleio_retrieve_models_dict(moduleio_with_pdbfile_dict: ModuleIO): result = moduleio_with_pdbfile_dict.retrieve_models( crossdock=True, individualize=True @@ -379,7 +379,7 @@ def test_moduleio_retrieve_models_dict(moduleio_with_pdbfile_dict): assert isinstance(result[0][0], PDBFile) -def test_moduleio_check_faulty(mocker, module_io_with_persistent): +def test_moduleio_check_faulty(mocker, module_io_with_persistent: ModuleIO): mocker.patch.object(module_io_with_persistent, "remove_missing", return_value=None) @@ -400,7 +400,7 @@ def test_moduleio_check_faulty(mocker, module_io_with_persistent): assert result == pytest.approx(10.0) -def test_moduleio_remove_missing(module_io_with_persistent): +def test_moduleio_remove_missing(module_io_with_persistent: ModuleIO): # Remove the first file first_file = module_io_with_persistent.output[0].rel_path From a03cdb5cfbc2d920290c70353473b8f38759d021 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 17 Jul 2024 14:36:36 +0200 
Subject: [PATCH 24/32] modify step index of caprieval and rigidbody integration tests --- integration_tests/test_caprieval.py | 2 +- integration_tests/test_rigidbody.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integration_tests/test_caprieval.py b/integration_tests/test_caprieval.py index ac51e39c0..c276d93aa 100644 --- a/integration_tests/test_caprieval.py +++ b/integration_tests/test_caprieval.py @@ -16,7 +16,7 @@ def caprieval_module(): with tempfile.TemporaryDirectory() as tmpdir: yield CaprievalModule( - order=0, + order=1, path=Path(tmpdir), init_params=DEFAULT_CAPRIEVAL_CONFIG, ) diff --git a/integration_tests/test_rigidbody.py b/integration_tests/test_rigidbody.py index fa69f3452..f75c847f1 100644 --- a/integration_tests/test_rigidbody.py +++ b/integration_tests/test_rigidbody.py @@ -16,7 +16,7 @@ def rigidbody_module(): with tempfile.TemporaryDirectory() as tmpdir: rigidbody = RigidbodyModule( - order=0, path=Path(tmpdir), initial_params=DEFAULT_RIGIDBODY_CONFIG + order=1, path=Path(tmpdir), initial_params=DEFAULT_RIGIDBODY_CONFIG ) yield rigidbody From cb8102541366bb79d99417d0819234835bf3e871 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 17 Jul 2024 14:56:59 +0200 Subject: [PATCH 25/32] removing List type import --- src/haddock/core/typing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/haddock/core/typing.py b/src/haddock/core/typing.py index c8155caab..73a523b0d 100644 --- a/src/haddock/core/typing.py +++ b/src/haddock/core/typing.py @@ -30,7 +30,6 @@ Generic, Iterable, Iterator, - List, Literal, Mapping, MutableMapping, From 9add2c104c81d3f488fadac89c523dcec3e78213 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 17 Jul 2024 15:32:32 +0200 Subject: [PATCH 26/32] adding tests --- src/haddock/clis/cli_score.py | 20 +++++++++++++++----- src/haddock/core/typing.py | 1 + 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/haddock/clis/cli_score.py b/src/haddock/clis/cli_score.py index 
44a9663c6..557764bef 100644 --- a/src/haddock/clis/cli_score.py +++ b/src/haddock/clis/cli_score.py @@ -19,6 +19,7 @@ import argparse import sys +from haddock.core.exceptions import ConfigurationError from haddock.core.typing import ( Any, ArgumentParser, @@ -121,13 +122,13 @@ def get_parameters(kwargs: dict[str, Any]) -> dict[str, Any]: from os import linesep from haddock.gear.yaml2cfg import read_from_yaml_config from haddock.modules.scoring.emscoring import DEFAULT_CONFIG - # config all parameters are correctly spelled. + # check all parameters are correctly spelled. default_emscoring = read_from_yaml_config(DEFAULT_CONFIG) ems_dict = default_emscoring.copy() n_warnings = 0 for param, value in kwargs.items(): if param not in default_emscoring: - sys.exit( + raise ConfigurationError( f"* ERROR * Parameter {param!r} is not a " f"valid `emscoring` parameter.{linesep}" "Valid emscoring parameters are: " @@ -142,7 +143,7 @@ def get_parameters(kwargs: dict[str, Any]) -> dict[str, Any]: if isinstance(default_emscoring[param], bool): # In the case of boolean type if value.lower() not in ["true", "false"]: - sys.exit( + raise ConfigurationError( f"* ERROR * Boolean parameter {param} " "should be True or False" ) @@ -151,7 +152,13 @@ def get_parameters(kwargs: dict[str, Any]) -> dict[str, Any]: else: # Cast value into specific python3 type default_type = type(default_emscoring[param]) - value = default_type(value) + try: + value = default_type(value) + except ValueError: + raise ConfigurationError( + f"* ERROR * parameter '{param}' must be of " + f"type '{default_type.__name__}'" + ) ems_dict[param] = value n_warnings += 1 if n_warnings != 0: @@ -219,7 +226,10 @@ def main( sys.exit(f"* ERROR * Input PDB file {str(input_pdb)!r} does not exist") # Get parameters - ems_dict = get_parameters(kwargs) + try: + ems_dict = get_parameters(kwargs) + except ConfigurationError as config_error: + sys.exit(config_error) # create run directory run_dir = Path(run_dir) diff --git 
a/src/haddock/core/typing.py b/src/haddock/core/typing.py index 73a523b0d..e5d048fb2 100644 --- a/src/haddock/core/typing.py +++ b/src/haddock/core/typing.py @@ -31,6 +31,7 @@ Iterable, Iterator, Literal, + List, Mapping, MutableMapping, Optional, From f8d9dd4ce6fb1146b7d6bd4f53d0485dcceb08be Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 17 Jul 2024 15:33:33 +0200 Subject: [PATCH 27/32] adding tests --- tests/test_cli_score.py | 76 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 tests/test_cli_score.py diff --git a/tests/test_cli_score.py b/tests/test_cli_score.py new file mode 100644 index 000000000..4471a9cbb --- /dev/null +++ b/tests/test_cli_score.py @@ -0,0 +1,76 @@ +"""Tests related to haddock.clis.cli_score""" +import pytest + +from haddock.clis.cli_score import get_parameters +from haddock.core.exceptions import ConfigurationError +from haddock.gear.yaml2cfg import read_from_yaml_config +from haddock.modules.scoring.emscoring import ( + DEFAULT_CONFIG as EMSCORING_DEFAULTS_CONFIG_PATH, + ) + + +@pytest.fixture +def empty_params() -> dict: + return {} + + +@pytest.fixture +def v_cmd_line_params() -> dict: + return { + "w_bsa": 10, + "w_desolv": 10, + "w_elec": 10, + "w_vdw": 10, + } + + +@pytest.fixture +def wrong_params() -> dict: + return {"fake": "wrong"} + + +@pytest.fixture +def wrong_params_type() -> dict: + return {"w_bsa": "wrong"} + + +@pytest.fixture +def default_emscoring_params() -> dict: + default_emscoring = read_from_yaml_config(EMSCORING_DEFAULTS_CONFIG_PATH) + return default_emscoring + + +def test_no_input_params(empty_params: dict, default_emscoring_params: dict): + """Test get_parameters without inputs.""" + final_params = get_parameters(empty_params) + assert isinstance(final_params, dict) + for param_name, param_value in default_emscoring_params.items(): + assert final_params[param_name] == param_value + + +def test_input_params( + v_cmd_line_params: dict[str, int], + 
 default_emscoring_params: dict, + ): + """Test get_parameters with inputs.""" + final_params = get_parameters(v_cmd_line_params) + assert isinstance(final_params, dict) + for param_name, param_value in default_emscoring_params.items(): + if param_name in v_cmd_line_params.keys(): + assert final_params[param_name] == v_cmd_line_params[param_name] + else: + assert final_params[param_name] == param_value + + +def test_wrong_params(wrong_params: dict[str, str]): + """Test get_parameters with wrong inputs.""" + with pytest.raises(ConfigurationError): + final_params = get_parameters(wrong_params) + assert final_params is None + + +def test_wrong_param_type(wrong_params_type: dict[str, str]): + """Test get_parameters with wrong inputs.""" + with pytest.raises(ConfigurationError): + final_params = get_parameters(wrong_params_type) + assert final_params is None From 451f942fdc52023082647fa355c5bab75c846b41 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Mon, 29 Jul 2024 16:11:45 +0200 Subject: [PATCH 28/32] adding new exception DependencyError --- src/haddock/core/exceptions.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/haddock/core/exceptions.py b/src/haddock/core/exceptions.py index 44a2c8578..0c20d92fa 100644 --- a/src/haddock/core/exceptions.py +++ b/src/haddock/core/exceptions.py @@ -53,3 +53,25 @@ class HaddockTermination(HaddockError): """Terminates HADDOCK.""" pass + + +class DependencyError(ModuleError): + """Error thrown when required dependency not satisfied.""" + + def __init__( + self, + msg: str = "", + module: str = "", + dependency: str = "", + ): + self.message = msg + self.module = module + self.dependency = dependency + + def __str__(self) -> str: + additions: str = "" + if self.module: + additions += f"Module `{self.module}` -> " + if self.dependency: + additions += f"Required dependency `{self.dependency}`" + return f"{self.message} {additions}" From 3d2a34558cf335814a5bfc1e5df338355c3cc4ad Mon Sep 17 00:00:00 2001 From: 
VGPReys Date: Mon, 29 Jul 2024 16:13:22 +0200 Subject: [PATCH 29/32] adding dependency checks at workflow prepare_run --- src/haddock/core/defaults.py | 20 ++++---- src/haddock/gear/prepare_run.py | 50 ++++++++++++++++++- src/haddock/libs/libworkflow.py | 9 ++-- .../modules/analysis/caprieval/capri.py | 22 ++++---- 4 files changed, 77 insertions(+), 24 deletions(-) diff --git a/src/haddock/core/defaults.py b/src/haddock/core/defaults.py index b8bb70620..47804f464 100644 --- a/src/haddock/core/defaults.py +++ b/src/haddock/core/defaults.py @@ -21,13 +21,13 @@ MODULE_PATH_NAME = "step_" """ Module input and generated data will be stored in folder starting by -this prefix""" +this prefix.""" MODULE_IO_FILE = "io.json" -"""Default name for exchange module information file""" +"""Default name for exchange module information file.""" MAX_NUM_MODULES = 10000 -"""Temptative number of max allowed number of modules to execute""" +"""Temptative number of max allowed number of modules to execute.""" valid_run_dir_chars = string.ascii_letters + string.digits + "._-/\\" @@ -40,12 +40,14 @@ DATA_DIRNAME = "data" """Name given to the directory holding data.""" -CNS_MODULES = ["rigidbody", - "flexref", - "emscoring", - "mdscoring", - "mdref", - "emref"] +CNS_MODULES = ( + "rigidbody", + "flexref", + "emscoring", + "mdscoring", + "mdref", + "emref", + ) """List of CNS modules available in HADDOCK3.""" diff --git a/src/haddock/gear/prepare_run.py b/src/haddock/gear/prepare_run.py index ca32bf706..be6b94982 100644 --- a/src/haddock/gear/prepare_run.py +++ b/src/haddock/gear/prepare_run.py @@ -15,7 +15,11 @@ from haddock import EmptyPath, contact_us, haddock3_source_path, log from haddock.core.defaults import RUNDIR, max_molecules_allowed, DATA_DIRNAME -from haddock.core.exceptions import ConfigurationError, ModuleError +from haddock.core.exceptions import ( + ConfigurationError, + ModuleError, + DependencyError, + ) from haddock.core.typing import ( Any, Callable, @@ -290,6 +294,7 @@ 
def setup_run( if from_scratch: check_run_dir_exists(general_params[RUNDIR]) + check_CNS_usage(modules_params) if scratch_rest0: check_mandatory_argments_are_present(general_params) @@ -930,7 +935,10 @@ def check_run_dir_exists(run_dir: FilePath) -> None: def identify_modules(params: Iterable[str]) -> list[str]: """Identify keys (headings) belonging to HADDOCK3 modules.""" - modules_keys = [k for k in params if get_module_name(k) in modules_category] + modules_keys = [ + param_name for param_name in params + if get_module_name(param_name) in modules_category + ] return modules_keys @@ -1295,3 +1303,41 @@ def update_step_names_in_file( text = text.replace(s1, s2) file_.write_text(text) return + + +def check_CNS_usage(modules_params: ParamMap) -> None: + """Check that a topology module is run prior to modules requiring CNS. + + Parameters + ---------- + modules_params : ParamMap + Dict of modules parameters. + Only used to obtain ordered list of modules. + + Raises + ------ + DependencyError + Error thrown if topology not run before a CNS module. 
+            # We can stop here as either error raised or check passed
+            break
-1,13 +1,14 @@ """CAPRI module.""" import copy +import json import os import shutil import tempfile + from itertools import combinations from pathlib import Path - os.environ["OPENBLAS_NUM_THREADS"] = "1" import numpy as np @@ -26,8 +27,8 @@ ParamDict, ParamMap, Union, - Type, ) +from haddock.gear.config import load as read_config from haddock.libs.libalign import ( ALIGNError, calc_rmsd, @@ -44,19 +45,21 @@ WEIGHTS = ["w_elec", "w_vdw", "w_desolv", "w_bsa", "w_air"] -import json - -from haddock.gear.config import load as read_config -def get_previous_cns_step(sel_steps: list, st_order: int) -> Union[str, None]: +def get_previous_cns_step( + sel_steps: list[str], + step_order: int, + ) -> Optional[str]: """ Get the previous CNS step. Parameters ---------- - run_path : Path - Path to the run folder. + sel_steps : list[str] + Selected steps. + step_order : int + Index of the step. Returns ------- @@ -67,7 +70,7 @@ def get_previous_cns_step(sel_steps: list, st_order: int) -> Union[str, None]: cns_step = None # just to be careful, remove steps with more than one underscore sel_steps = [step for step in sel_steps if step.count("_") == 1] - mod = min(st_order - 1, len(sel_steps) - 1) + mod = min(step_order - 1, len(sel_steps) - 1) # loop while mod > -1: st_name = sel_steps[mod].split("_")[1] @@ -75,7 +78,6 @@ def get_previous_cns_step(sel_steps: list, st_order: int) -> Union[str, None]: cns_step = sel_steps[mod] break mod -= 1 - return cns_step From 6e121fa84688eec2268fe9ee99066a6a472678fc Mon Sep 17 00:00:00 2001 From: VGPReys Date: Mon, 29 Jul 2024 16:13:50 +0200 Subject: [PATCH 30/32] test to the DependencyError during prepare_run --- tests/test_gear_prepare_run.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/test_gear_prepare_run.py b/tests/test_gear_prepare_run.py index 4c03091a7..123671903 100644 --- a/tests/test_gear_prepare_run.py +++ b/tests/test_gear_prepare_run.py @@ -5,8 +5,10 @@ import pytest -from 
+def test_check_CNS_usage():
+    """Test if check_CNS_usage is functional."""
+    # Case where topology is run before
+    check = check_CNS_usage({"topoaa": {}, "mdref": {}})
+    assert check is None
+    # Loop over CNS modules requiring topology to be accessible
+    for cns_module in CNS_MODULES:
+        # Case where topology is run after
+        with pytest.raises(DependencyError):
+            check_exception1 = check_CNS_usage({cns_module: {}, "topoaa": {}})
+            assert check_exception1 is None
+        # Case where topology not run at all
+        with pytest.raises(DependencyError):
+            check_exception2 = check_CNS_usage({cns_module: {}})
+            assert check_exception2 is None
save_scoring_weights(cns_step) From 9f9a2d0764009ef9837d714ca1ae64183b87d2c5 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Tue, 30 Jul 2024 15:29:26 +0200 Subject: [PATCH 32/32] remove useless import --- src/haddock/modules/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/haddock/modules/__init__.py b/src/haddock/modules/__init__.py index 084399d18..4c7d17d56 100644 --- a/src/haddock/modules/__init__.py +++ b/src/haddock/modules/__init__.py @@ -1,5 +1,6 @@ """HADDOCK3 modules.""" import re + from abc import ABC, abstractmethod from contextlib import contextmanager, suppress from copy import deepcopy