From 43a21f805af8dd1846dd5d7e85cddcbe82bb136e Mon Sep 17 00:00:00 2001
From: Marcel Mueller <marcel.mueller@thch.uni-bonn.de>
Date: Mon, 17 Feb 2025 17:52:01 +0100
Subject: [PATCH 1/2] Fix inconsistencies in `ORCA` interface (#125)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix inconsistencies in ORCA interface

Signed-off-by: Marcel Müller <marcel.mueller@thch.uni-bonn.de>

* make new default more consistent; allow setting to 'none'

Signed-off-by: Marcel Müller <marcel.mueller@thch.uni-bonn.de>

* reduce printout as nobody sees it anyway

Signed-off-by: Marcel Müller <marcel.mueller@thch.uni-bonn.de>

* forgot some return statements

Signed-off-by: Marcel Müller <marcel.mueller@thch.uni-bonn.de>

---------

Signed-off-by: Marcel Müller <marcel.mueller@thch.uni-bonn.de>
---
 CHANGELOG.md                   |  2 ++
 mindlessgen.toml               |  4 ++--
 src/mindlessgen/prog/config.py | 18 ++++++++++++++----
 src/mindlessgen/qm/orca.py     | 10 ++++++----
 4 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0842e14..4a9032a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - dict keys for elemental compositions will now always be checked for validity
 - Renamed GP3-xTB to g-xTB
 - Moved constants and (empirical) parameters to the `data` module
+- Default for optimization cycles in the postprocessing step set to program default (convergence)
 
 ### Deprecated
 - Nothing will be printed while multiple molecules are generated in parallel, tqdm-based progress bar instead
@@ -20,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - support for TURBOMOLE as QM engine
 - updated the parallelization to work over the number of molecules
 - possibility to generate symmetrical molecules (choice from rotation, inversion, mirroring)
+- Number of optimization steps in the postprocessing part can be set to program default by `none`
 
 ### Fixed
 - version string is now correctly formatted and printed
diff --git a/mindlessgen.toml b/mindlessgen.toml
index 2c29872..53c288e 100644
--- a/mindlessgen.toml
+++ b/mindlessgen.toml
@@ -69,8 +69,8 @@ ncores = 2
 engine = "orca"
 # > Optimize geometry in the post-processing part. If `false`, only a single-point is conducted. Options: <bool>
 optimize = true
-# > Optimization cycles for the post-processing part. If not given, the program default is chosen. Options: <int>
-opt_cycles = 5
+# > Optimization cycles for the post-processing part. If not given or set to "none" or 0, the program default is chosen. Options: <int> or "none"
+opt_cycles = "none"
 # > Debug this step. Leads to more verbose output as soon as the post-processing part is reached. Options: <bool>
 # > If `debug` is true, the process is terminated after the first (successful or not) post-processing step.
 # > Note: This option is only relevant if the 'postprocess' option in the 'general' section is set to 'true'.
diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py
index 0162edd..6b5ea2c 100644
--- a/src/mindlessgen/prog/config.py
+++ b/src/mindlessgen/prog/config.py
@@ -741,7 +741,7 @@ class PostProcessConfig(BaseConfig):
 
     def __init__(self: PostProcessConfig) -> None:
         self._engine: str = "orca"
-        self._opt_cycles: int | None = 5
+        self._opt_cycles: int | None = None
         self._optimize: bool = True
         self._debug: bool = False
         self._ncores: int = 4
@@ -795,10 +795,20 @@ def opt_cycles(self, opt_cycles: int):
         """
         Set the optimization cycles for post-processing.
         """
-        if not isinstance(opt_cycles, int):
-            raise TypeError("Optimization cycles should be an integer.")
+        if not isinstance(opt_cycles, (int, str)):
+            raise TypeError("Optimization cycles can only be an integer or a string.")
+        if isinstance(opt_cycles, str):
+            if opt_cycles.lower() != "none":
+                raise ValueError(
+                    "Optimization cycles can only be an integer or 'none'."
+                )
+            self._opt_cycles = None
+            return
+        if opt_cycles == 0:
+            self._opt_cycles = None
+            return
         if opt_cycles < 0:
-            raise ValueError("Optimization cycles should be 0 or greater.")
+            raise ValueError("Optimization cycles can only be 0 or greater.")
         self._opt_cycles = opt_cycles
 
     @property
diff --git a/src/mindlessgen/qm/orca.py b/src/mindlessgen/qm/orca.py
index 31f2120..1aa8cd8 100644
--- a/src/mindlessgen/qm/orca.py
+++ b/src/mindlessgen/qm/orca.py
@@ -171,7 +171,8 @@ def _gen_input(
         """
         orca_input = f"! {self.cfg.functional} {self.cfg.basis}\n"
         orca_input += f"! DEFGRID{self.cfg.gridsize}\n"
-        orca_input += "! NoTRAH NoSOSCF SlowConv\n"
+        orca_input += "! MiniPrint\n"
+        orca_input += "! NoTRAH\n"
         # "! AutoAux" keyword for super-heavy elements as def2/J ends at Rn
         if any(atom >= 86 for atom in molecule.ati):
             orca_input += "! AutoAux\n"
@@ -179,9 +180,10 @@ def _gen_input(
             orca_input += "! OPT\n"
             if opt_cycles is not None:
                 orca_input += f"%geom MaxIter {opt_cycles} end\n"
-        orca_input += (
-            f"%scf\n\tMaxIter {self.cfg.scf_cycles}\n\tConvergence Medium\nend\n"
-        )
+        orca_input += f"%scf\n\tMaxIter {self.cfg.scf_cycles}\n"
+        if not optimization:
+            orca_input += "\tConvergence Medium\n"
+        orca_input += "end\n"
         orca_input += f"%pal nprocs {ncores} end\n\n"
         orca_input += f"* xyzfile {molecule.charge} {molecule.uhf + 1} {xyzfile}\n"
         return orca_input

From 0ce205a2eb270829273e63eaf710868f3f8ceb66 Mon Sep 17 00:00:00 2001
From: Leopold Seidler <100208101+lmseidler@users.noreply.github.com>
Date: Wed, 19 Feb 2025 12:43:28 +0100
Subject: [PATCH 2/2] Parallelization unnecessary external calls fix (#126)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* initial work

* new parallelization preliminary done

* small fix in tests

* commented unnecessary code

* updated construction sites

* hopefully fixed parallelization

* small test fix

* initial work

* new parallelization preliminary done

* small fix in tests

* commented unnecessary code

* updated construction sites

* hopefully fixed parallelization

* small test fix

* only print when verbosity > 0, otherwise nothing is printed (bad)

* pre-commit fixes

* added ncores to config + implemented setting ncores for external programs

* fixed tests

* added check for number of cores available vs needed

* pre-commit

* test fix

* fixed tm implementation

* updated main.py

* tqdm progress bar

* updated dependencies

* mypy types import

* moved warnings in correct bracket

* updated default config toml

* added final output of molecules and timing

* fixed time

* better time info

* print formatting

* shift block setup to parallel.py

Signed-off-by: Marcel Müller <marcel.mueller@thch.uni-bonn.de>

* avoid UnboundLocalError

Signed-off-by: Marcel Müller <marcel.mueller@thch.uni-bonn.de>

* some code formatting and printout adjustments

Signed-off-by: Marcel Müller <marcel.mueller@thch.uni-bonn.de>

* shifted CHANGELOG entry to correct position

Signed-off-by: Marcel Müller <marcel.mueller@thch.uni-bonn.de>

* update CODEOWNERS file

Signed-off-by: Marcel Müller <marcel.mueller@thch.uni-bonn.de>

* parallelization unnecessary external calls in case of stop_event set fixed

* test fix

* extend stop_event check also for postprocessing part

Signed-off-by: Marcel Müller <marcel.mueller@thch.uni-bonn.de>

* Update src/mindlessgen/molecules/refinement.py

Co-authored-by: Marcel Mueller <marcel.mueller@thch.uni-bonn.de>

* coding practice

* Update src/mindlessgen/generator/main.py

Co-authored-by: Marcel Mueller <marcel.mueller@thch.uni-bonn.de>

* Update src/mindlessgen/generator/main.py

Co-authored-by: Marcel Mueller <marcel.mueller@thch.uni-bonn.de>

* Update src/mindlessgen/generator/main.py

Co-authored-by: Marcel Mueller <marcel.mueller@thch.uni-bonn.de>

---------

Signed-off-by: Marcel Müller <marcel.mueller@thch.uni-bonn.de>
Co-authored-by: Marcel Müller <marcel.mueller@thch.uni-bonn.de>
---
 CHANGELOG.md                             |  1 +
 src/mindlessgen/generator/main.py        | 32 +++++++++++++++++++++---
 src/mindlessgen/molecules/postprocess.py |  8 +++++-
 src/mindlessgen/molecules/refinement.py  | 10 +++++++-
 test/test_molecules/test_refinement.py   |  5 +++-
 5 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a9032a..c4fa527 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - precision (# significant digits) of the coordinate files (`get_coord_str` and `get_xyz_str`) increased from 7 to 14
 - catch encoding errors when reading `Turbomole._run_opt` output files
 - bug in the parallelization, leading to a dead `mindlessgen` execution as a consequence of not allowing the required number of cores
+- stop_event checked before every external call to avoid unnecessary executions
 
 ## [0.5.0] - 2024-12-16
 ### Changed
diff --git a/src/mindlessgen/generator/main.py b/src/mindlessgen/generator/main.py
index 7354b95..96657a6 100644
--- a/src/mindlessgen/generator/main.py
+++ b/src/mindlessgen/generator/main.py
@@ -147,6 +147,9 @@ def generator(config: ConfigManager) -> tuple[list[Molecule], int]:
         # a dynamic setting would also be thinkable and straightforward to implement
         tasks: list[Future[Molecule | None]] = []
         for block in blocks:
+            # Every block is tasked to find block.num_molecules sequentially,
+            # For every block there is only one single_molecule_generator active
+            # (the others wait for resources)
             for _ in range(block.num_molecules):
                 tasks.append(
                     executor.submit(
@@ -205,7 +208,7 @@ def single_molecule_generator(
     ncores: int,
 ) -> Molecule | None:
     """
-    Generate a single molecule (from start to finish).
+    Generate a single molecule (from start to finish). Returns None only if all cycles fail.
     """
 
     # Wait for enough cores (cores freed automatically upon leaving managed context)
@@ -258,7 +261,6 @@ def single_molecule_generator(
         if config.general.verbosity > 0:
             print(f"Written molecule file 'mlm_{optimized_molecule.name}.xyz'.\n")
     elif optimized_molecule is None:
-        # TODO: will this conflict with progress bar?
         warnings.warn(
             "Molecule generation including optimization (and postprocessing) "
             + f"failed for all cycles for molecule {molcount + 1}."
@@ -276,7 +278,12 @@ def single_molecule_step(
     cycle: int,
     stop_event: Event,
 ) -> Molecule | None:
-    """Execute one step in a single molecule generation"""
+    """
+    Execute one step in a single molecule generation.
+    Returns None if
+    ... stop_event is set at any point.
+    ... if the molecule generation failed for this trial.
+    """
 
     if stop_event.is_set():
         return None  # Exit early if a molecule has already been found
@@ -326,8 +333,15 @@ def single_molecule_step(
             config.generate,
             config.refine,
             resources_local,
+            stop_event,
             verbosity=config.general.verbosity,
         )
+        # NOTE: regarding parallelization: there can only be ONE external call running
+        # for the task that is set to use the maximum number of cores
+        # e.g. we have 4 cores available, xtb SP always uses 1, refine uses e.g. 2, postprocessing uses 4
+        # then only 1 postprocessing can run concurrently, 2 refinements, 4 xtb SP
+        # If multiple tasks run (e.g. 2 refinements) concurrently and the stop_event is set,
+        # the other tasks (the second refinement) will not get terminated
     except RuntimeError as e:
         if config.general.verbosity > 0:
             print(f"Refinement failed for cycle {cycle + 1}.")
@@ -338,6 +352,11 @@ def single_molecule_step(
         if config.refine.debug:
             stop_event.set()
 
+    # Catch any interrupted iterative optimization steps
+    # (None should only be returned (if not caught by an exception) if it got stopped early by the stop_event)
+    if optimized_molecule is None:
+        return None
+
     if config.general.symmetrization:
         try:
             optimized_molecule = structure_mod_model.get_symmetric_structure(
@@ -357,6 +376,7 @@ def single_molecule_step(
                 postprocess_engine,  # type: ignore
                 config.postprocess,
                 resources_local,
+                stop_event,
                 verbosity=config.general.verbosity,
             )
         except RuntimeError as e:
@@ -368,13 +388,17 @@ def single_molecule_step(
         finally:
             if config.postprocess.debug:
                 stop_event.set()  # Stop further runs if debugging of this step is enabled
+        # Catch any interrupted postprocessing steps
+        # (None should only be returned (if not caught by an exception) if it got stopped early by the stop_event)
+        if optimized_molecule is None:
+            return None
         if config.general.verbosity > 1:
             print("Postprocessing successful.")
 
     if not stop_event.is_set():
         stop_event.set()  # Signal other processes to stop
         return optimized_molecule
-    elif config.refine.debug or config.postprocess.debug:
+    if config.refine.debug or config.postprocess.debug:
         return optimized_molecule
     else:
         return None
diff --git a/src/mindlessgen/molecules/postprocess.py b/src/mindlessgen/molecules/postprocess.py
index 8e0e767..39579f9 100644
--- a/src/mindlessgen/molecules/postprocess.py
+++ b/src/mindlessgen/molecules/postprocess.py
@@ -2,6 +2,7 @@
 Postprocess the generated molecules.
 """
 
+from threading import Event
 from .molecule import Molecule
 from ..qm import QMMethod
 from ..prog import PostProcessConfig, ResourceMonitor
@@ -12,8 +13,9 @@ def postprocess_mol(
     engine: QMMethod,
     config: PostProcessConfig,
     resources_local: ResourceMonitor,
+    stop_event: Event,
     verbosity: int = 1,
-) -> Molecule:
+) -> Molecule | None:
     """
     Postprocess the generated molecule.
 
@@ -31,6 +33,8 @@ def postprocess_mol(
     if config.optimize:
         try:
             with resources_local.occupy_cores(config.ncores):
+                if stop_event.is_set():
+                    return None
                 postprocmol = engine.optimize(
                     mol,
                     max_cycles=config.opt_cycles,
@@ -42,6 +46,8 @@ def postprocess_mol(
     else:
         try:
             with resources_local.occupy_cores(config.ncores):
+                if stop_event.is_set():
+                    return None
                 engine.singlepoint(mol, config.ncores, verbosity=verbosity)
             postprocmol = mol
         except RuntimeError as e:
diff --git a/src/mindlessgen/molecules/refinement.py b/src/mindlessgen/molecules/refinement.py
index 7bb17fe..c851139 100644
--- a/src/mindlessgen/molecules/refinement.py
+++ b/src/mindlessgen/molecules/refinement.py
@@ -3,6 +3,7 @@
 to obtain finally a valid molecule.
 """
 
+from threading import Event
 import warnings
 from pathlib import Path
 import networkx as nx  # type: ignore
@@ -30,8 +31,9 @@ def iterative_optimization(
     config_generate: GenerateConfig,
     config_refine: RefineConfig,
     resources_local: ResourceMonitor,
+    stop_event: Event,
     verbosity: int = 1,
-) -> Molecule:
+) -> Molecule | None:
     """
     Iterative optimization and fragment detection.
     """
@@ -45,6 +47,8 @@ def iterative_optimization(
         # Run single points first, start optimization if scf converges
         try:
             with resources_local.occupy_cores(1):
+                if stop_event.is_set():
+                    return None
                 _ = engine.singlepoint(rev_mol, 1, verbosity)
         except RuntimeError as e:
             raise RuntimeError(
@@ -54,6 +58,8 @@ def iterative_optimization(
         # Optimize the current molecule
         try:
             with resources_local.occupy_cores(config_refine.ncores):
+                if stop_event.is_set():
+                    return None
                 rev_mol = engine.optimize(
                     rev_mol, config_refine.ncores, None, verbosity
                 )
@@ -161,6 +167,8 @@ def iterative_optimization(
 
     try:
         with resources_local.occupy_cores(1):
+            if stop_event.is_set():
+                return None
             gap_sufficient = engine.check_gap(
                 molecule=rev_mol,
                 threshold=config_refine.hlgap,
diff --git a/test/test_molecules/test_refinement.py b/test/test_molecules/test_refinement.py
index 583ef03..f2e6a5b 100644
--- a/test/test_molecules/test_refinement.py
+++ b/test/test_molecules/test_refinement.py
@@ -144,18 +144,21 @@ def test_iterative_optimization(mol_C13H14: Molecule, mol_C7H8: Molecule) -> Non
     else:
         raise NotImplementedError("Engine not implemented.")
     mol = mol_C13H14
-    with setup_managers(1, 1) as (_, _, resources):
+    with setup_managers(1, 1) as (_, manager, resources):
+        stop_event = manager.Event()
         mol_opt = iterative_optimization(
             mol,
             engine,
             config.generate,
             config.refine,
             resources,
+            stop_event,
             verbosity=2,
         )
     mol_ref = mol_C7H8
 
     # assert number of atoms in mol_opt is equal to number of atoms in mol_ref
+    assert mol_opt is not None
     assert mol_opt.num_atoms == mol_ref.num_atoms
     # assert that the coordinates of mol_opt are close to the coordinates of mol_ref
     assert np.allclose(mol_opt.xyz, mol_ref.xyz, atol=1e-4)