grimme-lab · marcelmbn · Feb 19, 2025 · Feb 17, 2025 · Feb 19, 2025 · Feb 19, 2025
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - dict keys for elemental compositions will now always be checked for validity
 - Renamed GP3-xTB to g-xTB
 - Moved constants and (empirical) parameters to the `data` module
+- Default for optimization cycles in the postprocessing step set to program default (convergence)
 
 ### Deprecated
 - Nothing will be printed while multiple molecules are generated in parallel, tqdm-based progress bar instead
@@ -20,12 +21,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - support for TURBOMOLE as QM engine
 - updated the parallelization to work over the number of molecules
 - possibility to generate symmetrical molecules (choice from rotation, inversion, mirroring)
+- Number of optimization steps in the postprocessing part can be set to program default by `none`
 
 ### Fixed
 - version string is now correctly formatted and printed
 - precision (# significant digits) of the coordinate files (`get_coord_str` and `get_xyz_str`) increased from 7 to 14
 - catch encoding errors when reading `Turbomole._run_opt` output files
 - bug in the parallelization, leading to a dead `mindlessgen` execution as a consequence of not allowing the required number of cores
+- stop_event checked before every external call to avoid unnecessary executions
 
 ## [0.5.0] - 2024-12-16
 ### Changed

@@ -69,8 +69,8 @@ ncores = 2
 engine = "orca"
 # > Optimize geometry in the post-processing part. If `false`, only a single-point is conducted. Options: <bool>
 optimize = true
-# > Optimization cycles for the post-processing part. If not given, the program default is chosen. Options: <int>
-opt_cycles = 5
+# > Optimization cycles for the post-processing part. If not given or set to "none" or 0, the program default is chosen. Options: <int> or "none"
+opt_cycles = "none"
 # > Debug this step. Leads to more verbose output as soon as the post-processing part is reached. Options: <bool>
 # > If `debug` is true, the process is terminated after the first (successful or not) post-processing step.
 # > Note: This option is only relevant if the 'postprocess' option in the 'general' section is set to 'true'.

@@ -147,6 +147,9 @@ def generator(config: ConfigManager) -> tuple[list[Molecule], int]:
         # a dynamic setting would also be thinkable and straightforward to implement
         tasks: list[Future[Molecule | None]] = []
         for block in blocks:
+            # Every block is tasked to find block.num_molecules sequentially,
+            # For every block there is only one single_molecule_generator active
+            # (the others wait for resources)
             for _ in range(block.num_molecules):
                 tasks.append(
                     executor.submit(
@@ -205,7 +208,7 @@ def single_molecule_generator(
     ncores: int,
 ) -> Molecule | None:
     """
-    Generate a single molecule (from start to finish).
+    Generate a single molecule (from start to finish). Returns None only if all cycles fail.
     """
 
     # Wait for enough cores (cores freed automatically upon leaving managed context)
@@ -268,7 +271,6 @@ def single_molecule_generator(
                     f"Written monomer file 'mlm_{optimized_molecule.name}_monomer.xyz'.\n"
                 )
     elif optimized_molecule is None:
-        # TODO: will this conflict with progress bar?
         warnings.warn(
             "Molecule generation including optimization (and postprocessing) "
             + f"failed for all cycles for molecule {molcount + 1}."
@@ -286,7 +288,12 @@ def single_molecule_step(
     cycle: int,
     stop_event: Event,
 ) -> Molecule | None:
-    """Execute one step in a single molecule generation"""
+    """
+    Execute one step in a single molecule generation.
+    Returns None if
+    ... stop_event is set at any point.
+    ... if the molecule generation failed for this trial.
+    """
 
     if stop_event.is_set():
         return None  # Exit early if a molecule has already been found
@@ -336,8 +343,15 @@ def single_molecule_step(
             config.generate,
             config.refine,
             resources_local,
+            stop_event,
             verbosity=config.general.verbosity,
         )
+        # NOTE: regarding parallelization: there can only be ONE external call running
+        # for the task that is set to use the maximum number of cores
+        # e.g. we have 4 cores available, xtb SP always uses 1, refine uses e.g. 2, postprocessing uses 4
+        # then only 1 postprocessing can run concurrently, 2 refinements, 4 xtb SP
+        # If multiple tasks run (e.g. 2 refinements) concurrently and the stop_event is set,
+        # the other tasks (the second refinement) will not get terminated
     except RuntimeError as e:
         if config.general.verbosity > 0:
             print(f"Refinement failed for cycle {cycle + 1}.")
@@ -348,6 +362,11 @@ def single_molecule_step(
         if config.refine.debug:
             stop_event.set()
 
+    # Catch any interrupted iterative optimization steps
+    # (None should only be returned (if not caught by an exception) if it got stopped early by the stop_event)
+    if optimized_molecule is None:
+        return None
+
     if config.general.symmetrization:
         try:
             optimized_molecule = structure_mod_model.get_symmetric_structure(
@@ -367,6 +386,7 @@ def single_molecule_step(
                 postprocess_engine,  # type: ignore
                 config.postprocess,
                 resources_local,
+                stop_event,
                 verbosity=config.general.verbosity,
             )
         except RuntimeError as e:
@@ -378,6 +398,10 @@ def single_molecule_step(
         finally:
             if config.postprocess.debug:
                 stop_event.set()  # Stop further runs if debugging of this step is enabled
+        # Catch any interrupted postprocessing steps
+        # (None should only be returned (if not caught by an exception) if it got stopped early by the stop_event)
+        if optimized_molecule is None:
+            return None
         if config.general.verbosity > 1:
             print("Postprocessing successful.")
 
@@ -422,7 +446,7 @@ def single_molecule_step(
     if not stop_event.is_set():
         stop_event.set()  # Signal other processes to stop
         return optimized_molecule
-    elif config.refine.debug or config.postprocess.debug:
+    if config.refine.debug or config.postprocess.debug:
         return optimized_molecule
     else:
         return None

@@ -2,6 +2,7 @@
 Postprocess the generated molecules.
 """
 
+from threading import Event
 from .molecule import Molecule
 from ..qm import QMMethod
 from ..prog import PostProcessConfig, ResourceMonitor
@@ -12,8 +13,9 @@ def postprocess_mol(
     engine: QMMethod,
     config: PostProcessConfig,
     resources_local: ResourceMonitor,
+    stop_event: Event,
     verbosity: int = 1,
-) -> Molecule:
+) -> Molecule | None:
     """
     Postprocess the generated molecule.
 
@@ -31,6 +33,8 @@ def postprocess_mol(
     if config.optimize:
         try:
             with resources_local.occupy_cores(config.ncores):
+                if stop_event.is_set():
+                    return None
                 postprocmol = engine.optimize(
                     mol,
                     max_cycles=config.opt_cycles,
@@ -42,6 +46,8 @@ def postprocess_mol(
     else:
         try:
             with resources_local.occupy_cores(config.ncores):
+                if stop_event.is_set():
+                    return None
                 engine.singlepoint(mol, config.ncores, verbosity=verbosity)
             postprocmol = mol
         except RuntimeError as e:

@@ -3,6 +3,7 @@
 to obtain finally a valid molecule.
 """
 
+from threading import Event
 import warnings
 from pathlib import Path
 import networkx as nx  # type: ignore
@@ -30,8 +31,9 @@ def iterative_optimization(
     config_generate: GenerateConfig,
     config_refine: RefineConfig,
     resources_local: ResourceMonitor,
+    stop_event: Event,
     verbosity: int = 1,
-) -> Molecule:
+) -> Molecule | None:
     """
     Iterative optimization and fragment detection.
     """
@@ -45,6 +47,8 @@ def iterative_optimization(
         # Run single points first, start optimization if scf converges
         try:
             with resources_local.occupy_cores(1):
+                if stop_event.is_set():
+                    return None
                 _ = engine.singlepoint(rev_mol, 1, verbosity)
         except RuntimeError as e:
             raise RuntimeError(
@@ -54,6 +58,8 @@ def iterative_optimization(
         # Optimize the current molecule
         try:
             with resources_local.occupy_cores(config_refine.ncores):
+                if stop_event.is_set():
+                    return None
                 rev_mol = engine.optimize(
                     rev_mol, config_refine.ncores, None, verbosity
                 )
@@ -161,6 +167,8 @@ def iterative_optimization(
 
     try:
         with resources_local.occupy_cores(1):
+            if stop_event.is_set():
+                return None
             gap_sufficient = engine.check_gap(
                 molecule=rev_mol,
                 threshold=config_refine.hlgap,

@@ -785,7 +785,7 @@ class PostProcessConfig(BaseConfig):
 
     def __init__(self: PostProcessConfig) -> None:
         self._engine: str = "orca"
-        self._opt_cycles: int | None = 5
+        self._opt_cycles: int | None = None
         self._optimize: bool = True
         self._debug: bool = False
         self._ncores: int = 4
@@ -839,10 +839,20 @@ def opt_cycles(self, opt_cycles: int):
         """
         Set the optimization cycles for post-processing.
         """
-        if not isinstance(opt_cycles, int):
-            raise TypeError("Optimization cycles should be an integer.")
+        if not isinstance(opt_cycles, (int, str)):
+            raise TypeError("Optimization cycles can only be an integer or a string.")
+        if isinstance(opt_cycles, str):
+            if opt_cycles.lower() != "none":
+                raise ValueError(
+                    "Optimization cycles can only be an integer or 'none'."
+                )
+            self._opt_cycles = None
+            return
+        if opt_cycles == 0:
+            self._opt_cycles = None
+            return
         if opt_cycles < 0:
-            raise ValueError("Optimization cycles should be 0 or greater.")
+            raise ValueError("Optimization cycles can only be 0 or greater.")
         self._opt_cycles = opt_cycles
 
     @property

@@ -171,17 +171,19 @@ def _gen_input(
         """
         orca_input = f"! {self.cfg.functional} {self.cfg.basis}\n"
         orca_input += f"! DEFGRID{self.cfg.gridsize}\n"
-        orca_input += "! NoTRAH NoSOSCF SlowConv\n"
+        orca_input += "! MiniPrint\n"
+        orca_input += "! NoTRAH\n"
         # "! AutoAux" keyword for super-heavy elements as def2/J ends at Rn
         if any(atom >= 86 for atom in molecule.ati):
             orca_input += "! AutoAux\n"
         if optimization:
             orca_input += "! OPT\n"
             if opt_cycles is not None:
                 orca_input += f"%geom MaxIter {opt_cycles} end\n"
-        orca_input += (
-            f"%scf\n\tMaxIter {self.cfg.scf_cycles}\n\tConvergence Medium\nend\n"
-        )
+        orca_input += f"%scf\n\tMaxIter {self.cfg.scf_cycles}\n"
+        if not optimization:
+            orca_input += "\tConvergence Medium\n"
+        orca_input += "end\n"
         orca_input += f"%pal nprocs {ncores} end\n\n"
         orca_input += f"* xyzfile {molecule.charge} {molecule.uhf + 1} {xyzfile}\n"
         return orca_input

@@ -144,18 +144,21 @@ def test_iterative_optimization(mol_C13H14: Molecule, mol_C7H8: Molecule) -> Non
     else:
         raise NotImplementedError("Engine not implemented.")
     mol = mol_C13H14
-    with setup_managers(1, 1) as (_, _, resources):
+    with setup_managers(1, 1) as (_, manager, resources):
+        stop_event = manager.Event()
         mol_opt = iterative_optimization(
             mol,
             engine,
             config.generate,
             config.refine,
             resources,
+            stop_event,
             verbosity=2,
         )
     mol_ref = mol_C7H8
 
     # assert number of atoms in mol_opt is equal to number of atoms in mol_ref
+    assert mol_opt is not None
     assert mol_opt.num_atoms == mol_ref.num_atoms
     # assert that the coordinates of mol_opt are close to the coordinates of mol_ref
     assert np.allclose(mol_opt.xyz, mol_ref.xyz, atol=1e-4)