From 43a21f805af8dd1846dd5d7e85cddcbe82bb136e Mon Sep 17 00:00:00 2001 From: Marcel Mueller Date: Mon, 17 Feb 2025 17:52:01 +0100 Subject: [PATCH 1/2] Fix inconsistencies in `ORCA` interface (#125) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix inconsistencies in ORCA interface Signed-off-by: Marcel Müller * make new default more consistent; allow setting to 'none' Signed-off-by: Marcel Müller * reduce printout as nobody sees it anyway Signed-off-by: Marcel Müller * forgot some return statements Signed-off-by: Marcel Müller --------- Signed-off-by: Marcel Müller --- CHANGELOG.md | 2 ++ mindlessgen.toml | 4 ++-- src/mindlessgen/prog/config.py | 18 ++++++++++++++---- src/mindlessgen/qm/orca.py | 10 ++++++---- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0842e14..4a9032a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - dict keys for elemental compositions will now always be checked for validity - Renamed GP3-xTB to g-xTB - Moved constants and (empirical) parameters to the `data` module +- Default for optimization cycles in the postprocessing step set to program default (convergence) ### Deprecated - Nothing will be printed while multiple molecules are generated in parallel, tqdm-based progress bar instead @@ -20,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - support for TURBOMOLE as QM engine - updated the parallelization to work over the number of molecules - possibility to generate symmetrical molecules (choice from rotation, inversion, mirroring) +- Number of optimization steps in the postprocessing part can be set to program default by `none` ### Fixed - version string is now correctly formatted and printed diff --git a/mindlessgen.toml b/mindlessgen.toml index 2c29872..53c288e 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -69,8 +69,8 @@ ncores = 2 engine = "orca" # > Optimize geometry in the post-processing part. If `false`, only a single-point is conducted. Options: optimize = true -# > Optimization cycles for the post-processing part. If not given, the program default is chosen. Options: -opt_cycles = 5 +# > Optimization cycles for the post-processing part. If not given or set to "none" or 0, the program default is chosen. Options: or "none" +opt_cycles = "none" # > Debug this step. Leads to more verbose output as soon as the post-processing part is reached. Options: # > If `debug` is true, the process is terminated after the first (successful or not) post-processing step. # > Note: This option is only relevant if the 'postprocess' option in the 'general' section is set to 'true'. diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index 0162edd..6b5ea2c 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -741,7 +741,7 @@ class PostProcessConfig(BaseConfig): def __init__(self: PostProcessConfig) -> None: self._engine: str = "orca" - self._opt_cycles: int | None = 5 + self._opt_cycles: int | None = None self._optimize: bool = True self._debug: bool = False self._ncores: int = 4 @@ -795,10 +795,20 @@ def opt_cycles(self, opt_cycles: int): """ Set the optimization cycles for post-processing. """ - if not isinstance(opt_cycles, int): - raise TypeError("Optimization cycles should be an integer.") + if not isinstance(opt_cycles, (int, str)): + raise TypeError("Optimization cycles can only be an integer or a string.") + if isinstance(opt_cycles, str): + if opt_cycles.lower() != "none": + raise ValueError( + "Optimization cycles can only be an integer or 'none'." + ) + self._opt_cycles = None + return + if opt_cycles == 0: + self._opt_cycles = None + return if opt_cycles < 0: - raise ValueError("Optimization cycles should be 0 or greater.") + raise ValueError("Optimization cycles can only be 0 or greater.") self._opt_cycles = opt_cycles @property diff --git a/src/mindlessgen/qm/orca.py b/src/mindlessgen/qm/orca.py index 31f2120..1aa8cd8 100644 --- a/src/mindlessgen/qm/orca.py +++ b/src/mindlessgen/qm/orca.py @@ -171,7 +171,8 @@ def _gen_input( """ orca_input = f"! {self.cfg.functional} {self.cfg.basis}\n" orca_input += f"! DEFGRID{self.cfg.gridsize}\n" - orca_input += "! NoTRAH NoSOSCF SlowConv\n" + orca_input += "! MiniPrint\n" + orca_input += "! NoTRAH\n" # "! AutoAux" keyword for super-heavy elements as def2/J ends at Rn if any(atom >= 86 for atom in molecule.ati): orca_input += "! AutoAux\n" @@ -179,9 +180,10 @@ def _gen_input( orca_input += "! OPT\n" if opt_cycles is not None: orca_input += f"%geom MaxIter {opt_cycles} end\n" - orca_input += ( - f"%scf\n\tMaxIter {self.cfg.scf_cycles}\n\tConvergence Medium\nend\n" - ) + orca_input += f"%scf\n\tMaxIter {self.cfg.scf_cycles}\n" + if not optimization: + orca_input += "\tConvergence Medium\n" + orca_input += "end\n" orca_input += f"%pal nprocs {ncores} end\n\n" orca_input += f"* xyzfile {molecule.charge} {molecule.uhf + 1} {xyzfile}\n" return orca_input From 0ce205a2eb270829273e63eaf710868f3f8ceb66 Mon Sep 17 00:00:00 2001 From: Leopold Seidler <100208101+lmseidler@users.noreply.github.com> Date: Wed, 19 Feb 2025 12:43:28 +0100 Subject: [PATCH 2/2] Parallelization unnecessary external calls fix (#126) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * initial work * new parallelization preliminary done * small fix in tests * commented unnecessary code * updated construction sites * hopefully fixed parallelization * small test fix * initial work * new parallelization preliminary done * small fix in tests * commented unnecessary code * updated construction sites * hopefully fixed parallelization * small test fix * only print when verbosity > 0, otherwise nothing is printed (bad) * pre-commit fixes * added ncores to config + implemented setting ncores for external programs * fixed tests * added check for number of cores available vs needed * pre-commit * test fix * fixed tm implementation * updated main.py * tqdm progress bar * updated dependencies * mypy types import * moved warnings in correct bracket * updated default config toml * added final output of molecules and timing * fixed time * better time info * print formatting * shift block setup to parallel.py Signed-off-by: Marcel Müller * avoid UnboundLocalError Signed-off-by: Marcel Müller * some code formatting and printout adjustments Signed-off-by: Marcel Müller * shifted CHANGELOG entry to correct position Signed-off-by: Marcel Müller * update CODEOWNERS file Signed-off-by: Marcel Müller * parallelization unnecessary external calls in case of stop_event set fixed * test fix * extend stop_event check also for postprocessing part Signed-off-by: Marcel Müller * Update src/mindlessgen/molecules/refinement.py Co-authored-by: Marcel Mueller * coding practice * Update src/mindlessgen/generator/main.py Co-authored-by: Marcel Mueller * Update src/mindlessgen/generator/main.py Co-authored-by: Marcel Mueller * Update src/mindlessgen/generator/main.py Co-authored-by: Marcel Mueller --------- Signed-off-by: Marcel Müller Co-authored-by: Marcel Müller --- CHANGELOG.md | 1 + src/mindlessgen/generator/main.py | 32 +++++++++++++++++++++--- src/mindlessgen/molecules/postprocess.py | 8 +++++- src/mindlessgen/molecules/refinement.py | 10 +++++++- test/test_molecules/test_refinement.py | 5 +++- 5 files changed, 49 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a9032a..c4fa527 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - precision (# significant digits) of the coordinate files (`get_coord_str` and `get_xyz_str`) increased from 7 to 14 - catch encoding errors when reading `Turbomole._run_opt` output files - bug in the parallelization, leading to a dead `mindlessgen` execution as a consequence of not allowing the required number of cores +- stop_event checked before every external call to avoid unnecessary executions ## [0.5.0] - 2024-12-16 ### Changed diff --git a/src/mindlessgen/generator/main.py b/src/mindlessgen/generator/main.py index 7354b95..96657a6 100644 --- a/src/mindlessgen/generator/main.py +++ b/src/mindlessgen/generator/main.py @@ -147,6 +147,9 @@ def generator(config: ConfigManager) -> tuple[list[Molecule], int]: # a dynamic setting would also be thinkable and straightforward to implement tasks: list[Future[Molecule | None]] = [] for block in blocks: + # Every block is tasked to find block.num_molecules sequentially, + # For every block there is only one single_molecule_generator active + # (the others wait for resources) for _ in range(block.num_molecules): tasks.append( executor.submit( @@ -205,7 +208,7 @@ def single_molecule_generator( ncores: int, ) -> Molecule | None: """ - Generate a single molecule (from start to finish). + Generate a single molecule (from start to finish). Returns None only if all cycles fail. """ # Wait for enough cores (cores freed automatically upon leaving managed context) @@ -258,7 +261,6 @@ def single_molecule_generator( if config.general.verbosity > 0: print(f"Written molecule file 'mlm_{optimized_molecule.name}.xyz'.\n") elif optimized_molecule is None: - # TODO: will this conflict with progress bar? warnings.warn( "Molecule generation including optimization (and postprocessing) " + f"failed for all cycles for molecule {molcount + 1}." @@ -276,7 +278,12 @@ def single_molecule_step( cycle: int, stop_event: Event, ) -> Molecule | None: - """Execute one step in a single molecule generation""" + """ + Execute one step in a single molecule generation. + Returns None if + ... stop_event is set at any point. + ... if the molecule generation failed for this trial. + """ if stop_event.is_set(): return None # Exit early if a molecule has already been found @@ -326,8 +333,15 @@ def single_molecule_step( config.generate, config.refine, resources_local, + stop_event, verbosity=config.general.verbosity, ) + # NOTE: regarding parallelization: there can only be ONE external call running + # for the task that is set to use the maximum number of cores + # e.g. we have 4 cores available, xtb SP always uses 1, refine uses e.g. 2, postprocessing uses 4 + # then only 1 postprocessing can run concurrently, 2 refinements, 4 xtb SP + # If multiple tasks run (e.g. 2 refinements) concurrently and the stop_event is set, + # the other tasks (the second refinement) will not get terminated except RuntimeError as e: if config.general.verbosity > 0: print(f"Refinement failed for cycle {cycle + 1}.") @@ -338,6 +352,11 @@ def single_molecule_step( if config.refine.debug: stop_event.set() + # Catch any interrupted iterative optimization steps + # (None should only be returned (if not caught by an exception) if it got stopped early by the stop_event) + if optimized_molecule is None: + return None + if config.general.symmetrization: try: optimized_molecule = structure_mod_model.get_symmetric_structure( @@ -357,6 +376,7 @@ def single_molecule_step( postprocess_engine, # type: ignore config.postprocess, resources_local, + stop_event, verbosity=config.general.verbosity, ) except RuntimeError as e: @@ -368,13 +388,17 @@ def single_molecule_step( finally: if config.postprocess.debug: stop_event.set() # Stop further runs if debugging of this step is enabled + # Catch any interrupted postprocessing steps + # (None should only be returned (if not caught by an exception) if it got stopped early by the stop_event) + if optimized_molecule is None: + return None if config.general.verbosity > 1: print("Postprocessing successful.") if not stop_event.is_set(): stop_event.set() # Signal other processes to stop return optimized_molecule - elif config.refine.debug or config.postprocess.debug: + if config.refine.debug or config.postprocess.debug: return optimized_molecule else: return None diff --git a/src/mindlessgen/molecules/postprocess.py b/src/mindlessgen/molecules/postprocess.py index 8e0e767..39579f9 100644 --- a/src/mindlessgen/molecules/postprocess.py +++ b/src/mindlessgen/molecules/postprocess.py @@ -2,6 +2,7 @@ Postprocess the generated molecules. """ +from threading import Event from .molecule import Molecule from ..qm import QMMethod from ..prog import PostProcessConfig, ResourceMonitor @@ -12,8 +13,9 @@ def postprocess_mol( engine: QMMethod, config: PostProcessConfig, resources_local: ResourceMonitor, + stop_event: Event, verbosity: int = 1, -) -> Molecule: +) -> Molecule | None: """ Postprocess the generated molecule. @@ -31,6 +33,8 @@ def postprocess_mol( if config.optimize: try: with resources_local.occupy_cores(config.ncores): + if stop_event.is_set(): + return None postprocmol = engine.optimize( mol, max_cycles=config.opt_cycles, @@ -42,6 +46,8 @@ def postprocess_mol( else: try: with resources_local.occupy_cores(config.ncores): + if stop_event.is_set(): + return None engine.singlepoint(mol, config.ncores, verbosity=verbosity) postprocmol = mol except RuntimeError as e: diff --git a/src/mindlessgen/molecules/refinement.py b/src/mindlessgen/molecules/refinement.py index 7bb17fe..c851139 100644 --- a/src/mindlessgen/molecules/refinement.py +++ b/src/mindlessgen/molecules/refinement.py @@ -3,6 +3,7 @@ to obtain finally a valid molecule. """ +from threading import Event import warnings from pathlib import Path import networkx as nx # type: ignore @@ -30,8 +31,9 @@ def iterative_optimization( config_generate: GenerateConfig, config_refine: RefineConfig, resources_local: ResourceMonitor, + stop_event: Event, verbosity: int = 1, -) -> Molecule: +) -> Molecule | None: """ Iterative optimization and fragment detection. """ @@ -45,6 +47,8 @@ def iterative_optimization( # Run single points first, start optimization if scf converges try: with resources_local.occupy_cores(1): + if stop_event.is_set(): + return None _ = engine.singlepoint(rev_mol, 1, verbosity) except RuntimeError as e: raise RuntimeError( @@ -54,6 +58,8 @@ def iterative_optimization( # Optimize the current molecule try: with resources_local.occupy_cores(config_refine.ncores): + if stop_event.is_set(): + return None rev_mol = engine.optimize( rev_mol, config_refine.ncores, None, verbosity ) @@ -161,6 +167,8 @@ def iterative_optimization( try: with resources_local.occupy_cores(1): + if stop_event.is_set(): + return None gap_sufficient = engine.check_gap( molecule=rev_mol, threshold=config_refine.hlgap, diff --git a/test/test_molecules/test_refinement.py b/test/test_molecules/test_refinement.py index 583ef03..f2e6a5b 100644 --- a/test/test_molecules/test_refinement.py +++ b/test/test_molecules/test_refinement.py @@ -144,18 +144,21 @@ def test_iterative_optimization(mol_C13H14: Molecule, mol_C7H8: Molecule) -> Non else: raise NotImplementedError("Engine not implemented.") mol = mol_C13H14 - with setup_managers(1, 1) as (_, _, resources): + with setup_managers(1, 1) as (_, manager, resources): + stop_event = manager.Event() mol_opt = iterative_optimization( mol, engine, config.generate, config.refine, resources, + stop_event, verbosity=2, ) mol_ref = mol_C7H8 # assert number of atoms in mol_opt is equal to number of atoms in mol_ref + assert mol_opt is not None assert mol_opt.num_atoms == mol_ref.num_atoms # assert that the coordinates of mol_opt are close to the coordinates of mol_ref assert np.allclose(mol_opt.xyz, mol_ref.xyz, atol=1e-4)