Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev/gp3 ipea #127

Merged
merged 3 commits into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- dict keys for elemental compositions will now always be checked for validity
- Renamed GP3-xTB to g-xTB
- Moved constants and (empirical) parameters to the `data` module
- Default for optimization cycles in the postprocessing step set to program default (convergence)

### Deprecated
- Nothing will be printed while multiple molecules are generated in parallel; a tqdm-based progress bar is shown instead
Expand All @@ -20,12 +21,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- support for TURBOMOLE as QM engine
- updated the parallelization to work over the number of molecules
- possibility to generate symmetrical molecules (choice from rotation, inversion, mirroring)
- Number of optimization steps in the postprocessing part can be set to program default by `none`

### Fixed
- version string is now correctly formatted and printed
- precision (# significant digits) of the coordinate files (`get_coord_str` and `get_xyz_str`) increased from 7 to 14
- catch encoding errors when reading `Turbomole._run_opt` output files
- bug in the parallelization, leading to a dead `mindlessgen` execution as a consequence of not allowing the required number of cores
- stop_event checked before every external call to avoid unnecessary executions

## [0.5.0] - 2024-12-16
### Changed
Expand Down
4 changes: 2 additions & 2 deletions mindlessgen.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ ncores = 2
engine = "orca"
# > Optimize geometry in the post-processing part. If `false`, only a single-point is conducted. Options: <bool>
optimize = true
# > Optimization cycles for the post-processing part. If not given, the program default is chosen. Options: <int>
opt_cycles = 5
# > Optimization cycles for the post-processing part. If not given or set to "none" or 0, the program default is chosen. Options: <int> or "none"
opt_cycles = "none"
# > Debug this step. Leads to more verbose output as soon as the post-processing part is reached. Options: <bool>
# > If `debug` is true, the process is terminated after the first (successful or not) post-processing step.
# > Note: This option is only relevant if the 'postprocess' option in the 'general' section is set to 'true'.
Expand Down
32 changes: 28 additions & 4 deletions src/mindlessgen/generator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ def generator(config: ConfigManager) -> tuple[list[Molecule], int]:
# a dynamic setting would also be thinkable and straightforward to implement
tasks: list[Future[Molecule | None]] = []
for block in blocks:
# Every block is tasked to find block.num_molecules sequentially.
# For every block, only one single_molecule_generator is active at a time
# (the others wait for resources).
for _ in range(block.num_molecules):
tasks.append(
executor.submit(
Expand Down Expand Up @@ -205,7 +208,7 @@ def single_molecule_generator(
ncores: int,
) -> Molecule | None:
"""
Generate a single molecule (from start to finish).
Generate a single molecule (from start to finish). Returns None only if all cycles fail.
"""

# Wait for enough cores (cores freed automatically upon leaving managed context)
Expand Down Expand Up @@ -268,7 +271,6 @@ def single_molecule_generator(
f"Written monomer file 'mlm_{optimized_molecule.name}_monomer.xyz'.\n"
)
elif optimized_molecule is None:
# TODO: will this conflict with progress bar?
warnings.warn(
"Molecule generation including optimization (and postprocessing) "
+ f"failed for all cycles for molecule {molcount + 1}."
Expand All @@ -286,7 +288,12 @@ def single_molecule_step(
cycle: int,
stop_event: Event,
) -> Molecule | None:
"""Execute one step in a single molecule generation"""
"""
Execute one step in a single molecule generation.
Returns None if
... stop_event is set at any point.
... the molecule generation failed for this trial.
"""

if stop_event.is_set():
return None # Exit early if a molecule has already been found
Expand Down Expand Up @@ -336,8 +343,15 @@ def single_molecule_step(
config.generate,
config.refine,
resources_local,
stop_event,
verbosity=config.general.verbosity,
)
# NOTE: regarding parallelization: there can only be ONE external call running
# for the task that is set to use the maximum number of cores
# e.g. we have 4 cores available, xtb SP always uses 1, refine uses e.g. 2, postprocessing uses 4
# then only 1 postprocessing can run concurrently, 2 refinements, 4 xtb SP
# If multiple tasks run (e.g. 2 refinements) concurrently and the stop_event is set,
# the other tasks (the second refinement) will not get terminated
except RuntimeError as e:
if config.general.verbosity > 0:
print(f"Refinement failed for cycle {cycle + 1}.")
Expand All @@ -348,6 +362,11 @@ def single_molecule_step(
if config.refine.debug:
stop_event.set()

# Catch any interrupted iterative optimization steps
# (unless an exception was raised, None is only returned when the run was stopped early by the stop_event)
if optimized_molecule is None:
return None

if config.general.symmetrization:
try:
optimized_molecule = structure_mod_model.get_symmetric_structure(
Expand All @@ -367,6 +386,7 @@ def single_molecule_step(
postprocess_engine, # type: ignore
config.postprocess,
resources_local,
stop_event,
verbosity=config.general.verbosity,
)
except RuntimeError as e:
Expand All @@ -378,6 +398,10 @@ def single_molecule_step(
finally:
if config.postprocess.debug:
stop_event.set() # Stop further runs if debugging of this step is enabled
# Catch any interrupted postprocessing steps
# (unless an exception was raised, None is only returned when the run was stopped early by the stop_event)
if optimized_molecule is None:
return None
if config.general.verbosity > 1:
print("Postprocessing successful.")

Expand Down Expand Up @@ -422,7 +446,7 @@ def single_molecule_step(
if not stop_event.is_set():
stop_event.set() # Signal other processes to stop
return optimized_molecule
elif config.refine.debug or config.postprocess.debug:
if config.refine.debug or config.postprocess.debug:
return optimized_molecule
else:
return None
Expand Down
8 changes: 7 additions & 1 deletion src/mindlessgen/molecules/postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Postprocess the generated molecules.
"""

from threading import Event
from .molecule import Molecule
from ..qm import QMMethod
from ..prog import PostProcessConfig, ResourceMonitor
Expand All @@ -12,8 +13,9 @@ def postprocess_mol(
engine: QMMethod,
config: PostProcessConfig,
resources_local: ResourceMonitor,
stop_event: Event,
verbosity: int = 1,
) -> Molecule:
) -> Molecule | None:
"""
Postprocess the generated molecule.

Expand All @@ -31,6 +33,8 @@ def postprocess_mol(
if config.optimize:
try:
with resources_local.occupy_cores(config.ncores):
if stop_event.is_set():
return None
postprocmol = engine.optimize(
mol,
max_cycles=config.opt_cycles,
Expand All @@ -42,6 +46,8 @@ def postprocess_mol(
else:
try:
with resources_local.occupy_cores(config.ncores):
if stop_event.is_set():
return None
engine.singlepoint(mol, config.ncores, verbosity=verbosity)
postprocmol = mol
except RuntimeError as e:
Expand Down
10 changes: 9 additions & 1 deletion src/mindlessgen/molecules/refinement.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
to obtain finally a valid molecule.
"""

from threading import Event
import warnings
from pathlib import Path
import networkx as nx # type: ignore
Expand Down Expand Up @@ -30,8 +31,9 @@ def iterative_optimization(
config_generate: GenerateConfig,
config_refine: RefineConfig,
resources_local: ResourceMonitor,
stop_event: Event,
verbosity: int = 1,
) -> Molecule:
) -> Molecule | None:
"""
Iterative optimization and fragment detection.
"""
Expand All @@ -45,6 +47,8 @@ def iterative_optimization(
# Run single points first, start optimization if scf converges
try:
with resources_local.occupy_cores(1):
if stop_event.is_set():
return None
_ = engine.singlepoint(rev_mol, 1, verbosity)
except RuntimeError as e:
raise RuntimeError(
Expand All @@ -54,6 +58,8 @@ def iterative_optimization(
# Optimize the current molecule
try:
with resources_local.occupy_cores(config_refine.ncores):
if stop_event.is_set():
return None
rev_mol = engine.optimize(
rev_mol, config_refine.ncores, None, verbosity
)
Expand Down Expand Up @@ -161,6 +167,8 @@ def iterative_optimization(

try:
with resources_local.occupy_cores(1):
if stop_event.is_set():
return None
gap_sufficient = engine.check_gap(
molecule=rev_mol,
threshold=config_refine.hlgap,
Expand Down
18 changes: 14 additions & 4 deletions src/mindlessgen/prog/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -785,7 +785,7 @@ class PostProcessConfig(BaseConfig):

def __init__(self: PostProcessConfig) -> None:
self._engine: str = "orca"
self._opt_cycles: int | None = 5
self._opt_cycles: int | None = None
self._optimize: bool = True
self._debug: bool = False
self._ncores: int = 4
Expand Down Expand Up @@ -839,10 +839,20 @@ def opt_cycles(self, opt_cycles: int):
"""
Set the optimization cycles for post-processing.
"""
if not isinstance(opt_cycles, int):
raise TypeError("Optimization cycles should be an integer.")
if not isinstance(opt_cycles, (int, str)):
raise TypeError("Optimization cycles can only be an integer or a string.")
if isinstance(opt_cycles, str):
if opt_cycles.lower() != "none":
raise ValueError(
"Optimization cycles can only be an integer or 'none'."
)
self._opt_cycles = None
return
if opt_cycles == 0:
self._opt_cycles = None
return
if opt_cycles < 0:
raise ValueError("Optimization cycles should be 0 or greater.")
raise ValueError("Optimization cycles can only be 0 or greater.")
self._opt_cycles = opt_cycles

@property
Expand Down
10 changes: 6 additions & 4 deletions src/mindlessgen/qm/orca.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,17 +171,19 @@ def _gen_input(
"""
orca_input = f"! {self.cfg.functional} {self.cfg.basis}\n"
orca_input += f"! DEFGRID{self.cfg.gridsize}\n"
orca_input += "! NoTRAH NoSOSCF SlowConv\n"
orca_input += "! MiniPrint\n"
orca_input += "! NoTRAH\n"
# "! AutoAux" keyword for super-heavy elements as def2/J ends at Rn
if any(atom >= 86 for atom in molecule.ati):
orca_input += "! AutoAux\n"
if optimization:
orca_input += "! OPT\n"
if opt_cycles is not None:
orca_input += f"%geom MaxIter {opt_cycles} end\n"
orca_input += (
f"%scf\n\tMaxIter {self.cfg.scf_cycles}\n\tConvergence Medium\nend\n"
)
orca_input += f"%scf\n\tMaxIter {self.cfg.scf_cycles}\n"
if not optimization:
orca_input += "\tConvergence Medium\n"
orca_input += "end\n"
orca_input += f"%pal nprocs {ncores} end\n\n"
orca_input += f"* xyzfile {molecule.charge} {molecule.uhf + 1} {xyzfile}\n"
return orca_input
Expand Down
5 changes: 4 additions & 1 deletion test/test_molecules/test_refinement.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,18 +144,21 @@ def test_iterative_optimization(mol_C13H14: Molecule, mol_C7H8: Molecule) -> Non
else:
raise NotImplementedError("Engine not implemented.")
mol = mol_C13H14
with setup_managers(1, 1) as (_, _, resources):
with setup_managers(1, 1) as (_, manager, resources):
stop_event = manager.Event()
mol_opt = iterative_optimization(
mol,
engine,
config.generate,
config.refine,
resources,
stop_event,
verbosity=2,
)
mol_ref = mol_C7H8

# assert number of atoms in mol_opt is equal to number of atoms in mol_ref
assert mol_opt is not None
assert mol_opt.num_atoms == mol_ref.num_atoms
# assert that the coordinates of mol_opt are close to the coordinates of mol_ref
assert np.allclose(mol_opt.xyz, mol_ref.xyz, atol=1e-4)