From 8795858c2970ba30e31c728dbb256653a8a28412 Mon Sep 17 00:00:00 2001 From: Reiher Research Group Date: Mon, 20 Nov 2023 12:19:07 +0100 Subject: [PATCH] Release 1.3.0 --- .gitignore | 7 + CHANGELOG.rst | 24 + LICENSE.txt | 2 +- README.rst | 2 +- conanfile.txt | 18 + .../{singularity => apptainer}/README.rst | 27 +- .../{singularity => apptainer}/puffin.def | 4 +- container/docker/Dockerfile | 25 +- dev | 2 +- docs/source/conf.py | 2 +- docs/source/programs.rst | 4 + requirements-dev.txt | 4 - requirements.txt | 6 + scine_puffin/__init__.py | 2 +- scine_puffin/__main__.py | 2 +- scine_puffin/_version.py | 4 +- scine_puffin/bootstrap.py | 25 +- scine_puffin/config.py | 58 +- scine_puffin/daemon.py | 2 +- scine_puffin/jobloop.py | 30 +- scine_puffin/jobs/__init__.py | 2 +- scine_puffin/jobs/conformers.py | 2 +- .../final_conformer_deduplication.py.depr | 2 +- .../jobs/deprecated/rdkit_conformers.py.depr | 2 +- scine_puffin/jobs/gaussian_charge_model_5.py | 2 +- scine_puffin/jobs/graph.py | 8 +- scine_puffin/jobs/kinetx_kinetic_modeling.py | 129 +- .../jobs/orca_geometry_optimization.py | 4 +- scine_puffin/jobs/rms_kinetic_modeling.py | 302 +++++ scine_puffin/jobs/scine_afir.py | 7 +- scine_puffin/jobs/scine_bond_orders.py | 2 +- .../jobs/scine_bspline_optimization.py | 283 ++-- scine_puffin/jobs/scine_conceptual_dft.py | 2 +- scine_puffin/jobs/scine_dissociation_cut.py | 20 +- .../jobs/scine_geometry_optimization.py | 42 +- .../jobs/scine_geometry_validation.py | 278 ++++ scine_puffin/jobs/scine_hessian.py | 2 +- scine_puffin/jobs/scine_irc_scan.py | 11 +- scine_puffin/jobs/scine_react_complex_afir.py | 66 +- scine_puffin/jobs/scine_react_complex_nt.py | 90 +- scine_puffin/jobs/scine_react_complex_nt2.py | 86 +- scine_puffin/jobs/scine_single_point.py | 2 +- scine_puffin/jobs/scine_step_refinement.py | 107 +- scine_puffin/jobs/scine_ts_optimization.py | 2 +- scine_puffin/jobs/sleep.py | 2 +- scine_puffin/jobs/swoose_qmmm_forces.py | 2 +- 
scine_puffin/jobs/templates/__init__.py | 2 +- scine_puffin/jobs/templates/job.py | 16 +- .../jobs/templates/kinetic_modeling_jobs.py | 119 ++ .../jobs/templates/scine_connectivity_job.py | 202 ++- .../jobs/templates/scine_hessian_job.py | 25 +- scine_puffin/jobs/templates/scine_job.py | 76 +- .../jobs/templates/scine_observers.py | 2 +- .../jobs/templates/scine_optimization_job.py | 77 +- .../jobs/templates/scine_react_job.py | 1193 +++++++++++++---- scine_puffin/jobs/turbomole_bond_orders.py | 2 +- .../jobs/turbomole_geometry_optimization.py | 4 +- scine_puffin/jobs/turbomole_hessian.py | 2 +- scine_puffin/jobs/turbomole_single_point.py | 2 +- scine_puffin/programs/__init__.py | 2 +- scine_puffin/programs/ams.py | 44 + scine_puffin/programs/core.py | 38 + scine_puffin/programs/cp2k.py | 2 +- scine_puffin/programs/database.py | 2 +- .../programs/deprecated/rdkit.py.depr | 2 +- scine_puffin/programs/gaussian.py | 2 +- scine_puffin/programs/kinetx.py | 2 +- scine_puffin/programs/molassembler.py | 2 +- scine_puffin/programs/mrcc.py | 40 + scine_puffin/programs/orca.py | 2 +- scine_puffin/programs/parrot.py | 37 + scine_puffin/programs/program.py | 74 +- scine_puffin/programs/readuct.py | 2 +- scine_puffin/programs/rms.py | 104 ++ scine_puffin/programs/serenity.py | 2 +- scine_puffin/programs/sparrow.py | 2 +- scine_puffin/programs/swoose.py | 2 +- scine_puffin/programs/turbomole.py | 2 +- scine_puffin/programs/utils.py | 2 +- scine_puffin/programs/xtb.py | 2 +- scine_puffin/tests/__init__.py | 2 +- scine_puffin/tests/conftest.py | 18 + scine_puffin/tests/cp2k/__init__.py | 2 +- scine_puffin/tests/cp2k/cp2k_test.py | 2 +- scine_puffin/tests/daemon_test.py | 2 +- scine_puffin/tests/db_setup.py | 41 +- scine_puffin/tests/jobs/__init__.py | 2 +- scine_puffin/tests/jobs/test_conformers.py | 2 +- .../jobs/test_gaussian_cm5_charges_job.py | 3 +- scine_puffin/tests/jobs/test_graph.py | 2 +- .../jobs/test_kinetx_kinetic_modeling_job.py | 6 +- 
.../test_orca_geometry_optimization_job.py | 3 +- .../tests/jobs/test_rms_input_file_creator.py | 119 ++ .../jobs/test_rms_kinetic_modeling_job.py | 417 ++++++ scine_puffin/tests/jobs/test_scine_afir.py | 8 +- .../tests/jobs/test_scine_bond_orders_job.py | 2 +- .../jobs/test_scine_bspline_optimization.py | 129 +- .../jobs/test_scine_dissociation_cut_job.py | 6 +- .../test_scine_geometry_optimization_job.py | 6 +- .../test_scine_geometry_validation_job.py | 587 ++++++++ scine_puffin/tests/jobs/test_scine_hessian.py | 2 +- .../tests/jobs/test_scine_irc_scan_job.py | 2 +- .../jobs/test_scine_react_complex_afir_job.py | 4 +- .../jobs/test_scine_react_complex_nt2_job.py | 152 ++- ..._scine_react_complex_nt2_propensity_job.py | 215 +++ .../jobs/test_scine_react_complex_nt_job.py | 347 ++++- .../tests/jobs/test_scine_react_job.py | 2 +- .../tests/jobs/test_scine_single_point_job.py | 2 +- .../jobs/test_scine_step_refinement_job.py | 10 +- .../jobs/test_scine_ts_optimization_job.py | 2 +- scine_puffin/tests/jobs/test_sleep_job.py | 2 +- .../tests/jobs/test_turbomole_bond_orders.py | 3 +- ...est_turbomole_geometry_optimization_job.py | 3 +- .../tests/jobs/test_turbomole_hessian.py | 3 +- .../jobs/test_turbomole_single_point_job.py | 3 +- scine_puffin/tests/masm_info_test.py | 110 +- scine_puffin/tests/mrcc/__init__.py | 6 + scine_puffin/tests/mrcc/mrcc_test.py | 56 + scine_puffin/tests/resources/FeO_H2.json | 5 + scine_puffin/tests/resources/FeO_H2.xyz | 6 + scine_puffin/tests/resources/FeO_H2_lhs.json | 5 + scine_puffin/tests/resources/FeO_H2_lhs.xyz | 6 + scine_puffin/tests/resources/__init__.py | 2 +- .../resources/acetal_methanol_complex.xyz | 32 + scine_puffin/tests/resources/au.json | 5 + scine_puffin/tests/resources/au.xyz | 29 + scine_puffin/tests/resources/au_complex.xyz | 31 + scine_puffin/tests/resources/h2.json | 5 + scine_puffin/tests/resources/h2.xyz | 4 + scine_puffin/tests/resources/h2o2.json | 5 + scine_puffin/tests/resources/h2o2.xyz | 6 + 
.../tests/resources/h2o2_distorted.xyz | 6 + scine_puffin/tests/resources/hio3.json | 5 + scine_puffin/tests/resources/hio3.xyz | 7 + .../resources/methanol_enamine_complex.xyz | 32 + scine_puffin/tests/resources/peroxide.json | 5 + scine_puffin/tests/resources/peroxide.xyz | 12 + .../tests/resources/water_distorted.xyz | 5 + .../tests/resources/water_distorted_2.xyz | 5 + scine_puffin/tests/testcases.py | 40 +- scine_puffin/utilities/__init__.py | 2 +- .../utilities/compound_and_flask_helpers.py | 2 +- .../kinetic_modeling_sensitivity_analysis.py | 576 ++++++++ scine_puffin/utilities/masm_helper.py | 56 +- scine_puffin/utilities/program_helper.py | 4 +- scine_puffin/utilities/properties.py | 2 +- .../utilities/reaction_transfer_helper.py | 247 ++++ .../utilities/rms_input_file_creator.py | 440 ++++++ scine_puffin/utilities/rms_kinetic_model.py | 542 ++++++++ scine_puffin/utilities/scine_helper.py | 8 +- scine_puffin/utilities/surface_helper.py | 111 ++ scine_puffin/utilities/transfer_helper.py | 91 ++ scine_puffin/utilities/turbomole_helper.py | 2 +- scripts/rms/build_rms.sh | 57 + scripts/rms/scine2rms.py | 84 ++ setup.cfg | 4 +- setup.py | 4 +- 157 files changed, 7383 insertions(+), 1186 deletions(-) create mode 100644 conanfile.txt rename container/{singularity => apptainer}/README.rst (66%) rename container/{singularity => apptainer}/puffin.def (97%) create mode 100644 scine_puffin/jobs/rms_kinetic_modeling.py create mode 100644 scine_puffin/jobs/scine_geometry_validation.py create mode 100644 scine_puffin/jobs/templates/kinetic_modeling_jobs.py create mode 100644 scine_puffin/programs/ams.py create mode 100644 scine_puffin/programs/core.py create mode 100644 scine_puffin/programs/mrcc.py create mode 100644 scine_puffin/programs/parrot.py create mode 100644 scine_puffin/programs/rms.py create mode 100644 scine_puffin/tests/conftest.py create mode 100644 scine_puffin/tests/jobs/test_rms_input_file_creator.py create mode 100644 
scine_puffin/tests/jobs/test_rms_kinetic_modeling_job.py create mode 100644 scine_puffin/tests/jobs/test_scine_geometry_validation_job.py create mode 100644 scine_puffin/tests/jobs/test_scine_react_complex_nt2_propensity_job.py create mode 100644 scine_puffin/tests/mrcc/__init__.py create mode 100644 scine_puffin/tests/mrcc/mrcc_test.py create mode 100644 scine_puffin/tests/resources/FeO_H2.json create mode 100644 scine_puffin/tests/resources/FeO_H2.xyz create mode 100644 scine_puffin/tests/resources/FeO_H2_lhs.json create mode 100644 scine_puffin/tests/resources/FeO_H2_lhs.xyz create mode 100644 scine_puffin/tests/resources/acetal_methanol_complex.xyz create mode 100644 scine_puffin/tests/resources/au.json create mode 100644 scine_puffin/tests/resources/au.xyz create mode 100644 scine_puffin/tests/resources/au_complex.xyz create mode 100644 scine_puffin/tests/resources/h2.json create mode 100644 scine_puffin/tests/resources/h2.xyz create mode 100644 scine_puffin/tests/resources/h2o2.json create mode 100644 scine_puffin/tests/resources/h2o2.xyz create mode 100644 scine_puffin/tests/resources/h2o2_distorted.xyz create mode 100644 scine_puffin/tests/resources/hio3.json create mode 100644 scine_puffin/tests/resources/hio3.xyz create mode 100644 scine_puffin/tests/resources/methanol_enamine_complex.xyz create mode 100644 scine_puffin/tests/resources/peroxide.json create mode 100644 scine_puffin/tests/resources/peroxide.xyz create mode 100644 scine_puffin/tests/resources/water_distorted.xyz create mode 100644 scine_puffin/tests/resources/water_distorted_2.xyz create mode 100644 scine_puffin/utilities/kinetic_modeling_sensitivity_analysis.py create mode 100644 scine_puffin/utilities/reaction_transfer_helper.py create mode 100644 scine_puffin/utilities/rms_input_file_creator.py create mode 100644 scine_puffin/utilities/rms_kinetic_model.py create mode 100644 scine_puffin/utilities/surface_helper.py create mode 100644 scine_puffin/utilities/transfer_helper.py create mode 
100755 scripts/rms/build_rms.sh create mode 100644 scripts/rms/scine2rms.py diff --git a/.gitignore b/.gitignore index 6160714..81d1b79 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,10 @@ docs/source/generated/ #VSCode .vscode/ + +# unittests +puffin_unittest_scratch/ + +# puffin instance +puffin.yaml +puffin.sh diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 6ba68be..bf28d03 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,30 @@ Changelog ========= +Release 1.3.0 +------------- + +New Features: + - Store found elementary step even if none of the endpoints corresponds to the initial starting structures + - Add restart information with valid TS for jobs trying to find new elementary steps, where the IRC failed to produce different endpoints + - Consider potential surface structures for label determination of new structures + - Logic to transfer indices information and other complex properties from reactants to products + - Save close lying spin multiplicities and allow to manipulate exact spin propensity + check behavior with added settings + +New Jobs: + - Microkinetic modeling with the program Reaction Mechanism Simulator. 
+ +New interfaced programs: + - AMS via SCINE AMS Wrapper + - MRCC (release version March 2022) + +Bug fixes: + - Ensure that `only_distance_connectivity` is adhered to in all reaction jobs + +Other: + - Update address in license + Release 1.2.0 ------------- diff --git a/LICENSE.txt b/LICENSE.txt index c548b5d..93f9876 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/README.rst b/README.rst index 140167a..e76f85d 100644 --- a/README.rst +++ b/README.rst @@ -6,7 +6,7 @@ Introduction ------------ -SCINE Puffin is a calculation handler for SCINE Chemoton. Thus, it bridges the gap between high level exploration jobs for reaction networks and basic quantum chemical calculations. Making use of the abstractions defined the SCINE Core and SCINE Utilities modules it provides the means to define and execute jobs that generate new data for reaction networks. SCINE Puffin is designed as an instanced runner that, once bootstrapped, continuously processes requested jobs. It is built to work in containerized environments (Docker, Singularity). +SCINE Puffin is a calculation handler for SCINE Chemoton. Thus, it bridges the gap between high level exploration jobs for reaction networks and basic quantum chemical calculations. Making use of the abstractions defined in the SCINE Core and SCINE Utilities modules it provides the means to define and execute jobs that generate new data for reaction networks. SCINE Puffin is designed as an instanced runner that, once bootstrapped, continuously processes requested jobs. It is built to work in containerized environments (Docker, Apptainer/Singularity). 
License and Copyright Information --------------------------------- diff --git a/conanfile.txt b/conanfile.txt new file mode 100644 index 0000000..be64a01 --- /dev/null +++ b/conanfile.txt @@ -0,0 +1,18 @@ +[requires] +scine_utilities/9.0.0@ +scine_molassembler/2.0.1@ +scine_database/1.3.0@ +scine_xtb_wrapper/3.0.0@ +scine_sparrow/5.0.0@ +scine_readuct/5.1.0@ + +[options] +scine_utilities:python=True +scine_molassembler:python=True +scine_database:python=True +scine_sparrow:python=True +scine_xtb_wrapper:python=True +scine_readuct:python=True + +[generators] +virtualenv_python diff --git a/container/singularity/README.rst b/container/apptainer/README.rst similarity index 66% rename from container/singularity/README.rst rename to container/apptainer/README.rst index 6d6c289..7e651c1 100644 --- a/container/singularity/README.rst +++ b/container/apptainer/README.rst @@ -2,17 +2,18 @@ Basic Container Usage --------------------- In essence, these two commands should be enough to run a basic ``Puffin`` -instance inside a Singularity image. +instance inside an Apptainer (formerly Singularity) image. One to build the container: + .. code-block:: bash - singularity build puffin.sif puffin.def + apptainer build puffin.sif puffin.def and one to run the generated image: .. code-block:: bash - singularity run puffin.sif + apptainer run puffin.sif The build command may require slight alterations to the ``.def`` file first, as the file may reference a user's SSH keys in order to allow the cloning of some @@ -20,21 +21,21 @@ the file may reference a user's SSH keys in order to allow the cloning of some For some users the ``--fakeroot`` option may be more comfortable than using a ``sudo`` preface, but keep in mind that this option is not guaranteed to work, depending on the subordinate UID mapping configured for you on the machine you're working on. 
-Also, it may be required to make Singularity build the image in +Also, it may be required to make Apptainer build the image in a custom temporary directory if the default ``/tmp`` is not big enough. A slightly different build command may thus look like this: .. code-block:: bash - SINGULARITY_TMPDIR=/scratch/tmp singularity build --fakeroot puffin.sif puffin.def + APPTAINER_TMPDIR=/scratch/tmp apptainer build --fakeroot puffin.sif puffin.def The run command will have to be changed depending on the use case. It may be required to change some of the settings of the Puffin instance running inside the container. To this end, environment variables for Puffin can be set. These variables need to be present inside the container. -With Singularity versions greater than 3.6 it is possible to use the -``--env`` or ``--env-file`` argument to the run command. The more general way is -setting ``SINGULARITYENV_`` variables on the host machine. +It is possible to use the ``--env`` or ``--env-file`` argument to the run command. +However, the more general way is setting ``APPTAINERENV_`` variables on the host +machine. Furthermore it is required to mount two folders into the image, both are scratch direcories, one for ``.log`` and ``.pid`` files (``/socket``) and one for the @@ -44,10 +45,10 @@ A more complete run could thus look like this: .. 
code-block:: bash - singularity build puffin.sif puffin.def - export SINGULARITYENV_PUFFIN_DATABASE_NAME=ath_dft - export SINGULARITYENV_PUFFIN_DATABASE_PORT=27001 - export SINGULARITYENV_PUFFIN_DATABASE_IP=129.132.118.83 - singularity run --bind /scratch/puffin:/socket \ + apptainer build puffin.sif puffin.def + export APPTAINERENV_PUFFIN_DATABASE_NAME=ath_dft + export APPTAINERENV_PUFFIN_DATABASE_PORT=27001 + export APPTAINERENV_PUFFIN_DATABASE_IP=129.132.118.83 + apptainer run --bind /scratch/puffin:/socket \ --bind /scratch/puffin/jobs:/jobs \ puffin.sif diff --git a/container/singularity/puffin.def b/container/apptainer/puffin.def similarity index 97% rename from container/singularity/puffin.def rename to container/apptainer/puffin.def index d178222..a4c8322 100644 --- a/container/singularity/puffin.def +++ b/container/apptainer/puffin.def @@ -42,11 +42,12 @@ From: ubuntu:focal echo 'export MKLROOT=/opt/intel/oneapi/mkl/latest' >>$SINGULARITY_ENVIRONMENT # Install Cython - pip3 install Cython pythran + pip3 install Cython==0.29.36 pythran # Install numpy git clone https://github.com/numpy/numpy.git numpy cd numpy + git checkout v1.24.2 git submodule update --init cp site.cfg.example site.cfg echo "\n[mkl]" >> site.cfg @@ -62,6 +63,7 @@ From: ubuntu:focal # Install scipy git clone https://github.com/scipy/scipy.git scipy cd scipy + git checkout v1.10.1 git submodule update --init python3 setup.py build python3 setup.py install diff --git a/container/docker/Dockerfile b/container/docker/Dockerfile index d4b1a3d..14a1244 100644 --- a/container/docker/Dockerfile +++ b/container/docker/Dockerfile @@ -30,28 +30,31 @@ ENV LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/latest/lib/intel64:$LD_LIBRARY_PATH ENV MKLROOT=/opt/intel/oneapi/mkl/latest # Install Cython -RUN pip3 install Cython pythran +RUN pip3 install Cython==0.29.36 pythran # Install numpy +WORKDIR / RUN git clone https://github.com/numpy/numpy.git numpy -RUN cd numpy ; \ - cp site.cfg.example site.cfg ; \ +WORKDIR 
/numpy +RUN git checkout v1.24.2 +RUN git submodule update --init +RUN cp site.cfg.example site.cfg ; \ echo "\n[mkl]" >> site.cfg ; \ echo "include_dirs = /opt/intel/oneapi/mkl/latest/lib/intel64/" >> site.cfg ; \ echo "library_dirs = /opt/intel/oneapi/mkl/latest/lib/intel64/" >> site.cfg ; \ echo "mkl_libs = mkl_rt" >> site.cfg ; \ echo "lapack_libs =" >> site.cfg -RUN cd numpy ; git submodule update --init -RUN cd numpy ; python3 setup.py build --fcompiler=gnu95 -RUN cd numpy ; python3 setup.py install -RUN rm -rf numpy +RUN python3 setup.py build --fcompiler=gnu95 +RUN python3 setup.py install # Install scipy +WORKDIR / RUN git clone https://github.com/scipy/scipy.git scipy -RUN cd scipy ; git submodule update --init -RUN cd scipy ; python3 setup.py build -RUN cd scipy ; python3 setup.py install -RUN rm -rf scipy +WORKDIR /scipy +RUN git checkout v1.10.1 +RUN git submodule update --init +RUN python3 setup.py build +RUN python3 setup.py install # Install MongoDB C++ bindings RUN wget https://github.com/mongodb/mongo-c-driver/releases/download/1.17.3/mongo-c-driver-1.17.3.tar.gz diff --git a/dev b/dev index 28fc95a..518ab3c 160000 --- a/dev +++ b/dev @@ -1 +1 @@ -Subproject commit 28fc95a4c4d454eec3809fe734d6eebe703a9ca8 +Subproject commit 518ab3c7f8a0a724081fcd7ed518c669724bcd37 diff --git a/docs/source/conf.py b/docs/source/conf.py index 2b61401..6b2a178 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -58,7 +58,7 @@ # General information about the project. 
project = 'scine_puffin' -copyright = '2022, ETH Zurich, Laboratory for Physical Chemistry, Reiher Group' +copyright = 'ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group' author = 'Reiher Group' # The version info for the project you're documenting, acts as replacement for diff --git a/docs/source/programs.rst b/docs/source/programs.rst index c362d86..2368468 100644 --- a/docs/source/programs.rst +++ b/docs/source/programs.rst @@ -27,6 +27,10 @@ SCINE: Sparrow `````````````` .. autoclass:: scine_puffin.programs.sparrow.Sparrow +SCINE: Parrot +````````````` +.. autoclass:: scine_puffin.programs.parrot.Parrot + Serenity ```````` .. autoclass:: scine_puffin.programs.serenity.Serenity diff --git a/requirements-dev.txt b/requirements-dev.txt index af4cd1a..0a829a2 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,10 +3,6 @@ matplotlib pytest coverage recommonmark -sphinx -sphinx_rtd_theme -sphinx_autodoc_typehints -sphinx_copybutton numpydoc pylint pylint-exit diff --git a/requirements.txt b/requirements.txt index a9979c3..7fbc0fa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,13 @@ gitpython numpy psutil +pymatgen python-daemon pyyaml scipy setproctitle +sphinx +sphinx_rtd_theme +sphinx_autodoc_typehints +sphinx_copybutton +salib diff --git a/scine_puffin/__init__.py b/scine_puffin/__init__.py index bbe4371..738cf24 100644 --- a/scine_puffin/__init__.py +++ b/scine_puffin/__init__.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" diff --git a/scine_puffin/__main__.py b/scine_puffin/__main__.py index 4fcf961..2031168 100644 --- a/scine_puffin/__main__.py +++ b/scine_puffin/__main__.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/_version.py b/scine_puffin/_version.py index 58c5a22..96cdefe 100644 --- a/scine_puffin/_version.py +++ b/scine_puffin/_version.py @@ -1,6 +1,6 @@ __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ -__version__ = "1.2.0" +__version__ = "1.3.0" diff --git a/scine_puffin/bootstrap.py b/scine_puffin/bootstrap.py index d2a2261..888e615 100644 --- a/scine_puffin/bootstrap.py +++ b/scine_puffin/bootstrap.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" import os @@ -62,15 +62,18 @@ def bootstrap(config: Configuration): # Install minimal requirement print("") - print("Building SCINE Utils from sources.") + print("Building SCINE Core/Utils from sources.") print("") + core_build_dir = os.path.join(build_dir, "core") + core = Utils(config.programs()["core"]) + core.install(core_build_dir, install_dir, config["resources"]["cores"]) utils_build_dir = os.path.join(build_dir, "utils") utils = Utils(config.programs()["utils"]) utils.install(utils_build_dir, install_dir, config["resources"]["cores"]) # Install all other programs for program_name, settings in config.programs().items(): - if program_name == "utils" or not settings["available"]: + if program_name in ['core', 'utils'] or not settings["available"]: continue print("") print("Preparing " + program_name.capitalize() + "...") @@ -101,6 +104,22 @@ def bootstrap(config: Configuration): "python" + str(python_version[0]) + "." + str(python_version[1]), "site-packages", ) + + ":" + + os.path.join( + install_dir, + "local", + "lib", + "python" + str(python_version[0]) + "." + str(python_version[1]), + "dist-packages", + ) + + ":" + + os.path.join( + install_dir, + "local", + "lib64", + "python" + str(python_version[0]) + "." + str(python_version[1]), + "dist-packages", + ) ) env["PATH"] = os.path.join(install_dir, "bin") env["LD_LIBRARY_PATH"] = os.path.join(install_dir, "lib") + ":" + os.path.join(install_dir, "lib64") diff --git a/scine_puffin/config.py b/scine_puffin/config.py index b0cd996..546a867 100644 --- a/scine_puffin/config.py +++ b/scine_puffin/config.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" @@ -53,7 +53,7 @@ class Configuration: configuration are: **daemon** - The settings peratining the execution of Puffin and its daemon process. + The settings pertaining to the execution of Puffin and its daemon process. **database** All information about the database the Puffin will be working on. @@ -233,11 +233,29 @@ def __init__(self): "enforce_memory_limit": True, } self._data["programs"] = { + "ams": { + "available": False, + "source": "https://github.com/qcscine/ams_wrapper.git", + "root": "", + "version": "0.0.0", + "march": "native", + "cxx_compiler_flags": "", + "cmake_flags": "", + }, "readuct": { "available": True, "source": "https://github.com/qcscine/readuct.git", "root": "", - "version": "5.0.0", + "version": "5.1.0", + "march": "native", + "cxx_compiler_flags": "", + "cmake_flags": "", + }, + "core": { + "available": True, + "source": "https://github.com/qcscine/core.git", + "root": "", + "version": "6.0.0", "march": "native", "cxx_compiler_flags": "", "cmake_flags": "", @@ -246,7 +264,7 @@ def __init__(self): "available": True, "source": "https://github.com/qcscine/utilities.git", "root": "", - "version": "8.0.0", + "version": "9.0.0", "march": "native", "cxx_compiler_flags": "", "cmake_flags": "", @@ -255,7 +273,7 @@ def __init__(self): "available": True, "source": "https://github.com/qcscine/database.git", "root": "", - "version": "1.2.0", + "version": "1.3.0", "march": "native", "cxx_compiler_flags": "", "cmake_flags": "", @@ -264,7 +282,7 @@ def __init__(self): "available": True, "source": "https://github.com/qcscine/sparrow.git", "root": "", - "version": "4.0.0", + "version": "5.0.0", "march": "native", "cxx_compiler_flags": "", "cmake_flags": "", @@ -273,7 +291,7 @@ def __init__(self): "available": True, "source": "https://github.com/qcscine/molassembler.git", "root": "", - "version": "2.0.0", + "version": "2.0.1", "march": "native", "cxx_compiler_flags": "", "cmake_flags": "", @@ -282,7 +300,7 @@ def __init__(self): 'available': False, 
'source': 'https://github.com/qcscine/swoose.git', 'root': '', - 'version': 'master', + 'version': '2.0.0', 'march': 'native', "cmake_flags": "", "cxx_compiler_flags": "", @@ -309,7 +327,7 @@ def __init__(self): "available": False, "source": "https://github.com/qcscine/serenity_wrapper.git", "root": "", - "version": "2.0.0", + "version": "3.0.0", "march": "native", "cxx_compiler_flags": "", "cmake_flags": "", @@ -324,13 +342,13 @@ def __init__(self): "available": False, "source": "https://github.com/qcscine/xtb_wrapper.git", "root": "", - "version": "2.0.0", + "version": "3.0.0", "march": "native", "cxx_compiler_flags": "", "cmake_flags": "", }, "kinetx": { - "available": False, + "available": True, "source": "https://github.com/qcscine/kinetx.git", "root": "", "version": "2.0.0", @@ -338,6 +356,24 @@ def __init__(self): "cxx_compiler_flags": "", "cmake_flags": "", }, + "rms": { + "available": False, + "source": "", + "root": "", + "version": "main" + }, + "mrcc": { + 'available': False, + 'source': '', + 'root': '', + 'version': 'march2022', + }, + "parrot": { + "available": False, + "source": "https://github.com/qcscine/parrot.git", + "root": "", + "version": "0.0.0" + }, } def __getitem__(self, key: str) -> dict: diff --git a/scine_puffin/daemon.py b/scine_puffin/daemon.py index a76922f..bf0fc06 100644 --- a/scine_puffin/daemon.py +++ b/scine_puffin/daemon.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ import os diff --git a/scine_puffin/jobloop.py b/scine_puffin/jobloop.py index 99aef01..beba38b 100644 --- a/scine_puffin/jobloop.py +++ b/scine_puffin/jobloop.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. 
-Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -257,10 +257,9 @@ def check_setup(config: Configuration) -> Dict[str, str]: if scine_database is None: print("Missing SCINE Utilities, please bootstrap Puffin.") sys.exit(1) - scine_database = util.find_spec("scine_readuct") - if scine_database is None: - print("Missing SCINE ReaDuct, please bootstrap Puffin.") - sys.exit(1) + scine_readuct = util.find_spec("scine_readuct") + if scine_readuct is None: + print("SCINE ReaDuct is not available for Puffin. Note that this will disable nearly all exploration jobs.") # Generate the list of available programs available_programs = [] @@ -268,6 +267,18 @@ def check_setup(config: Configuration) -> Dict[str, str]: if settings["available"]: available_programs.append(program_name) + if scine_readuct is None and "readuct" in available_programs: + raise RuntimeError("SCINE ReaDuct was not found by Puffin but is set as available in the run configuration.\n" + "Please make sure that SCINE ReaDuct is installed properly, bootstrap Puffin, or disable\n" + "SCINE ReaDuct in the run configuration.") + + # Initialize all available programs + for program_name in available_programs: + class_name = "".join([s.capitalize() for s in program_name.split("_")]) + module = import_module("scine_puffin.programs." 
+ program_name) + class_ = getattr(module, class_name) + class_.initialize() + # Gather list of all jobs all_jobs = [] import scine_puffin.jobs @@ -368,6 +379,7 @@ def _loop_impl( os.remove(stop_file) except FileNotFoundError: pass + _log(config, "Detected stop file " + stop_file + " and stopped puffin.") break @@ -570,6 +582,14 @@ def _job_execution(config: Configuration, job_class: type, manager, calculation, # Prepare job directory and start timer start = datetime.now() job.prepare(config["daemon"]["job_dir"], calculation.id()) + # Initialize programs that need initialization + for program_name, settings in config.programs().items(): + if settings["available"]: + # Initialize all available programs + class_name = "".join([s.capitalize() for s in program_name.split("_")]) + module = import_module("scine_puffin.programs." + program_name) + class_ = getattr(module, class_name) + class_.initialize() # Run job success = job.run(manager, calculation, config) # we already write a runtime in case puffin fails during copying operations diff --git a/scine_puffin/jobs/__init__.py b/scine_puffin/jobs/__init__.py index 4d46073..d85c6e5 100644 --- a/scine_puffin/jobs/__init__.py +++ b/scine_puffin/jobs/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/jobs/conformers.py b/scine_puffin/jobs/conformers.py index 0b78a40..db9d967 100644 --- a/scine_puffin/jobs/conformers.py +++ b/scine_puffin/jobs/conformers.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. 
See LICENSE.txt for details. """ diff --git a/scine_puffin/jobs/deprecated/final_conformer_deduplication.py.depr b/scine_puffin/jobs/deprecated/final_conformer_deduplication.py.depr index 796884c..24cbe32 100644 --- a/scine_puffin/jobs/deprecated/final_conformer_deduplication.py.depr +++ b/scine_puffin/jobs/deprecated/final_conformer_deduplication.py.depr @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/jobs/deprecated/rdkit_conformers.py.depr b/scine_puffin/jobs/deprecated/rdkit_conformers.py.depr index 2dc72db..a57b1f8 100644 --- a/scine_puffin/jobs/deprecated/rdkit_conformers.py.depr +++ b/scine_puffin/jobs/deprecated/rdkit_conformers.py.depr @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/jobs/gaussian_charge_model_5.py b/scine_puffin/jobs/gaussian_charge_model_5.py index 3cc342d..dc62eb3 100644 --- a/scine_puffin/jobs/gaussian_charge_model_5.py +++ b/scine_puffin/jobs/gaussian_charge_model_5.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" diff --git a/scine_puffin/jobs/graph.py b/scine_puffin/jobs/graph.py index cdb55fa..16e1e53 100644 --- a/scine_puffin/jobs/graph.py +++ b/scine_puffin/jobs/graph.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -25,12 +25,12 @@ class Graph(ConnectivityJob): If ``True``, the structure's connectivity is derived from interatomic distances via the utils.BondDetector: The bond orders used for interpretation are set to the maximum between those given in the - ``bond_orders`` property and 1.0, whereever the utils.BondDetector + ``bond_orders`` property and 1.0, wherever the utils.BondDetector detects a bond. (default: True) sub_based_on_distance_connectivity :: bool If ``True``, the structure's connectivity is derived from interatomic distances via the utils.BondDetector: The bond orders used for - interpretation are removed, whereever the utils.BondDetector does not + interpretation are removed, wherever the utils.BondDetector does not detect a bond. (default: True) enforce_bond_order_model :: bool If ``True``, only processes ``bond_orders`` that were generated with @@ -53,7 +53,7 @@ class Graph(ConnectivityJob): Graph representations of the structure will be added to the structures ``graphs`` field. The added representations are: A representation of the graph ``masm_cbor_graph``, and the decision representations of the existing - stereopermutators using a nearest neighour fit ``masm_decision_list`` + stereopermutators using a nearest neighbor fit ``masm_decision_list`` Any previous graph representations of the structure will be overwritten. 
""" diff --git a/scine_puffin/jobs/kinetx_kinetic_modeling.py b/scine_puffin/jobs/kinetx_kinetic_modeling.py index 8cbb43b..bf93340 100644 --- a/scine_puffin/jobs/kinetx_kinetic_modeling.py +++ b/scine_puffin/jobs/kinetx_kinetic_modeling.py @@ -1,20 +1,20 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ from typing import List -import numpy as np import scine_database as db -from .templates.job import Job, breakable, calculation_context, job_configuration_wrapper +from .templates.job import breakable, calculation_context, job_configuration_wrapper +from .templates.kinetic_modeling_jobs import KineticModelingJob from ..utilities.compound_and_flask_helpers import get_compound_or_flask from scine_puffin.config import Configuration -class KinetxKineticModeling(Job): +class KinetxKineticModeling(KineticModelingJob): """ A job that performs the kinetic modeling using KiNetX given a set of reactions and an electronic structure model. The reaction rates are calculated from transition @@ -27,14 +27,20 @@ class KinetxKineticModeling(Job): ``kinetx_kinetic_modeling`` **Required Input** - All reactions, concentrations, rate constants, etc. are parsed through the - settings. The numerical integration is done through KiNetX. The final and - maximum concentration for each compound is written to its centroid. The - concentrations trajectories are written to the raw output by KiNetX. - model :: db.Model The electronic structure model to flag the new properties with. + **Required Settings** + aggregate_ids :: List[str] + The aggregate IDs (as strings). + reaction_ids :: List[str] + The reaction IDs (as strings). + aggregate_types :: List[int] + The aggregate types. 0 for compounds, 1 for flasks. 
+ lhs_rates :: List[float] + The reaction rates for the forward reactions. + rhs_rates :: List[float] + The reaction rates for the backward reactions. **Optional Settings** Optional settings are read from the ``settings`` field, which is part of any ``Calculation`` stored in a SCINE Database. @@ -52,6 +58,13 @@ class KinetxKineticModeling(Job): n_batches :: int The numerical integration is done in batches of time steps. After each step the maximum concentration for each compound is updated. This is the number of time-step batches. + energy_model_program :: str + The program with which the electronic structure model should be flagged. Default any. + convergence :: float + Stop the numerical integration if the concentrations do not change more than this threshold between + intervals. + concentration_label_postfix :: str + Post fix to the property label. Default "". **Required Packages** - SCINE: Database (present by default) @@ -63,8 +76,10 @@ class KinetxKineticModeling(Job): generated and added to the database: Properties - The maximum and final concentration of each compound is added to - its centroid. + The maximum and final concentration, and the vertex flux of each aggregate is added to + its centroid. The edge flux, forward + backward flux for each reaction is added to the centroid of the first + aggregate on the reaction's LHS. Note, that the properties are NOT listed in the results to avoid large DB + documents. 
""" def __init__(self): @@ -99,7 +114,7 @@ def run(self, manager, calculation, config: Configuration) -> bool: n_reactions = len(reaction_ids) n_aggregates = len(aggregate_id_list) if len(reaction_ids) != len(lhs_rates_per_reaction) or len(reaction_ids) != len(rhs_rates_per_reaction): - raise RuntimeError("The number of reaction rates differs from the number of reactions.") + raise AssertionError("The number of reaction rates differs from the number of reactions.") network_builder = kinetx.NetworkBuilder() # Prepare the data arrays / network network_builder.reserve(n_compounds=n_aggregates, n_reactions=n_reactions, n_channels_per_reaction=1) @@ -122,78 +137,23 @@ def run(self, manager, calculation, config: Configuration) -> bool: concentration_data, reaction_flux, reaction_flux_forward, reaction_flux_backward = kinetx.integrate( network, concentrations, 0.0, time_step, solver, batch_interval, n_batches, convergence) # Save the concentrations + final_concentrations = concentration_data[:, 0] + max_concentrations = concentration_data[:, 1] + flux_concentrations = concentration_data[:, 2] results = calculation.get_results() - self._write_concentrations_to_centroids(aggregate_id_list, aggregate_type_list, concentration_data, - reaction_flux, reaction_flux_forward, reaction_flux_backward, - reaction_ids, manager, results) - calculation.set_results(results) + self.model.program = self.settings["energy_model_program"] + self._write_concentrations_to_centroids(aggregate_id_list, aggregate_type_list, reaction_ids, + [max_concentrations, final_concentrations, flux_concentrations], + [reaction_flux, reaction_flux_forward, reaction_flux_backward], + [self.c_max_label, self.c_final_label, self.c_flux_label], + [self.r_flux_label, self.r_forward_label, self.r_backward_label], + results, self.settings["concentration_label_postfix"], True) + # calculation.set_results(results) self._disable_all_aggregates() self.complete_job() return self.postprocess_calculation_context() - def 
_resolve_flask_to_compound_mapping(self, concentration_data, aggregate_id_list, - aggregate_type_list): - i = 0 - new_concentration_data = np.copy(concentration_data) - for a_id, a_type in zip(aggregate_id_list, aggregate_type_list): - if a_type == db.CompoundOrFlask.FLASK: - flask = db.Flask(a_id, self._flasks) - compounds_in_flask = flask.get_compounds() - for c_id in compounds_in_flask: - if c_id in aggregate_id_list: - j = aggregate_id_list.index(c_id) - new_concentration_data[j, :] += concentration_data[i, :] - i += 1 - return new_concentration_data - - def _write_concentrations_to_centroids(self, aggregate_id_list, aggregate_type_list, original_concentration_data, - total_reaction_flux, forward_reaction_flux, backward_reaction_flux, - reaction_ids, manager, results) -> None: - """ - Write the final and maximum concentrations to the centroids of each compound. - """ - self.model.program = self.settings["energy_model_program"] - concentration_data = self._resolve_flask_to_compound_mapping(original_concentration_data, aggregate_id_list, - aggregate_type_list) - i = 0 - post_fix = self.settings["concentration_label_postfix"] - print(post_fix) - for a_id, a_type in zip(aggregate_id_list, aggregate_type_list): - aggregate = get_compound_or_flask(a_id, a_type, self._compounds, self._flasks) - centroid = aggregate.get_centroid(manager) - flux_c = concentration_data[i, 2] - max_c = concentration_data[i, 1] - final_c = concentration_data[i, 0] - max_concentration_label = "max_concentration" + post_fix - final_concentration_label = "final_concentration" + post_fix - concentration_flux_label = "concentration_flux" + post_fix - self._write_concentration_property(centroid, max_concentration_label, max_c, results) - self._write_concentration_property(centroid, final_concentration_label, final_c, results) - self._write_concentration_property(centroid, concentration_flux_label, flux_c, results) - i += 1 - # Save edge flux (for the time being I will save it as a property to 
the centroid of the first LHS aggregate). - for i, r_id in enumerate(reaction_ids): - r_flux_total = total_reaction_flux[i, 0] - r_flux_forward = forward_reaction_flux[i, 0] - r_flux_backward = backward_reaction_flux[i, 0] - total_flux_label = r_id.string() + "_reaction_edge_flux" + post_fix - forward_flux_label = r_id.string() + "_forward_edge_flux" + post_fix - backward_flux_label = r_id.string() + "_backward_edge_flux" + post_fix - a_id = db.Reaction(r_id, self._reactions).get_reactants(db.Side.LHS)[0][0] - a_type = db.Reaction(r_id, self._reactions).get_reactant_types(db.Side.LHS)[0][0] - aggregate = get_compound_or_flask(a_id, a_type, self._compounds, self._flasks) - centroid = aggregate.get_centroid(manager) - self._write_concentration_property(centroid, total_flux_label, r_flux_total, results) - self._write_concentration_property(centroid, forward_flux_label, r_flux_forward, results) - self._write_concentration_property(centroid, backward_flux_label, r_flux_backward, results) - - def _write_concentration_property(self, centroid: db.Structure, label: str, value: float, results: db.Results): - prop = db.NumberProperty.make(label, self.model, value, self._properties) - results.add_property(prop.id()) - centroid.add_property(label, prop.id()) - prop.set_structure(centroid.id()) - def _add_all_aggregates(self, aggregate_id_list: List[db.ID], aggregate_type_list: List[db.CompoundOrFlask], network_builder) -> None: """ @@ -258,18 +218,7 @@ def check_mass_balance(self, lhs_stoichiometry, rhs_stoichiometry, aggregate_id_ centroid = aggregate.get_centroid() rhs_mass += n * self._calculate_weight(centroid) if abs(rhs_mass - lhs_mass) > 1e-6: - raise RuntimeError("Unbalanced masses in reaction. You are destroying/creating atoms!") - - def _disable_all_aggregates(self): - """ - Disable the exploration of all aggregates. 
- """ - for compound in self._compounds.iterate_all_compounds(): - compound.link(self._compounds) - compound.disable_exploration() - for flask in self._flasks.iterate_all_flasks(): - flask.link(self._flasks) - flask.disable_exploration() + raise AssertionError("Unbalanced masses in reaction. You are destroying/creating atoms!") @staticmethod def required_programs(): diff --git a/scine_puffin/jobs/orca_geometry_optimization.py b/scine_puffin/jobs/orca_geometry_optimization.py index 9742154..0d11970 100644 --- a/scine_puffin/jobs/orca_geometry_optimization.py +++ b/scine_puffin/jobs/orca_geometry_optimization.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -212,6 +212,8 @@ def run(self, manager, calculation, config: Configuration) -> bool: job = calculation.get_job() # New label + # TODO: These labels are not necessarily correct; during the optimization, a + # complex could be created label = structure.get_label() if label == db.Label.MINIMUM_GUESS or label == db.Label.MINIMUM_OPTIMIZED: new_label = db.Label.MINIMUM_OPTIMIZED diff --git a/scine_puffin/jobs/rms_kinetic_modeling.py b/scine_puffin/jobs/rms_kinetic_modeling.py new file mode 100644 index 0000000..7c1c107 --- /dev/null +++ b/scine_puffin/jobs/rms_kinetic_modeling.py @@ -0,0 +1,302 @@ +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details.
+""" + +import numpy as np +import scine_database as db +from typing import Optional, List +from multiprocessing import Pool + +from .templates.job import breakable, calculation_context, job_configuration_wrapper +from .templates.kinetic_modeling_jobs import KineticModelingJob +from scine_puffin.config import Configuration + +from ..utilities.rms_kinetic_model import RMSKineticModel +from ..utilities.kinetic_modeling_sensitivity_analysis import RMSKineticModelingSensitivityAnalysis + + +class RmsKineticModeling(KineticModelingJob): + """ + Micro-kinetic modeling with the puffin-interface to the reaction mechanism simulator (RMS). + Note: Running jobs with RMS as a backend requires an installation of RMS (including its Python bindings). This + is not supported through the Puffin bootstrapping. See programs/rms.py for more information. + **Order Name** + ``rms_kinetic_modeling`` + **Required Input** + model :: db.Model + The electronic structure model to flag the new properties with. + + **Required Settings** + aggregate_ids :: List[str] + The aggregate IDs (as strings). + reaction_ids :: List[str] + The reaction IDs (as strings). + aggregate_types :: List[int] + The aggregate types. 0 for compounds, 1 for flasks. + ea :: List[float] + The activation energies for each reaction as the free energy difference to the reaction LHS (in J/mol). + enthalpies :: List[float] + The enthalpy of each aggregate (in J/mol). + entropies :: List[float] + The entropy of each aggregate (in J/mol). + arrhenius_prefactors :: List[float] + The exponential prefactors. + arrhenius_temperature_exponents :: List[float] + The temperature exponents in the Arrhenius equation. + start_concentrations :: List[float] + The start concentrations of each aggregate. + + **Optional Settings** + Optional settings are read from the ``settings`` field, which is part of + any ``Calculation`` stored in a SCINE Database. + + The following options are available: + solver :: str + ODE solver.
Currently only "CVODE_BDF" is supported. + phase_type :: str + The reactor phase. Options are ideal_gas (assumes P=const, T=const), ideal_dilute_solution + (assumes V=const, T=const). Default is "ideal_gas". + max_time :: float + Maximum integration time in seconds. Default 3600.0. + energy_model_program :: str + The program with which the electronic structure model should be flagged. Default any. + viscosity :: float + The solvent viscosity (in Pa s). Needs phase=ideal_dilute_solution and diffusion_limited=true. If "none", the + viscosity is taken from tabulated values. + reactor_solvent :: str + The reactor solvent. If "none", the solvent in the electronic structure model is used if any. + site_density :: float + The density of surface sites. Default is "none". Requires phase=ideal_surface. Not fully supported yet. + diffusion_limited :: bool + If true, diffusion limits are enforced. Requires phase=ideal_dilute_solution. May lead to numerical + instability of the ODE solver. Default False. + reactor_temperature :: float + The reactor temperature (in K). If "none", the temperature in the model is used. Default "none". + reactor_pressure :: float + The reactor pressure (in Pa). If none, the pressure in the model is used. Default "none". + absolute_tolerance :: float + The absolute tolerance of the ODE solver. High values lead to a faster but less reliable integration. Default + 1e-20. + relative_tolerance :: float + The relative tolerance of the ODE solver. High values lead to a faster but less reliable integration. + Default 1e-6. + solvent_aggregate_str_id :: str + The aggregate ID of the solvent as a string. If "none", the solvent is assumed to be unreactive. + solvent_concentration :: float + The solvent concentration. Default is 55.3 (mol/L). + + enforce_mass_balance :: bool + If true, an error is raised for any non-balanced reaction.
+ + screen_sensitivities :: bool + If true, only parameters associated to aggregates and reactions with significant concentration flux are + considered in the sensitivity analysis (flux > oaat_vertex_flux_threshold | flux > oaat_edge_flux_threshold). + + **Required Packages** + - SCINE: Database (present by default) + - rms + + **Generated Data** + If successful (technically and chemically) the following data will be + generated and added to the database: + + Properties + The maximum and final concentration, and the vertex flux of each aggregate is added to + its centroid. The edge flux for each reaction is added to the centroid of the first aggregate on the reaction's + LHS. Note, that the properties are NOT listed in the results to avoid large DB documents. + """ + def __init__(self): + super().__init__() + self.name: str = "RMS kinetic modeling job" + self.settings = { + "solver": "Recommended", + "phase_type": "ideal_gas", + "max_time": 3600.0, + "energy_model_program": "any", + "viscosity": "none", # in Pa s + "reactor_solvent": "none", + "site_density": "none", + "diffusion_limited": False, + "reactor_temperature": "none", # in K + "reactor_pressure": "none", # in Pa, + "absolute_tolerance": 1e-20, + "relative_tolerance": 1e-6, + "solvent_aggregate_str_id": "none", + "solvent_concentration": 55.3, + "sensitivity_analysis": "none", + "adjoined_sensitivity_threshold": 1e-2, + "absolute_tolerance_sensitivity": 1e-6, + "relative_tolerance_sensitivity": 1e-3, + "oaat_vertex_flux_threshold": 1e-2, + "oaat_edge_flux_threshold": 1e-2, + "sample_size": 10, + "local_sensitivities": False, + "adjoined_sensitivities": False, + "save_oaat_var": False, + "distribution_shape": "truncnorm", + "enforce_mass_balance": True, + "screen_global_sens_size": 1e+3 + } + self.model: db.Model = db.Model("PM6", "PM6", "") + self._rms_file_name: str = "chem.rms" + self._rms_aggregate_indices = [] + self._solvent_a_index: Optional[int] = None + self._viscosity: Optional[float] = None +
self._solvent: Optional[str] = None + self._site_density: Optional[float] = None + self._pressure: Optional[float] = None + self._temperature: Optional[float] = None + self._phase_type: Optional[str] = None + self._phase_options = ["ideal_dilute_solution", "ideal_gas"] + self._sensitivity_options = ["none", "adjoined_sensitivities", "one_at_a_time_sensitivities", "morris", "sobol"] + self._solvent_aggregate_str_id: Optional[str] = None + self._solvent_species_added: bool = False + self.reaction_ids: List[db.ID] = [] + self.aggregate_id_list: List[db.ID] = [] + self.aggregate_types: List[db.CompoundOrFlask] = [] + self.max_time: float = float(self.settings["max_time"]) + self.abs_tol: float = float(self.settings["absolute_tolerance"]) + self.rel_tol: float = float(self.settings["relative_tolerance"]) + self._sample_size: int = 50 + self._n_cores: int = 1 + self.force_parallel = False + + self.rms_kinetic_model: Optional[RMSKineticModel] = None + + def use_n_cores(self, n_cores: int) -> int: + assert isinstance(self.rms_kinetic_model, RMSKineticModel) + if self.force_parallel: + return n_cores + if self.rms_kinetic_model.get_n_parameters() > 100: + return n_cores + if self.settings["sensitivity_analysis"] in ["morris", "sobol"]: + return n_cores + return 1 + + @staticmethod + def required_programs(): + return ["database", "rms"] + + @job_configuration_wrapper + def run(self, manager: db.Manager, calculation: db.Calculation, config: Configuration) -> bool: + with breakable(calculation_context(self)): + self._calculation = calculation + self.settings.update(calculation.get_settings()) + self._resolve_default_settings() + rms_path = config["programs"]["rms"]["root"] + self.rms_kinetic_model = RMSKineticModel(self.settings, manager, self.model, rms_path, self._rms_file_name) + # Importing Julia has a significant overhead. If we parallelize we have to import Julia potentially more + # than once (i.e., once in each sub process). 
Therefore, we should only do this if the kinetic model is + # large enough that the parallelization actually accelerates the calculation. + self._n_cores = self.use_n_cores(int(config["resources"]["cores"])) + if self._n_cores > 1: + # In the case we parallelize the kinetic modeling, we must ensure that Julia is only imported in the + # worker processes and NEVER in the main process. + with Pool(1) as pool: + res = pool.map(self.rms_kinetic_model.calculate_fluxes_and_concentrations, [self._rms_file_name]) + else: + res = [self.rms_kinetic_model.calculate_fluxes_and_concentrations(self._rms_file_name)] + c_max, c_final, c_flux, r_flux, adjoined_sens, _ = res[0] + + print("Maximum concentrations") + print(c_max) + print("Final concentrations") + print(c_final) + print("Absolute edge flux") + print(r_flux) + print("Absolute vertex flux") + print(c_flux) + + results = calculation.get_results() + if self.settings["sensitivity_analysis"] == "one_at_a_time_sensitivities"\ + or self.settings['local_sensitivities']: + vertex_t = float(self.settings["oaat_vertex_flux_threshold"]) + edge_t = float(self.settings["oaat_edge_flux_threshold"]) + flux_replace = 10.0 * vertex_t + sens = RMSKineticModelingSensitivityAnalysis(self.rms_kinetic_model, self._n_cores, self._sample_size) + max_sens, sens_c_final, flux_sens, var_max, var_final, var_flux = sens.one_at_a_time_differences( + c_flux, r_flux, vertex_t, edge_t, flux_replace, c_max, c_final) + + self._write_sensitivities_to_database(flux_sens, "oaat_flux") + self._write_sensitivities_to_database(max_sens, "oaat_max") + self._write_sensitivities_to_database(sens_c_final, "oaat_final") + if self.settings["save_oaat_var"]: + self._write_concentrations_to_centroids(self.aggregate_id_list, self.aggregate_types, + self.reaction_ids, [var_final, var_max, var_flux], [], + ["var_final_c", "var_max_c", "var_flux_c"], [], + results) + if adjoined_sens is not None: + print("Adjoined Sensitivities") + print(adjoined_sens) + 
self._write_sensitivities_to_database(adjoined_sens, "adjoined") + salib = RMSKineticModelingSensitivityAnalysis(self.rms_kinetic_model, self._n_cores, self._sample_size, + self.settings["distribution_shape"]) + if self.settings["sensitivity_analysis"] == "morris": + if self.settings["screen_global_sens_size"] < salib.get_n_parameters(): + vertex_t = float(self.settings["oaat_vertex_flux_threshold"]) + edge_t = float(self.settings["oaat_edge_flux_threshold"]) + salib.set_prescreening_condition(c_flux, r_flux, vertex_t, edge_t) + mu, mu_star, sigma, _ = salib.morris_sensitivities() + m_v = salib.analyse_runs() + self._write_sensitivities_to_database(mu['c_max'], "morris_mu_c_max") + self._write_sensitivities_to_database(mu['c_final'], "morris_mu_c_final") + self._write_sensitivities_to_database(mu_star['c_max'], "morris_mu_star_c_max") + self._write_sensitivities_to_database(mu_star['c_final'], "morris_mu_star_c_final") + self._write_sensitivities_to_database(sigma['c_max'], "morris_sigma_c_max") + self._write_sensitivities_to_database(sigma['c_final'], "morris_sigma_c_final") + self._write_concentrations_to_centroids(self.aggregate_id_list, self.aggregate_types, self.reaction_ids, + [m_v[0][0], m_v[1][0], m_v[2][0], m_v[0][1], m_v[1][1], + m_v[2][1]], [], ["morris_mean_c_max", "morris_mean_c_final", + "morris_mean_c_flux", "morris_var_c_max", + "morris_var_c_final", "morris_var_c_flux"], + [], results) + + elif self.settings["sensitivity_analysis"] == "sobol": + st, s1, _ = salib.sobol_sensitivities() + m_v = salib.analyse_runs() + self._write_sensitivities_to_database(st['c_max'], "sobol_st_c_max") + self._write_sensitivities_to_database(st['c_final'], "sobol_st_c_final") + self._write_sensitivities_to_database(s1['c_max'], "sobol_s1_c_max") + self._write_sensitivities_to_database(s1['c_final'], "sobol_s1_c_final") + self._write_concentrations_to_centroids(self.aggregate_id_list, self.aggregate_types, self.reaction_ids, + [m_v[0][0], m_v[1][0], m_v[2][0], 
m_v[0][1], m_v[1][1], + m_v[2][1]], [], ["sobol_mean_c_max", "sobol_mean_c_final", + "sobol_mean_c_flux", "sobol_var_c_max", + "sobol_var_c_final", "sobol_var_c_flux"], + [], results) + + self._write_concentrations_to_centroids(self.aggregate_id_list, self.aggregate_types, self.reaction_ids, + [c_max, c_final, c_flux], [r_flux], + [self.c_max_label, self.c_final_label, self.c_flux_label], + [self.r_flux_label], results) + self._disable_all_aggregates() + self.complete_job() + return self.postprocess_calculation_context() + + def _write_sensitivities_to_database(self, absolute_sensitivities: np.ndarray, prop_label: str): + counter = 0 + results = self._calculation.get_results() + for a_id, a_type in zip(self.aggregate_id_list, self.aggregate_types): + centroid = self._get_aggregate_centroid(a_id, a_type) + label = "max_free_energy_sensitivity_" + prop_label + self._write_concentration_property(centroid, label, absolute_sensitivities[counter], results) + counter += 1 + # The last species free energy sensitivity may be for a potentially added solvent. 
+ if absolute_sensitivities.shape[0] > len(self.reaction_ids) + len(self.aggregate_id_list): + counter += 1 + for r_id in self.reaction_ids: + centroid = self._get_reaction_centroid(r_id) + label = r_id.string() + "_reaction_barrier_sensitivity_" + prop_label + self._write_concentration_property(centroid, label, absolute_sensitivities[counter], results) + counter += 1 + + def _resolve_default_settings(self): + self.model = self._calculation.get_model() + self.model.program = self.settings["energy_model_program"] + self._sample_size = self.settings["sample_size"] + + self.reaction_ids = [db.ID(r_id_str) for r_id_str in self.settings["reaction_ids"]] + self.aggregate_id_list = [db.ID(c_id_str) for c_id_str in self.settings["aggregate_ids"]] + self.aggregate_types = [db.CompoundOrFlask(a_type) for a_type in self.settings["aggregate_types"]] diff --git a/scine_puffin/jobs/scine_afir.py b/scine_puffin/jobs/scine_afir.py index bfc84d2..be091bb 100644 --- a/scine_puffin/jobs/scine_afir.py +++ b/scine_puffin/jobs/scine_afir.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" @@ -101,7 +101,6 @@ def run(self, manager, calculation, config: Configuration) -> bool: raise RuntimeError(self.name + " is only meant for a single structure!") settings_manager.separate_settings(self._calculation.get_settings()) self.sort_settings(settings_manager.task_settings) - new_label = self.determine_new_label(structure) self.systems, keys = settings_manager.prepare_readuct_task( structure, calculation, calculation.get_settings(), config["resources"] @@ -123,6 +122,10 @@ def run(self, manager, calculation, config: Configuration) -> bool: [structure.get_multiplicity()], settings_manager.calculator_settings ) + + graph, self.systems = self.make_graph_from_calc(self.systems, keys[0]) + new_label = self.determine_new_label(structure, graph) + new_structure = self.optimization_postprocessing(success, self.systems, product_names, structure, new_label, program_helper, ['energy', 'bond_orders']) self.store_property( diff --git a/scine_puffin/jobs/scine_bond_orders.py b/scine_puffin/jobs/scine_bond_orders.py index 2baa62f..1d5b42f 100644 --- a/scine_puffin/jobs/scine_bond_orders.py +++ b/scine_puffin/jobs/scine_bond_orders.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/jobs/scine_bspline_optimization.py b/scine_puffin/jobs/scine_bspline_optimization.py index 77c7b0f..fd68c14 100644 --- a/scine_puffin/jobs/scine_bspline_optimization.py +++ b/scine_puffin/jobs/scine_bspline_optimization.py @@ -1,13 +1,18 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. 
See LICENSE.txt for details. """ +import scine_database as db +import scine_utilities as utils + +from copy import deepcopy from scine_puffin.config import Configuration -from scine_puffin.utilities import scine_helper from .templates.job import breakable, calculation_context, job_configuration_wrapper from .templates.scine_react_job import ReactJob +from typing import Optional, List +from scine_puffin.utilities.scine_helper import SettingsManager class ScineBsplineOptimization(ReactJob): @@ -36,7 +41,7 @@ class ScineBsplineOptimization(ReactJob): Optional settings are read from the ``settings`` field, which is part of any ``Calculation`` stored in a SCINE Database. All possible settings for this job are based on those available in SCINE - Readuct. For a complete list see the + ReaDuct. For a complete list see the `ReaDuct manual `_ Given that this job does more than one, in fact many separate calculations @@ -76,7 +81,7 @@ class ScineBsplineOptimization(ReactJob): of 2 (default) will check triplet and quintet for a singlet and will check singlet, quintet und septet for triplet. - Additionally all settings that are recognized by the SCF program chosen. + Additionally, all settings that are recognized by the SCF program chosen. are also available. These settings are not required to be prepended with any flag. 
@@ -115,7 +120,7 @@ class ScineBsplineOptimization(ReactJob): def __init__(self): super().__init__() - self.name = "Scine double eneded transition state optimization from b-splines" + self.name = "Scine double ended transition state optimization from b-splines" self.exploration_key = "bspline" tsopt_defaults = { "output": ["ts"], @@ -151,13 +156,21 @@ def __init__(self): def run(self, manager, calculation, config: Configuration) -> bool: import scine_readuct as readuct - import scine_utilities as utils - import scine_database as db import scine_molassembler as masm # Everything that calls SCINE is enclosed in a try/except block with breakable(calculation_context(self)): + if len(calculation.get_structures()) != 2: + self.raise_named_exception(f"{self.name} requires 2 input structures.") r_structure = db.Structure(calculation.get_structures()[0], self._structures) p_structure = db.Structure(calculation.get_structures()[1], self._structures) + if len(r_structure.get_atoms()) != len(p_structure.get_atoms()): + self.raise_named_exception(f"{self.name} requires that the input structures are the same molecule.") + if r_structure.get_model() != p_structure.get_model(): + self.raise_named_exception(f"{self.name} requires that the input structures have the same model.") + if r_structure.get_multiplicity() != p_structure.get_multiplicity() or \ + r_structure.get_charge() != p_structure.get_charge(): + self.raise_named_exception(f"{self.name} requires that the input structures have the same " + f"molecular charge and spin multiplicity.") settings_manager, program_helper = self.create_helpers(r_structure) settings_manager.separate_settings(self._calculation.get_settings()) settings_manager.update_calculator_settings(r_structure, self._calculation.get_model(), @@ -168,12 +181,12 @@ def run(self, manager, calculation, config: Configuration) -> bool: # Prepare the structures by # 1. optimizing both spline ends. # 2. set attributes of parent class start_graph, start_charges etc. 
- reactant_name, product_name, opt_r_graph, opt_p_graph = self.prepare_structures(settings_manager, - r_structure, p_structure) + reactant_name, product_name, opt_r_graph, opt_p_graph = self.__prepare_structures(settings_manager, + r_structure, p_structure) # Stop the calculation if both spline ends collapsed to the same species. if masm.JsonSerialization.equal_molecules(opt_r_graph, opt_p_graph): - self.check_barrierless_reactions(settings_manager, reactant_name, product_name, r_structure, - p_structure) + self.__check_barrierless_reactions(settings_manager, reactant_name, product_name, r_structure, + p_structure) calculation.set_comment(self.name + " Spline ends transform barrier-less!") self.capture_raw_output() raise breakable.Break @@ -195,81 +208,27 @@ def run(self, manager, calculation, config: Configuration) -> bool: "B-Spline optimization failed:\n", ) - """ TSOPT JOB """ - print("TSOpt Settings:") - print(self.settings["tsopt"], "\n") + """ TSOPT-Hess-IRC """ inputs = self.output("bspline") - self.systems, success = self.observed_readuct_call( - 'run_tsopt_task', self.systems, inputs, **self.settings["tsopt"]) - self.throw_if_not_successful( - success, - self.systems, - self.output("tsopt"), - ["energy"], - "TS optimization failed:\n", - ) - """ TS HESSIAN """ - inputs = self.output("tsopt") - self.systems, success = readuct.run_hessian_task(self.systems, inputs) - self.throw_if_not_successful( - success, - self.systems, - inputs, - ["energy", "hessian", "thermochemistry"], - "TS Hessian calculation failed.\n", - ) - if self.n_imag_frequencies(inputs[0]) != 1: - self.raise_named_exception( - "Error: " - + self.name - + " failed with message: " - + "TS has incorrect number of imaginary frequencies." 
- ) - """ IRC JOB """ - # IRC (only a few steps to allow decent graph extraction) - print("IRC Settings:") - print(self.settings["irc"], "\n") - self.systems, success = self.observed_readuct_call( - 'run_irc_task', self.systems, inputs, **self.settings["irc"]) - """ IRC OPT JOB """ - # Run a small energy minimization after initial IRC - inputs = self.output("irc") - print("IRC Optimization Settings:") - print(self.settings["ircopt"], "\n") - self.systems, success = self.observed_readuct_call( - 'run_opt_task', self.systems, [inputs[0]], **self.settings["ircopt"]) - self.systems, success = self.observed_readuct_call( - 'run_opt_task', self.systems, [inputs[1]], **self.settings["ircopt"]) - - """ Check whether we have a valid IRC """ - initial_charge = settings_manager.calculator_settings[utils.settings_names.molecular_charge] - product_names, start_names = self.irc_sanity_checks_and_analyze_sides( - initial_charge, self.check_charges, inputs, settings_manager.calculator_settings) - if product_names is None: # IRC did not pass checks, reason has been set as comment, complete job - self.verify_connection() - self.capture_raw_output() - scine_helper.update_model( - self.systems[self.output("tsopt")[0]], - self._calculation, - self.config, - ) - raise breakable.Break + product_names, start_names = self._tsopt_hess_irc_ircopt(inputs[0], settings_manager) """ Store new starting material conformer(s) """ r_tuple = None - p_tuple = self.check_barrierless_alternative_reactions(settings_manager, product_name, p_structure, - product_names, "product_00") + # This may be re-enabled in the future. Therefore, I would like to keep it as a comment. 
+ # p_tuple = self.__check_barrierless_alternative_reactions(settings_manager, product_name, p_structure, + # product_names, "product_00") if start_names is not None: - r_tuple = self.check_barrierless_alternative_reactions(settings_manager, reactant_name, r_structure, - start_names, "reactant_00") + if not self.no_irc_structure_matches_start: + r_tuple = self.__check_barrierless_alternative_reactions(settings_manager, reactant_name, + r_structure, start_names, "reactant_00") start_structures = self.store_start_structures( - start_names, program_helper, "tsopt", []) + start_names, program_helper, "tsopt", [r_structure.id()]) else: if r_structure.get_model() == self._calculation.get_model(): start_structures = [self._calculation.get_structures()[0]] else: start_structures = self.store_start_structures( - [reactant_name], program_helper, "tsopt", []) + [reactant_name], program_helper, "tsopt", [r_structure.id()]) # If the lhs or rhs of the reaction decomposes into fragment through a barrier-less reaction and these # fragments are different from the fragments of the original lhs or rhs, e.g, @@ -278,18 +237,20 @@ def run(self, manager, calculation, config: Configuration) -> bool: # we have to add a barrier-less reaction transforming between the fragments. A simple example for such # a situation is a barrier-less protonation that is only barrier-less with the "new" electronic structure # model. - lhs, rhs = self.react_postprocessing(product_names, program_helper, "tsopt", start_structures) + # Note that this logic only applies if the individual endpoints used as input for the spline, are + # rediscovered by the IRC. Since this is only checked for the lhs, the corresponding fragmentation + # embedding for the rhs is disabled at the moment. 
+ lhs, _, _ = self.react_postprocessing(product_names, program_helper, "tsopt", start_structures) if r_tuple is not None: - self.add_barrierless_reaction(r_tuple[0], r_tuple[1], r_tuple[2], lhs, r_tuple[3]) - if p_tuple is not None: - self.add_barrierless_reaction(p_tuple[0], p_tuple[1], p_tuple[2], rhs, p_tuple[3]) - + self.__add_barrierless_reaction(r_tuple[0], r_tuple[1], r_tuple[2], lhs, r_tuple[3]) + # if p_tuple is not None: + # self.__add_barrierless_reaction(p_tuple[0], p_tuple[1], p_tuple[2], rhs, p_tuple[3]) return self.postprocess_calculation_context() - def set_up_calculator(self, structure, settings_manager, name): - import scine_utilities as utils - from copy import deepcopy - + def __set_up_calculator(self, structure: db.Structure, settings_manager: SettingsManager, name: str): + """ + Create a calculator for the given structure. + """ xyz_name = name + ".xyz" utils.io.write(xyz_name, structure.get_atoms()) structure_calculator_settings = deepcopy(settings_manager.calculator_settings) @@ -306,31 +267,39 @@ def set_up_calculator(self, structure, settings_manager, name): fragment_atoms, graph_string, charges, multiplicities, decision_lists = self.get_graph_charges_multiplicities( name, structure.get_charge()) - if structure.has_graph("masm_cbor_graph") or structure.has_graph("masm_decision_list"): + if structure.has_graph("masm_cbor_graph") and structure.has_graph("masm_decision_list"): structure.set_graph("masm_cbor_graph", graph_string) structure.set_graph("masm_decision_list", ";".join(decision_lists)) return fragment_atoms, graph_string, charges, multiplicities, decision_lists - def check_barrierless_alternative_reactions(self, settings_manager, opt_name_reactant, - r_structure, opt_reactant_fragment_names, - r_name): - r_fragments, _, r_charges, r_multi, _ = self.set_up_calculator(r_structure, settings_manager, r_name) + def __check_barrierless_alternative_reactions(self, settings_manager: SettingsManager, opt_name_reactant: str, + r_structure: 
db.Structure, opt_reactant_fragment_names: List[str], + r_name: str): + """ + Check if there are multiple plausible decomposition paths for the given complex or structure, i.e, the structure + reacted barrier-less leading to a different fragmentation path as for the original elementary step. + """ + r_fragments, _, r_charges, r_multi, _ = self.__set_up_calculator(r_structure, settings_manager, r_name) if len(r_fragments) == 1: return None - opt_r_orig_names, opt_r_orig_fragment_graphs, _ = self.optimize_and_get_graphs_and_energies( - "opt_r_orig_fragments", r_fragments, r_charges, r_multi, settings_manager + opt_r_orig_names, opt_r_orig_fragment_graphs, _ = self.__optimize_and_get_graphs_and_energies( + "opt_" + r_name + "_orig_fragments", r_fragments, r_charges, r_multi, settings_manager ) - if not self.same_molecules(opt_r_orig_names, opt_reactant_fragment_names): - opt_r_graph = self.make_graph_from_calc(self.systems, opt_name_reactant) + if not self.__same_molecules(opt_r_orig_names, opt_reactant_fragment_names): + opt_r_graph, self.systems = self.make_graph_from_calc(self.systems, opt_name_reactant) return opt_name_reactant, opt_r_orig_fragment_graphs, opt_r_graph, opt_r_orig_names return None - def check_barrierless_reactions(self, settings_manager, opt_name_reactant, opt_name_product, r_structure, - p_structure): + def __check_barrierless_reactions(self, settings_manager: SettingsManager, opt_name_reactant: str, + opt_name_product: str, r_structure: db.Structure, p_structure: db.Structure): + """ + Check if both input structures collapsed to the same molecule. We will compare the optimized structures of + the input's lhs and rhs. 
+ """ import scine_molassembler as masm results = self._calculation.get_results() results.clear() @@ -339,9 +308,9 @@ def check_barrierless_reactions(self, settings_manager, opt_name_reactant, opt_n r_name = "reactant_00" p_name = "product_00" - r_fragments, r_graph, r_charges, r_multi, _ = self.set_up_calculator( + r_fragments, r_graph, r_charges, r_multi, _ = self.__set_up_calculator( r_structure, settings_manager, r_name) - p_fragments, p_graph, p_charges, p_multi, _ = self.set_up_calculator( + p_fragments, p_graph, p_charges, p_multi, _ = self.__set_up_calculator( p_structure, settings_manager, p_name) # check graph of spline ends opt_r_fragments, opt_r_graph, opt_r_charges, opt_r_multiplicities, _ =\ @@ -350,41 +319,42 @@ def check_barrierless_reactions(self, settings_manager, opt_name_reactant, opt_n self.get_graph_charges_multiplicities(opt_name_product, charge) # create structures for optimized ends if ";" in opt_r_graph: - opt_r_fragment_names, opt_r_frgagment_graphs, _ = self.optimize_and_get_graphs_and_energies( + opt_r_fragment_names, opt_r_fragment_graphs, _ = self.__optimize_and_get_graphs_and_energies( "opt_r_fragments", opt_r_fragments, opt_r_charges, opt_r_multiplicities, settings_manager) - opt_reactant_structure_ids = self.add_barrierless_reaction(opt_name_reactant, opt_r_frgagment_graphs, - opt_r_graph, None, opt_r_fragment_names) - + opt_reactant_structure_ids = self.__add_barrierless_reaction(opt_name_reactant, opt_r_fragment_graphs, + opt_r_graph, None, opt_r_fragment_names) # optimization changed the initial complex. 
Add barrier-less step between previous fragments and the # optimized fragment if ";" in r_graph and not masm.JsonSerialization.equal_molecules(r_graph, opt_r_graph): - opt_r_orig_framnet_names, opt_r_orig_fragment_graphs, _ = self.optimize_and_get_graphs_and_energies( + opt_r_orig_framnet_names, opt_r_orig_fragment_graphs, _ = self.__optimize_and_get_graphs_and_energies( "opt_r_orig_fragments", r_fragments, r_charges, r_multi, settings_manager ) - if not self.same_molecules(opt_r_orig_framnet_names, opt_r_fragment_names): - self.add_barrierless_reaction(opt_name_reactant, opt_r_orig_fragment_graphs, opt_r_graph, - opt_reactant_structure_ids, opt_r_orig_framnet_names) + if not self.__same_molecules(opt_r_orig_framnet_names, opt_r_fragment_names): + self.__add_barrierless_reaction(opt_name_reactant, opt_r_orig_fragment_graphs, opt_r_graph, + opt_reactant_structure_ids, opt_r_orig_framnet_names) if ";" in opt_p_graph and not masm.JsonSerialization.equal_molecules(opt_p_graph, opt_r_graph): - opt_p_fragment_names, opt_p_frgagment_graphs, _ = self.optimize_and_get_graphs_and_energies( + opt_p_fragment_names, opt_p_frgagment_graphs, _ = self.__optimize_and_get_graphs_and_energies( "opt_p_fragments", opt_p_fragments, opt_p_charges, opt_p_multiplicities, settings_manager) - opt_structure_ids = self.add_barrierless_reaction(opt_name_product, opt_p_frgagment_graphs, opt_p_graph, - None, opt_p_fragment_names) + opt_structure_ids = self.__add_barrierless_reaction(opt_name_product, opt_p_frgagment_graphs, opt_p_graph, + None, opt_p_fragment_names) if ";" in p_graph and not masm.JsonSerialization.equal_molecules(p_graph, opt_p_graph): - opt_p_orig_framnet_names, opt_p_orig_fragment_graphs, _ = self.optimize_and_get_graphs_and_energies( + opt_p_orig_framnet_names, opt_p_orig_fragment_graphs, _ = self.__optimize_and_get_graphs_and_energies( "opt_p_orig_fragments", p_fragments, p_charges, p_multi, settings_manager ) - if not self.same_molecules(opt_p_orig_framnet_names, 
opt_p_fragment_names): - self.add_barrierless_reaction(opt_name_product, opt_p_orig_fragment_graphs, opt_p_graph, - opt_structure_ids, opt_p_orig_framnet_names) - - def prepare_structures(self, settings_manager, reactant_structure, products_structure): - from copy import deepcopy + if not self.__same_molecules(opt_p_orig_framnet_names, opt_p_fragment_names): + self.__add_barrierless_reaction(opt_name_product, opt_p_orig_fragment_graphs, opt_p_graph, + opt_structure_ids, opt_p_orig_framnet_names) + + def __prepare_structures(self, settings_manager, reactant_structure, products_structure): + """ + Optimize the input structures + generate graphs. + """ # optimize spline ends opt_name_reactant = self.optimize_structures("opt_reactant", [reactant_structure.get_atoms()], [reactant_structure.get_charge()], @@ -407,21 +377,16 @@ def prepare_structures(self, settings_manager, reactant_structure, products_stru return opt_name_reactant[0], opt_name_product[0], opt_r_graph, opt_p_graph - def create_complex_or_minimum(self, graph, calculator_name): - import scine_database as db + def __create_complex_or_minimum(self, graph: str, calculator_name: str): + """ + Create a structure with the correct label according to its Molassembler serialization (graph). 
+ """ label = db.Label.MINIMUM_OPTIMIZED if ";" not in graph else db.Label.COMPLEX_OPTIMIZED new_structure = self.create_new_structure(self.systems[calculator_name], label) self.transfer_properties(self.ref_structure, new_structure) + bond_orders, self.systems = self.make_bond_orders_from_calc(self.systems, calculator_name) self.store_energy(self.systems[calculator_name], new_structure) - self.store_property( - self._properties, - "bond_orders", - "SparseMatrixProperty", - self.systems[calculator_name].get_results().bond_orders.matrix, - self._calculation.get_model(), - self._calculation, - new_structure, - ) + self.store_bond_orders(bond_orders, new_structure) self.store_property( self._properties, "atomic_charges", @@ -431,16 +396,19 @@ def create_complex_or_minimum(self, graph, calculator_name): self._calculation, new_structure, ) - self.add_graph(new_structure, self.systems[calculator_name].get_results().bond_orders) + self.add_graph(new_structure, bond_orders) results = self._calculation.get_results() results.add_structure(new_structure.id()) self._calculation.set_results(self._calculation.get_results() + results) return new_structure - def optimize_and_get_graphs_and_energies(self, fragment_base_name, fragments, charges, multiplicities, - settings_manager): - from copy import deepcopy + def __optimize_and_get_graphs_and_energies(self, fragment_base_name: str, fragments: List[utils.AtomCollection], + charges: List[int], multiplicities: List[int], + settings_manager: SettingsManager): + """ + Optimize molecular fragments and return their names, graphs, and energies. 
+ """ opt_fragment_names = self.optimize_structures(fragment_base_name, fragments, charges, multiplicities, deepcopy(settings_manager.calculator_settings.as_dict())) @@ -458,17 +426,20 @@ def optimize_and_get_graphs_and_energies(self, fragment_base_name, fragments, ch return opt_fragment_names, opt_f_graphs, fragment_energies - def add_barrierless_reaction(self, opt_name, opt_f_graphs, opt_graph, opt_structure_ids, - opt_fragment_names): - import scine_database as db + def __add_barrierless_reaction(self, opt_name: str, opt_f_graphs: List[str], opt_graph: str, + opt_structure_ids: Optional[List[db.ID]], opt_fragment_names: List[str]): + """ + Add a barrier-less reaction to the database between the optimized structure and its fragments. + """ + self.__assert_conserved_atom(opt_fragment_names, [opt_name]) db_results = self._calculation.get_results() db_results.clear() fragment_structures = [] for name, graph in zip(opt_fragment_names, opt_f_graphs): - fragment_structures.append(self.create_complex_or_minimum(graph, name)) + fragment_structures.append(self.__create_complex_or_minimum(graph, name)) if opt_structure_ids is None: - opt_structure_ids = [self.create_complex_or_minimum(opt_graph, opt_name).id()] + opt_structure_ids = [self.__create_complex_or_minimum(opt_graph, opt_name).id()] new_step = db.ElementaryStep() new_step.link(self._elementary_steps) new_step.create([s.id() for s in fragment_structures], opt_structure_ids) @@ -478,10 +449,37 @@ def add_barrierless_reaction(self, opt_name, opt_f_graphs, opt_graph, opt_struct self._calculation.set_results(self._calculation.get_results() + db_results) return opt_structure_ids - def same_molecules(self, names_one, names_two): + def __assert_conserved_atom(self, lhs_names: List[str], rhs_names: List[str]): + """ + Assert that the number of atoms did not change between the calculators. 
+ """ + lhs_atoms = [self.systems[name].structure for name in lhs_names] + rhs_atoms = [self.systems[name].structure for name in rhs_names] + lhs_counts = self.__get_elements_in_atom_collections(lhs_atoms) + rhs_counts = self.__get_elements_in_atom_collections(rhs_atoms) + print("Atom counts lhs", lhs_counts) + print("Atom counts rhs", rhs_counts) + if lhs_counts != rhs_counts: + raise RuntimeError("Error: Non stoichiometric elementary step detected. The structures are likely wrong.") + + @staticmethod + def __get_elements_in_atom_collections(atom_collections): + """ + Builds a dictionary containing the element symbols and the number of their occurrence in a given atom + collection. + """ + elements: List[str] = [] + for atom_collection in atom_collections: + elements += [str(e) for e in atom_collection.elements] + return {e: elements.count(e) for e in elements} + + def __same_molecules(self, names_one, names_two): + """ + Check if two molecules/calculators are the same according to charges, Molassembler (graphs), and multiplicities. 
+ """ import scine_molassembler as masm - graphs_one, charges_one, multies_one = self.get_sorted_graphs_charges_multiplicities(names_one) - graphs_two, charges_two, multies_two = self.get_sorted_graphs_charges_multiplicities(names_two) + graphs_one, charges_one, multies_one = self.__get_sorted_graphs_charges_multiplicities(names_one) + graphs_two, charges_two, multies_two = self.__get_sorted_graphs_charges_multiplicities(names_two) total_graph_one = ";".join(graphs_one) total_graph_two = ";".join(graphs_two) @@ -489,15 +487,18 @@ def same_molecules(self, names_one, names_two): return charges_one == charges_two and multies_one == multies_two and masm.JsonSerialization.equal_molecules( total_graph_one, total_graph_two) - def get_sorted_graphs_charges_multiplicities(self, names_one): - import scine_utilities as utils + def __get_sorted_graphs_charges_multiplicities(self, names_one: List[str]): + """ + Get the sorted Molassembler serializations (graphs), charges, and multiplicites of the calculators corresponding + to the given names + """ charges_one = [] multies_one = [] graphs_one = [] for name in names_one: charges_one.append(self.systems[name].settings[utils.settings_names.molecular_charge]) multies_one.append(self.systems[name].settings[utils.settings_names.spin_multiplicity]) - graphs_one.append(self.make_graph_from_calc(self.systems, name)) + graphs_one.append(self.make_graph_from_calc(self.systems, name)[0]) graphs, charges, multiplicities = ( list(start_val) for start_val in zip(*sorted(zip( diff --git a/scine_puffin/jobs/scine_conceptual_dft.py b/scine_puffin/jobs/scine_conceptual_dft.py index b92a95b..76f723a 100644 --- a/scine_puffin/jobs/scine_conceptual_dft.py +++ b/scine_puffin/jobs/scine_conceptual_dft.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. 
+Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ from copy import deepcopy diff --git a/scine_puffin/jobs/scine_dissociation_cut.py b/scine_puffin/jobs/scine_dissociation_cut.py index 626c45b..a19cb0d 100644 --- a/scine_puffin/jobs/scine_dissociation_cut.py +++ b/scine_puffin/jobs/scine_dissociation_cut.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -145,7 +145,7 @@ def run(self, _, calculation, config: Configuration) -> bool: f"with '{len(rc_atoms)}' nuclei.") """ gather reactant info """ - bond_orders = self.make_bond_orders_from_calc(self.systems, self.rc_key) + bond_orders, self.systems = self.make_bond_orders_from_calc(self.systems, self.rc_key) if not self.expected_results_check(self.systems, self.rc_key, ['energy', 'atomic_charges'])[0]: self.systems, success = readuct.run_sp_task(self.systems, [self.rc_key]) self.throw_if_not_successful(success, self.systems, [self.rc_key], ['energy', 'atomic_charges']) @@ -253,7 +253,7 @@ def run(self, _, calculation, config: Configuration) -> bool: charge = lowest_combination[mol] name = split_names[charge][mol] product_names.append(name) - graphs.append(self.make_graph_from_calc(self.systems, name)) + graphs.append(self.make_graph_from_calc(self.systems, name)[0]) joined_graph = ";".join(graphs) print("Barrierless dissociation product graph:") print(joined_graph) @@ -340,7 +340,7 @@ def _print_dissociation_energies(self, c_entry = str(list(np.array(charge_diffs) + np.array(fragment_base_charges))) if energy is None: m_entry: Union[str, List[int]] = "Not converged" - e_entry = "Not converged" + e_entry: Union[str, float] = "Not converged" else: m_entry = 
[self.systems[split_names[charge][i]].settings[utils.settings_names.spin_multiplicity] for i, charge in enumerate(charge_diffs)] @@ -361,7 +361,7 @@ def _save_dissociated_structures(self, split_names: Dict[int, List[str]], lowest # store energy and bond orders for reactive complex, i.e. structure being dissociated start_structure = db.Structure(self._calculation.get_structures()[0], self._structures) self.store_energy(self.systems[self.rc_key], start_structure) - bond_orders = self.make_bond_orders_from_calc(self.systems, self.rc_key) + bond_orders, self.systems = self.make_bond_orders_from_calc(self.systems, self.rc_key) self.store_property( self._properties, "bond_orders", @@ -380,15 +380,13 @@ def _save_dissociated_structures(self, split_names: Dict[int, List[str]], lowest for mol, (name, energy) in enumerate(zip(names, energies)): if energy is None: continue - graph = self.make_graph_from_calc(self.systems, name) - if ";" in graph: - rhs_structure = self.create_new_structure(self.systems[name], db.Label.COMPLEX_OPTIMIZED) - else: - rhs_structure = self.create_new_structure(self.systems[name], db.Label.MINIMUM_OPTIMIZED) + graph, self.systems = self.make_graph_from_calc(self.systems, name) + label = self._determine_new_label_based_on_graph(self.systems[name], graph) + rhs_structure = self.create_new_structure(self.systems[name], label) db_results.add_structure(rhs_structure.id()) self.transfer_properties(self.ref_structure, rhs_structure) self.store_energy(self.systems[name], rhs_structure) - bond_orders = self.make_bond_orders_from_calc(self.systems, name) + bond_orders, self.systems = self.make_bond_orders_from_calc(self.systems, name) self.store_property( self._properties, "bond_orders", diff --git a/scine_puffin/jobs/scine_geometry_optimization.py b/scine_puffin/jobs/scine_geometry_optimization.py index 77ab9b7..ea56da8 100644 --- a/scine_puffin/jobs/scine_geometry_optimization.py +++ b/scine_puffin/jobs/scine_geometry_optimization.py @@ -1,15 +1,16 @@ # -*- 
coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ from scine_puffin.config import Configuration from .templates.job import calculation_context, job_configuration_wrapper from .templates.scine_optimization_job import OptimizationJob +from .templates.scine_connectivity_job import ConnectivityJob -class ScineGeometryOptimization(OptimizationJob): +class ScineGeometryOptimization(OptimizationJob, ConnectivityJob): """ A job optimizing the geometry of a given structure, in search of a local minimum on the potential energy surface. @@ -81,17 +82,17 @@ def __init__(self): @job_configuration_wrapper def run(self, manager, calculation, config: Configuration) -> bool: + self.run_geometry_optimization(calculation, config) + return self.postprocess_calculation_context() + def run_geometry_optimization(self, calculation, config): import scine_database as db import scine_readuct as readuct + from scine_utilities import settings_names as sn # preprocessing of structure structure = db.Structure(calculation.get_structures()[0], self._structures) settings_manager, program_helper = self.create_helpers(structure) - try: - new_label = self.determine_new_label(structure) - except BaseException: - return False # actual calculation with calculation_context(self): @@ -100,10 +101,35 @@ def run(self, manager, calculation, config: Configuration) -> bool: ) if program_helper is not None: program_helper.calculation_preprocessing(systems[keys[0]], calculation.get_settings()) + optimize_cell: bool = "unitcelloptimizer" in settings_manager.task_settings \ + and settings_manager.task_settings["unitcelloptimizer"] systems, success = readuct.run_opt_task(systems, keys, **settings_manager.task_settings) - self.optimization_postprocessing( + if optimize_cell: + # require to 
change the calculator settings, to avoid model completion failure + model = calculation.get_model() + old_pbc = model.periodic_boundaries + new_pbc = systems[keys[0]].settings[sn.periodic_boundaries] + systems[keys[0]].settings[sn.periodic_boundaries] = old_pbc + + # Graph generation + if success: + graph, systems = self.make_graph_from_calc(systems, keys[0]) + new_label = self.determine_new_label(structure, graph) + else: + new_label = db.Label.IRRELEVANT + + if graph: + structure.set_graph("masm_cbor_graph", graph) + + t = self.optimization_postprocessing( success, systems, keys, structure, new_label, program_helper ) - return self.postprocess_calculation_context() + if optimize_cell: + # update model of new structure to match the optimized unit cell + new_structure = db.Structure(calculation.get_results().structure_ids[0], self._structures) + model = new_structure.get_model() + model.periodic_boundaries = new_pbc + new_structure.set_model(model) + return t diff --git a/scine_puffin/jobs/scine_geometry_validation.py b/scine_puffin/jobs/scine_geometry_validation.py new file mode 100644 index 0000000..519c3a9 --- /dev/null +++ b/scine_puffin/jobs/scine_geometry_validation.py @@ -0,0 +1,278 @@ +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. 
import numpy as np
import sys

from scine_puffin.config import Configuration
from .templates.job import breakable, calculation_context, job_configuration_wrapper
from .templates.scine_react_job import HessianJob, OptimizationJob, ConnectivityJob

# TODO: Guess this should inherit from a template


class ScineGeometryValidation(HessianJob, OptimizationJob, ConnectivityJob):
    """
    Validate that a structure is a minimum and that its graph is unchanged.

    A Hessian is calculated for the given structure. If the lowest wavenumber
    is imaginary and its magnitude exceeds ``imaginary_wavenumber_threshold``,
    the structure is displaced along all modes with a negative wavenumber and
    re-optimized; this is repeated at most ``optimization_attempts`` times.
    As soon as no such mode is left (and the last optimization succeeded), a
    graph is generated for the final geometry and compared with the
    ``masm_cbor_graph`` stored for the input structure. Results are written to
    the database only if both graphs describe the same molecules; otherwise
    the Hessian data of the input geometry is stored and the job fails.

    **Settings of the validation task** (no prefix)

    imaginary_wavenumber_threshold :: float
        Magnitude an imaginary wavenumber has to exceed for the structure to be
        treated as a false minimum. (Default: ``0.0``)
    fix_distortion_step_size :: float
        Step used for the displacement along every imaginary mode. The value
        ``-1.0`` selects the harmonic inversion point of each mode instead.
        (Default: ``-1.0``)
    distortion_inversion_point :: float
        Second argument handed to
        ``scine_utilities.normal_modes.get_harmonic_inversion_point``; only
        used if ``fix_distortion_step_size`` is ``-1.0``. (Default: ``2.0``)
    optimization_attempts :: int
        Maximum number of distort-and-optimize cycles. (Default: ``0``)

    **Settings of the optimization task** (prefix ``opt_``)

    Defaults set by this job are ``stop_on_error = False``,
    ``convergence_max_iterations = 50`` and
    ``geoopt_coordinate_system = "cartesianWithoutRotTrans"``.
    """

    def __init__(self):
        super().__init__()
        self.name = "Scine Geometry Validation Job"
        self.validation_key = "val"
        self.opt_key = "opt"
        # Settings without a task prefix belong to the validation task
        self.job_key = self.validation_key

        val_defaults = {
            "imaginary_wavenumber_threshold": 0.0,
            "fix_distortion_step_size": -1.0,
            "distortion_inversion_point": 2.0,
            "optimization_attempts": 0,
        }
        opt_defaults = {
            "stop_on_error": False,
            "convergence_max_iterations": 50,
            "geoopt_coordinate_system": "cartesianWithoutRotTrans"
        }

        self.settings = {
            "val": val_defaults,
            "opt": opt_defaults,
        }
        self.start_graph = ""
        self.start_key = ""
        self.end_graph = ""
        self.end_key = ""
        self.systems = {}
        self.inputs = []
        self.optimization_attempts_count = 0

    @job_configuration_wrapper
    def run(self, _, calculation, config: Configuration) -> bool:
        """
        Run the Hessian / distort / optimize cycle and store the results.

        Returns
        -------
        success :: bool
            The return value of ``postprocess_calculation_context``.
        """

        import scine_database as db
        import scine_readuct as readuct
        import scine_molassembler as masm

        # Everything that calls SCINE is enclosed in a try/except block
        with breakable(calculation_context(self)):
            # preprocessing of structure
            structure = db.Structure(calculation.get_structures()[0], self._structures)
            settings_manager, program_helper = self.create_helpers(structure)

            self.systems, _ = settings_manager.prepare_readuct_task(
                structure, calculation, calculation.get_settings(), config["resources"]
            )

            # default keys are ['system']
            self.inputs = list(self.systems.keys())
            self.start_key = self.inputs[0]
            # Safety check
            if not structure.has_graph("masm_cbor_graph"):
                self.raise_named_exception("Given structure has no graph.")
            self.start_graph = structure.get_graph("masm_cbor_graph")

            if program_helper is not None:
                program_helper.calculation_preprocessing(self.systems[self.start_key], calculation.get_settings())

            # Extract job settings
            self.sort_settings(settings_manager.task_settings)
            print("Validation Settings:")
            print(self.settings[self.validation_key], "\n")
            # Flags steering the while loop
            opt_success = True
            clear_to_write = False
            self.end_key = self.start_key
            # Only read after the loop if clear_to_write is True, i.e. after it was assigned in the SP branch
            end_sp_key = ""

            # Enter run loop for number of allowed attempts
            while self.optimization_attempts_count <= self.settings[self.validation_key]['optimization_attempts']:
                # --- Hessian job ---
                self.systems, success = readuct.run_hessian_task(
                    self.systems, self.inputs)
                self.throw_if_not_successful(
                    success,
                    self.systems,
                    self.inputs,
                    ["energy", "hessian", "thermochemistry"],
                    "Hessian calculation failed.\n",
                )
                # Process hessian calculation
                _ = self.calculation_postprocessing(success, self.systems, self.inputs, [
                    "energy", "hessian", "thermochemistry"])

                # Frequency check
                hessian_results = self.systems[self.end_key].get_results()
                false_minimum, mode_container = self.has_wavenumber_below_threshold(
                    hessian_results,
                    self.systems[self.end_key].structure,
                    self.settings[self.validation_key]["imaginary_wavenumber_threshold"]
                )

                if not false_minimum and opt_success:
                    # --- SP job ---
                    # Copy calculator and delete its previous results
                    end_sp_key = self.end_key + "_sp"
                    self.systems[end_sp_key] = self.systems[self.end_key].clone()
                    self.systems[end_sp_key].delete_results()

                    # Check graph
                    self.end_graph, self.systems = self.make_graph_from_calc(self.systems, end_sp_key)

                    print("Start Graph:")
                    print(self.start_graph)
                    print("End Graph:")
                    print(self.end_graph)
                    # Compare start and end graph
                    if not masm.JsonSerialization.equal_molecules(self.start_graph, self.end_graph):
                        self._calculation.set_comment(self.name + ": End structure does not match starting structure.")
                        clear_to_write = False
                    else:
                        clear_to_write = True
                    # Leave while loop, the validation has reached a verdict
                    break

                # Still attempts left to optimize and a false minimum (or a failed optimization)
                elif (false_minimum or not opt_success) and self.optimization_attempts_count < \
                        self.settings[self.validation_key]["optimization_attempts"]:
                    # --- Distort and opt job ---
                    self.optimization_attempts_count += 1
                    print("Optimization Attempt: " + str(self.optimization_attempts_count))
                    # Distort only, if it is still a false minimum
                    if false_minimum:
                        # Distort, write into calculator; this also redirects self.inputs
                        self._distort_structure_and_load_calculator(mode_container, settings_manager)

                    print("Optimization Settings:")
                    print(self.settings[self.opt_key], "\n")
                    # Prepare optimization
                    end_opt_key = "distorted_opt_" + str(self.optimization_attempts_count)
                    self.settings[self.opt_key]["output"] = [end_opt_key]
                    # Optimization, per default stop on error is false
                    self.systems, opt_success = readuct.run_opt_task(
                        self.systems, self.inputs, **self.settings[self.opt_key])
                    # Update inputs and end key for next round
                    self.inputs = self.settings[self.opt_key]["output"]
                    self.end_key = self.inputs[0]

                    # One could adjust the convergence criteria of the optimization here

                else:
                    sys.stderr.write("Warning: Unable to do anything with this structure.")
                    break

            # Verify before writing
            self.verify_connection()

            if clear_to_write:
                final_sp_results = self.systems[end_sp_key].get_results()
                # Store energy and bond orders, overwrites existing results of identical model
                self.store_energy(self.systems[end_sp_key], structure)
                self.store_property(self._properties,
                                    "bond_orders", "SparseMatrixProperty",
                                    final_sp_results.bond_orders.matrix,
                                    self._calculation.get_model(), self._calculation, structure)
                # Store hessian information
                self.store_hessian_data(self.systems[self.end_key], structure)

                # Only overwrite positions, if an optimization was attempted
                if self.optimization_attempts_count != 0:
                    # Overwrite positions
                    org_atoms = structure.get_atoms()
                    position_shift = self.systems[self.end_key].structure.positions - org_atoms.positions
                    # Store position shift
                    self.store_property(self._properties, "position_shift", "DenseMatrixProperty",
                                        position_shift, self._calculation.get_model(), self._calculation, structure)
                    structure.set_atoms(self.systems[self.end_key].structure)
                    # Overwrite graph if structure has changed, decision list and idx map might have changed
                    self.add_graph(structure, final_sp_results.bond_orders)
            else:
                self.store_hessian_data(self.systems[self.start_key], structure)
                self.capture_raw_output()
                self.raise_named_exception(
                    "Structure could not be validated to be a minimum. Hessian information is stored anyway."
                )

        return self.postprocess_calculation_context()

    # TODO: add proper typing
    @staticmethod
    def has_wavenumber_below_threshold(calc_results, atoms, wavenumber_threshold: float):
        """
        Check whether the lowest wavenumber is imaginary beyond a threshold.

        Parameters
        ----------
        calc_results
            Results object providing the ``hessian``.
        atoms
            Object providing ``elements`` and ``positions`` of the structure
            the Hessian belongs to.
        wavenumber_threshold :: float
            Magnitude the lowest (negative) wavenumber has to exceed.

        Returns
        -------
        false_minimum :: bool
            ``True`` if the smallest wavenumber is negative and its absolute
            value is larger than ``wavenumber_threshold``.
        modes_container
            The normal modes container calculated from the Hessian.
        """
        import scine_utilities as utils
        false_minimum = False
        # Get normal modes and frequencies
        modes_container = utils.normal_modes.calculate(calc_results.hessian, atoms.elements, atoms.positions)
        # Wavenumbers; negative values denote imaginary frequencies
        wavenumbers = modes_container.get_wave_numbers()
        # Get minimal frequency
        min_wavenumber = np.min(wavenumbers)
        if min_wavenumber < 0.0 and abs(min_wavenumber) > wavenumber_threshold:
            false_minimum = True

        return false_minimum, modes_container

    def _distort_structure_and_load_calculator(self, mode_container, settings_manager):
        """
        Displace the current end structure along all imaginary modes.

        The distorted geometry is written to ``distorted_guess_<attempt>.xyz``,
        loaded into a new calculator that is stored in ``self.systems`` under
        ``distorted_guess_<attempt>``, and ``self.inputs`` is pointed at it.

        Parameters
        ----------
        mode_container
            Normal modes container of the current end structure.
        settings_manager
            Settings manager providing the ``calculator_settings`` for the new
            calculator.
        """
        import scine_utilities as utils
        wavenumbers = np.asarray(mode_container.get_wave_numbers())
        img_wavenumber_indices = np.where(wavenumbers < 0.0)[0]
        modes = [utils.normal_modes.mode(wavenumbers[i], mode_container.get_mode(i))
                 for i in img_wavenumber_indices]

        fixed_step_size = self.settings[self.job_key]['fix_distortion_step_size']
        if fixed_step_size == -1.0:
            # Distortion according to inversion point, one step per imaginary mode
            max_steps = [utils.normal_modes.get_harmonic_inversion_point(
                wavenumbers[i], self.settings[self.job_key]['distortion_inversion_point'])
                for i in img_wavenumber_indices]
        else:
            # One identical step per imaginary mode. The list is repeated, not
            # the value scaled, so that it has as many entries as `modes`,
            # just like in the branch above.
            max_steps = [fixed_step_size] * len(modes)

        # Only one direction, could be improved by distorting in other direction
        # Displace along modes with imaginary wavenumbers and load calculator
        distorted_positions = utils.geometry.displace_along_modes(
            self.systems[self.end_key].structure.positions,
            modes, max_steps)
        distorted_key = "distorted_guess_" + str(self.optimization_attempts_count)
        xyz_name = distorted_key + ".xyz"
        # Write file and load into calculator
        distorted_atoms = utils.AtomCollection(
            self.systems[self.end_key].structure.elements, distorted_positions)
        utils.io.write(xyz_name, distorted_atoms)
        distorted_calculator = utils.core.load_system_into_calculator(
            xyz_name,
            self._calculation.get_model().method_family,
            **settings_manager.calculator_settings,
        )
        # Load into systems and update inputs for next step
        self.systems[distorted_key] = distorted_calculator
        self.inputs = [distorted_key]

    def sort_settings(self, task_settings: dict):
        """
        Take settings of configured calculation and save them in class member. Throw exception for unknown settings.

        Keys that match a default of the validation task are stored without
        modification; keys starting with ``<task>_`` (e.g. ``opt_``) are
        stored for that task with the prefix removed.

        Notes
        -----
        * Requires run configuration
        * May throw exception

        Parameters
        ----------
        task_settings :: dict
            A dictionary from which the settings are taken
        """
        self.extract_connectivity_settings_from_dict(task_settings)
        # Dissect settings into individual user task_settings
        for key, value in task_settings.items():
            for task in self.settings.keys():
                if task == self.job_key:
                    if key in self.settings[task].keys():
                        self.settings[task][key] = value
                        break  # found right task, leave inner loop
                else:
                    indicator_length = len(task) + 1  # underscore to avoid ambiguities
                    if key[:indicator_length] == task + "_":
                        self.settings[task][key[indicator_length:]] = value
                        break  # found right task, leave inner loop
            else:
                # Inner loop finished without break: no task accepted this key
                self.raise_named_exception(
                    "The key '{}' was not recognized.".format(key)
                )
""" @@ -31,11 +31,11 @@ class ScineIrcScan(OptimizationJob): stop_on_error :: bool If ``False``, the optimization does not need to fully converge but will be accepted as a success even if it reaches the maximum amounts of - optimization cycles. Also the resulting structures will be flagged as + optimization cycles. Also, the resulting structures will be flagged as ``minimum_guess`` if this option is set ot be ``False``. (Default: ``True``) irc_mode :: int - The mode to follow during the IRC scan. By default the first mode (0). + The mode to follow during the IRC scan. By default, the first mode (0). (mode with the larges imaginary frequency will be followed). All settings that are recognized by the SCF program chosen. @@ -97,9 +97,10 @@ def run(self, manager, calculation, config: Configuration) -> bool: self.raise_named_exception(results_err) scine_helper.update_model(systems[keys[0]], self._calculation, config) - label = db.Label.MINIMUM_OPTIMIZED + is_surface = structure.has_property("surface_atom_indices") + label = db.Label.SURFACE_OPTIMIZED if is_surface else db.Label.MINIMUM_OPTIMIZED if not success and not stop_on_error: - label = db.Label.MINIMUM_GUESS + label = db.Label.SURFACE_GUESS if is_surface else db.Label.MINIMUM_GUESS calculation.set_comment( "Optimization did not fully converge for one or both sides. 'forward' and " "'backward' structures are stored as '" diff --git a/scine_puffin/jobs/scine_react_complex_afir.py b/scine_puffin/jobs/scine_react_complex_afir.py index 768e343..d002706 100644 --- a/scine_puffin/jobs/scine_react_complex_afir.py +++ b/scine_puffin/jobs/scine_react_complex_afir.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" @@ -251,7 +251,7 @@ def run(self, manager, calculation, config: Configuration) -> bool: else: rc_opt_graph = None if rc_opt_graph is not None: - self.save_barrierless_reaction(rc_opt_graph, program_helper) + self.save_barrierless_reaction_from_rcopt(rc_opt_graph, program_helper) else: calculation.set_comment(self.name + " AFIR Job: No TS guess found.") self.capture_raw_output() @@ -317,65 +317,7 @@ def run(self, manager, calculation, config: Configuration) -> bool: ) """ TS Optimization """ - inputs = self.output("bspline") - self.setup_automatic_mode_selection("tsopt") - print("TSOpt Settings:") - print(self.settings["tsopt"], "\n") - self.systems, success = self.observed_readuct_call( - 'run_tsopt_task', self.systems, inputs, **self.settings["tsopt"] - ) - self.throw_if_not_successful( - success, self.systems, inputs, ["energy"], "TS optimization failed:\n" - ) - - """ TS Hessian """ - inputs = self.output("tsopt") - self.systems, success = readuct.run_hessian_task(self.systems, inputs) - self.throw_if_not_successful( - success, - self.systems, - inputs, - ["energy", "hessian", "thermochemistry"], - "TS Hessian failed:\n", - ) - - """ IRC """ - inputs = self.output("tsopt") - print("IRC Settings:") - print(self.settings["irc"], "\n") - self.systems, success = self.observed_readuct_call( - 'run_irc_task', self.systems, inputs, **self.settings["irc"]) - - """ IRC Opt""" - inputs = self.output("irc") - print("IRC Optimization Settings:") - print(self.settings["ircopt"], "\n") - self.systems, success = self.observed_readuct_call( - 'run_opt_task', self.systems, [inputs[0]], **self.settings["ircopt"]) - self.systems, success = self.observed_readuct_call( - 'run_opt_task', self.systems, [inputs[1]], **self.settings["ircopt"]) - - """ Check whether we have a valid IRC """ - initial_charge = settings_manager.calculator_settings[utils.settings_names.molecular_charge] - product_names, start_names = self.irc_sanity_checks_and_analyze_sides( - initial_charge, 
self.check_charges, inputs, settings_manager.calculator_settings) - if product_names is None: # IRC did not pass checks, reason has been set as comment, complete job - self.verify_connection() - self.capture_raw_output() - scine_helper.update_model( - self.systems[self.output("tsopt")[0]], - self._calculation, - self.config, - ) - raise breakable.Break - - """ Store new starting material conformer(s) """ - if start_names is not None: - start_structures = self.store_start_structures( - start_names, program_helper, "tsopt") - else: - start_structures = self._calculation.get_structures() - - self.react_postprocessing(product_names, program_helper, "tsopt", start_structures) + tsguess = self.output("bspline")[0] + self._tsopt_hess_irc_ircopt_postprocessing(tsguess, settings_manager, program_helper) return self.postprocess_calculation_context() diff --git a/scine_puffin/jobs/scine_react_complex_nt.py b/scine_puffin/jobs/scine_react_complex_nt.py index a963403..ce478ff 100644 --- a/scine_puffin/jobs/scine_react_complex_nt.py +++ b/scine_puffin/jobs/scine_react_complex_nt.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" @@ -220,9 +220,6 @@ def __init__(self): @job_configuration_wrapper def run(self, manager, calculation, config: Configuration) -> bool: - import scine_readuct as readuct - import scine_utilities as utils - # Everything that calls SCINE is enclosed in a try/except block with breakable(calculation_context(self)): settings_manager, program_helper = self.reactive_complex_preparations() @@ -241,7 +238,7 @@ def run(self, manager, calculation, config: Configuration) -> bool: else: rc_opt_graph = None if rc_opt_graph is not None: - self.save_barrierless_reaction(rc_opt_graph, program_helper) + self.save_barrierless_reaction_from_rcopt(rc_opt_graph, program_helper) else: calculation.set_comment(self.name + " NT Job: No TS guess found.") self.capture_raw_output() @@ -252,86 +249,7 @@ def run(self, manager, calculation, config: Configuration) -> bool: ) raise breakable.Break - """ TSOPT JOB """ - inputs = self.output("nt") - self.setup_automatic_mode_selection("tsopt") - print("TSOpt Settings:") - print(self.settings["tsopt"], "\n") - self.systems, success = self.observed_readuct_call( - 'run_tsopt_task', self.systems, inputs, **self.settings["tsopt"] - ) - self.throw_if_not_successful( - success, - self.systems, - self.output("tsopt"), - ["energy"], - "TS optimization failed:\n", - ) - - """ TS HESSIAN """ - inputs = self.output("tsopt") - self.systems, success = readuct.run_hessian_task(self.systems, inputs) - self.throw_if_not_successful( - success, - self.systems, - inputs, - ["energy", "hessian", "thermochemistry"], - "TS Hessian calculation failed.\n", - ) - - if self.n_imag_frequencies(inputs[0]) != 1: - self.raise_named_exception( - "Error: " - + self.name - + " failed with message: " - + "TS has incorrect number of imaginary frequencies." 
- ) - - """ IRC JOB """ - # IRC (only a few steps to allow decent graph extraction) - print("IRC Settings:") - print(self.settings["irc"], "\n") - self.systems, success = self.observed_readuct_call( - 'run_irc_task', self.systems, inputs, **self.settings["irc"] - ) - - """ IRC OPT JOB """ - # Run a small energy minimization after initial IRC - inputs = self.output("irc") - print("IRC Optimization Settings:") - print(self.settings["ircopt"], "\n") - for i in inputs: - atoms = self.systems[i].structure - self.random_displace_atoms(atoms) - self.systems[i].positions = atoms.positions - self.systems, success = self.observed_readuct_call( - 'run_opt_task', self.systems, [inputs[0]], **self.settings["ircopt"] - ) - self.systems, success = self.observed_readuct_call( - 'run_opt_task', self.systems, [inputs[1]], **self.settings["ircopt"] - ) - - """ Check whether we have a valid IRC """ - initial_charge = settings_manager.calculator_settings[utils.settings_names.molecular_charge] - product_names, start_names = self.irc_sanity_checks_and_analyze_sides( - initial_charge, self.check_charges, inputs, settings_manager.calculator_settings) - if product_names is None: # IRC did not pass checks, reason has been set as comment, complete job - self.verify_connection() - self.capture_raw_output() - scine_helper.update_model( - self.systems[self.output("tsopt")[0]], - self._calculation, - self.config, - ) - raise breakable.Break - - """ Store new starting material conformer(s) """ - if start_names is not None: - start_structures = self.store_start_structures( - start_names, program_helper, "tsopt") - else: - start_structures = self._calculation.get_structures() - - self.react_postprocessing(product_names, program_helper, "tsopt", start_structures) + tsguess = self.output("nt")[0] + self._tsopt_hess_irc_ircopt_postprocessing(tsguess, settings_manager, program_helper) return self.postprocess_calculation_context() diff --git a/scine_puffin/jobs/scine_react_complex_nt2.py 
b/scine_puffin/jobs/scine_react_complex_nt2.py index e7cca89..36c355d 100644 --- a/scine_puffin/jobs/scine_react_complex_nt2.py +++ b/scine_puffin/jobs/scine_react_complex_nt2.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -222,9 +222,6 @@ def __init__(self): @job_configuration_wrapper def run(self, _, calculation, config: Configuration) -> bool: - import scine_readuct as readuct - import scine_utilities as utils - # Everything that calls SCINE is enclosed in a try/except block with breakable(calculation_context(self)): settings_manager, program_helper = self.reactive_complex_preparations() @@ -242,7 +239,7 @@ def run(self, _, calculation, config: Configuration) -> bool: else: rc_opt_graph = None if rc_opt_graph is not None: - self.save_barrierless_reaction(rc_opt_graph, program_helper) + self.save_barrierless_reaction_from_rcopt(rc_opt_graph, program_helper) else: calculation.set_comment(self.name + " NT Job: No TS guess found.") self.capture_raw_output() @@ -253,82 +250,7 @@ def run(self, _, calculation, config: Configuration) -> bool: ) raise breakable.Break - """ TSOPT JOB """ - inputs = self.output("nt") - self.setup_automatic_mode_selection("tsopt") - print("TSOpt Settings:") - print(self.settings["tsopt"], "\n") - self.systems, success = self.observed_readuct_call( - 'run_tsopt_task', self.systems, inputs, **self.settings["tsopt"]) - self.throw_if_not_successful( - success, - self.systems, - self.output("tsopt"), - ["energy"], - "TS optimization failed:\n", - ) - - """ TS HESSIAN """ - inputs = self.output("tsopt") - self.systems, success = readuct.run_hessian_task(self.systems, inputs) - self.throw_if_not_successful( - success, - self.systems, - inputs, - ["energy", "hessian", "thermochemistry"], - 
"TS Hessian calculation failed.\n", - ) - - if self.n_imag_frequencies(inputs[0]) != 1: - self.raise_named_exception( - "Error: " - + self.name - + " failed with message: " - + "TS has incorrect number of imaginary frequencies." - ) - - """ IRC JOB """ - # IRC (only a few steps to allow decent graph extraction) - print("IRC Settings:") - print(self.settings["irc"], "\n") - self.systems, success = self.observed_readuct_call( - 'run_irc_task', self.systems, inputs, **self.settings["irc"]) - - """ IRC OPT JOB """ - # Run a small energy minimization after initial IRC - inputs = self.output("irc") - print("IRC Optimization Settings:") - print(self.settings["ircopt"], "\n") - for i in inputs: - atoms = self.systems[i].structure - self.random_displace_atoms(atoms) - self.systems[i].positions = atoms.positions - self.systems, success = self.observed_readuct_call( - 'run_opt_task', self.systems, [inputs[0]], **self.settings["ircopt"]) - self.systems, success = self.observed_readuct_call( - 'run_opt_task', self.systems, [inputs[1]], **self.settings["ircopt"]) - - """ Check whether we have a valid IRC """ - initial_charge = settings_manager.calculator_settings[utils.settings_names.molecular_charge] - product_names, start_names = self.irc_sanity_checks_and_analyze_sides( - initial_charge, self.check_charges, inputs, settings_manager.calculator_settings) - if product_names is None: # IRC did not pass checks, reason has been set as comment, complete job - self.verify_connection() - self.capture_raw_output() - scine_helper.update_model( - self.systems[self.output("tsopt")[0]], - self._calculation, - self.config, - ) - raise breakable.Break - - """ Store new starting material conformer(s) """ - if start_names is not None: - start_structures = self.store_start_structures( - start_names, program_helper, "tsopt") - else: - start_structures = self._calculation.get_structures() - - self.react_postprocessing(product_names, program_helper, "tsopt", start_structures) + tsguess = 
self.output("nt")[0] + self._tsopt_hess_irc_ircopt_postprocessing(tsguess, settings_manager, program_helper) return self.postprocess_calculation_context() diff --git a/scine_puffin/jobs/scine_single_point.py b/scine_puffin/jobs/scine_single_point.py index eb1d4d5..c6e77ba 100644 --- a/scine_puffin/jobs/scine_single_point.py +++ b/scine_puffin/jobs/scine_single_point.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/jobs/scine_step_refinement.py b/scine_puffin/jobs/scine_step_refinement.py index e4c5237..dd79ff9 100644 --- a/scine_puffin/jobs/scine_step_refinement.py +++ b/scine_puffin/jobs/scine_step_refinement.py @@ -1,11 +1,10 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ from scine_puffin.config import Configuration -from scine_puffin.utilities import scine_helper from .templates.job import breakable, calculation_context, job_configuration_wrapper from .templates.scine_react_job import ReactJob from scine_puffin.utilities import masm_helper @@ -93,7 +92,7 @@ class ScineStepRefinement(ReactJob): of 2 (default) will check triplet and quintet for a singlet and will check singlet, quintet und septet for triplet. - Additionally all settings that are recognized by the SCF program chosen. + Additionally, all settings that are recognized by the SCF program chosen. are also available. These settings are not required to be prepended with any flag. 
@@ -166,8 +165,6 @@ def __init__(self): @job_configuration_wrapper def run(self, manager, calculation, config: Configuration) -> bool: - import scine_readuct as readuct - import scine_utilities as utils import scine_database as db # Everything that calls SCINE is enclosed in a try/except block with breakable(calculation_context(self)): @@ -178,69 +175,13 @@ def run(self, manager, calculation, config: Configuration) -> bool: start_structures = [db.Structure(ident, self._structures) for ident in all_struc_ids] settings_manager.separate_settings(self._calculation.get_settings()) self.sort_settings(settings_manager.task_settings) - """ TSOPT JOB """ + """ TSOPT Hessian IRC IRCOPT """ ts_guess, keys = settings_manager.prepare_readuct_task(ts_struc, self._calculation, self._calculation.get_settings(), config["resources"]) self.systems[keys[0]] = ts_guess[keys[0]] - print("TSOpt Settings:") - print(self.settings["tsopt"], "\n") - self.systems, success = self.observed_readuct_call( - 'run_tsopt_task', self.systems, keys, **self.settings["tsopt"]) - self.throw_if_not_successful( - success, - self.systems, - self.output("tsopt"), - ["energy"], - "TS optimization failed:\n", - ) - """ TS HESSIAN """ - inputs = self.output("tsopt") - self.systems, success = readuct.run_hessian_task(self.systems, inputs) - self.throw_if_not_successful( - success, - self.systems, - inputs, - ["energy", "hessian", "thermochemistry"], - "TS Hessian calculation failed.\n", - ) - if self.n_imag_frequencies(inputs[0]) != 1: - self.raise_named_exception( - "Error: " - + self.name - + " failed with message: " - + "TS has incorrect number of imaginary frequencies." 
- ) - """ IRC JOB """ - # IRC (only a few steps to allow decent graph extraction) - print("IRC Settings:") - print(self.settings["irc"], "\n") - self.systems, success = self.observed_readuct_call( - 'run_irc_task', self.systems, inputs, **self.settings["irc"]) - """ IRC OPT JOB """ - # Run a small energy minimization after initial IRC - inputs = self.output("irc") - print("IRC Optimization Settings:") - print(self.settings["ircopt"], "\n") - self.systems, success = self.observed_readuct_call( - 'run_opt_task', self.systems, [inputs[0]], **self.settings["ircopt"]) - self.systems, success = self.observed_readuct_call( - 'run_opt_task', self.systems, [inputs[1]], **self.settings["ircopt"]) - - """ Check whether we have a valid IRC """ - initial_charge = settings_manager.calculator_settings[utils.settings_names.molecular_charge] - product_names, start_names = self.irc_sanity_checks_and_analyze_sides( - initial_charge, self.check_charges, inputs, settings_manager.calculator_settings) - if product_names is None: # IRC did not pass checks, reason has been set as comment, complete job - self.verify_connection() - self.capture_raw_output() - scine_helper.update_model( - self.systems[self.output("tsopt")[0]], - self._calculation, - self.config, - ) - raise breakable.Break + product_names, start_names = self._tsopt_hess_irc_ircopt(keys[0], settings_manager) """ Store new starting material conformer(s) """ if start_names is not None: @@ -257,7 +198,7 @@ def run(self, manager, calculation, config: Configuration) -> bool: return self.postprocess_calculation_context() - def optimize_reactants(self, reactant_structures, settings_manager, config): + def optimize_reactants(self, reactant_structures, settings_manager, config: Configuration): """ Optimize the reactant structures and saves them in the database. @@ -265,6 +206,7 @@ def optimize_reactants(self, reactant_structures, settings_manager, config): ----- * writes reactant calculators to self.systems * May throw exception. 
+ * Requires run configuration Parameters ---------- @@ -282,10 +224,6 @@ def optimize_reactants(self, reactant_structures, settings_manager, config): optimized_structures :: List[scine_database.Structure] The optimized reactant structures. """ - import scine_readuct as readuct - import scine_utilities as utils - import scine_database as db - print("Reactant Opt Settings:") print(self.settings["opt"], "\n") @@ -313,32 +251,15 @@ def optimize_reactants(self, reactant_structures, settings_manager, config): "Reactant optimization failed:\n", ) - # Calculate the bond orders - self.systems, success = readuct.run_single_point_task( - self.systems, - [name], - spin_propensity_check=self.settings[self.job_key]["spin_propensity_check"], - require_bond_orders=True, - ) - self.throw_if_not_successful( - success, - self.systems, - [name], - ["energy", "bond_orders"], - "Reactant optimization failed:\n", - ) - - pbc_string = self.systems[name].settings.get(utils.settings_names.periodic_boundaries, "") - masm_results = masm_helper.get_molecules_result( + bond_orders, self.systems = self.make_bond_orders_from_calc(self.systems, name) + cbor = masm_helper.get_cbor_graph( self.systems[name].structure, - self.make_bond_orders_from_calc(self.systems, name), + bond_orders, self.connectivity_settings, - pbc_string, + self._calculation.get_model().periodic_boundaries, + self.surface_indices(structure) ) - structure_label = db.Label.MINIMUM_OPTIMIZED - if len(masm_results.molecules) > 1: - structure_label = db.Label.COMPLEX_OPTIMIZED - + structure_label = self._determine_new_label_based_on_graph(self.systems[name], cbor) new_structure = self.create_new_structure(self.systems[name], structure_label) self.transfer_properties(structure, new_structure) self.store_energy(self.systems[name], new_structure) @@ -346,11 +267,11 @@ def optimize_reactants(self, reactant_structures, settings_manager, config): self._properties, "bond_orders", "SparseMatrixProperty", - 
self.systems[name].get_results().bond_orders.matrix, + bond_orders.matrix, self._calculation.get_model(), self._calculation, new_structure, ) - self.add_graph(new_structure, self.systems[name].get_results().bond_orders) + self.add_graph(new_structure, bond_orders) optimized_structures.append(new_structure) return reactant_names, optimized_structures diff --git a/scine_puffin/jobs/scine_ts_optimization.py b/scine_puffin/jobs/scine_ts_optimization.py index f0a2b0c..4088885 100644 --- a/scine_puffin/jobs/scine_ts_optimization.py +++ b/scine_puffin/jobs/scine_ts_optimization.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/jobs/sleep.py b/scine_puffin/jobs/sleep.py index 2630ff5..3b5db02 100644 --- a/scine_puffin/jobs/sleep.py +++ b/scine_puffin/jobs/sleep.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/jobs/swoose_qmmm_forces.py b/scine_puffin/jobs/swoose_qmmm_forces.py index e17fe34..bed5cdf 100644 --- a/scine_puffin/jobs/swoose_qmmm_forces.py +++ b/scine_puffin/jobs/swoose_qmmm_forces.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" diff --git a/scine_puffin/jobs/templates/__init__.py b/scine_puffin/jobs/templates/__init__.py index 4d46073..d85c6e5 100644 --- a/scine_puffin/jobs/templates/__init__.py +++ b/scine_puffin/jobs/templates/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/jobs/templates/job.py b/scine_puffin/jobs/templates/job.py index 689b865..ce0dea3 100644 --- a/scine_puffin/jobs/templates/job.py +++ b/scine_puffin/jobs/templates/job.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -105,7 +105,7 @@ def required_programs() -> List[str]: """ raise NotImplementedError - def prepare(self, job_dir: str, id): + def prepare(self, job_dir: str, id) -> None: """ Prepares the actual job. This function has to be implemented by any job that shall be added to @@ -123,7 +123,7 @@ def prepare(self, job_dir: str, id): if self.work_dir and not os.path.exists(self.work_dir): os.makedirs(self.work_dir) - def archive(self, archive: str): + def archive(self, archive: str) -> None: """ Archives all files existent in the job's directory into tarball named with the job's ID. The tarball is then moved to the given destination. @@ -147,7 +147,7 @@ def archive(self, archive: str): os.makedirs(archive) shutil.move(tar_gen_path, tar_archive_path) - def clear(self): + def clear(self) -> None: """ Clears the directory in which the job was run. 
""" @@ -292,7 +292,7 @@ def configure_run(self, manager, calculation, config: Configuration): self.set_calculation(calculation) self.config = config - def get_collections(self, manager): + def get_collections(self, manager) -> None: """ Saves Scine Database collections as class variables @@ -310,7 +310,7 @@ def get_collections(self, manager): self._structures = manager.get_collection("structures") self._flasks = manager.get_collection("flasks") - def set_calculation(self, calculation): + def set_calculation(self, calculation) -> None: """ Sets the current Calculation for this job and ensures connection @@ -592,10 +592,10 @@ class breakable(object): Helper to allow breaking out of the contex manager early > with breakable(open(path)) as f: - > print 'before condition' + > print('before condition') > if condition: > raise breakable.Break - > print 'after condition' + > print('after condition') """ class Break(Exception): diff --git a/scine_puffin/jobs/templates/kinetic_modeling_jobs.py b/scine_puffin/jobs/templates/kinetic_modeling_jobs.py new file mode 100644 index 0000000..1e6016d --- /dev/null +++ b/scine_puffin/jobs/templates/kinetic_modeling_jobs.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. +""" + +import numpy as np +from typing import List + +import scine_database as db + +from ..templates.job import Job, job_configuration_wrapper +from ...utilities.compound_and_flask_helpers import get_compound_or_flask +from scine_puffin.config import Configuration + + +class KineticModelingJob(Job): + """ + Abstract base class for the RMS kinetic modeling and KiNetX kinetic modeling jobs. 
+ """ + def __init__(self): + super().__init__() + self.name = "KineticModelingJob" + self.model: db.Model = db.Model("PM6", "PM6", "") + self.c_max_label = "max_concentration" + self.c_final_label = "final_concentration" + self.c_flux_label = "concentration_flux" + self.r_flux_label = "_reaction_edge_flux" + self.r_forward_label = "_forward_edge_flux" + self.r_backward_label = "_backward_edge_flux" + + @job_configuration_wrapper + def run(self, manager, calculation, config: Configuration) -> bool: + """See Job.run()""" + raise NotImplementedError + + @staticmethod + def required_programs(): + raise NotImplementedError + + def _write_concentrations_to_centroids(self, aggregate_ids: List[db.ID], aggregate_types: List[db.CompoundOrFlask], + reaction_ids: List[db.ID], aggregate_wise_concentrations: List[np.ndarray], + reaction_wise_concentrations: List[np.ndarray], + aggregate_wise_labels: List[str], + reaction_wise_labels: List[str], + results: db.Results, + post_fix: str = "", + add_flask_result_to_compounds: bool = False): + assert len(aggregate_wise_concentrations) == len(aggregate_wise_labels) + assert len(reaction_wise_concentrations) == len(reaction_wise_labels) + if add_flask_result_to_compounds: + original_concentration_data = np.zeros((len(aggregate_ids), len(aggregate_wise_concentrations))) + for i, concentrations in enumerate(aggregate_wise_concentrations): + original_concentration_data[:, 0] = concentrations + concentration_data = self._resolve_flask_to_compound_mapping(original_concentration_data, aggregate_ids, + aggregate_types) + for i, concentrations in enumerate(aggregate_wise_concentrations): + concentrations = concentration_data[:, i] + + print("Concentration Properties") + for i, (a_id, a_type) in enumerate(zip(aggregate_ids, aggregate_types)): + centroid = self._get_aggregate_centroid(a_id, a_type) + for concentrations, concentration_label in zip(aggregate_wise_concentrations, + aggregate_wise_labels): + c = concentrations[i] + label = 
concentration_label + post_fix + self._write_concentration_property(centroid, label, c, results) + + print("Reaction flux properties") + for i, r_id in enumerate(reaction_ids): + centroid = self._get_reaction_centroid(r_id) + for concentrations, concentration_label in zip(reaction_wise_concentrations, + reaction_wise_labels): + c = concentrations[i] + label = r_id.string() + concentration_label + post_fix + self._write_concentration_property(centroid, label, c, results) + + def _write_concentration_property(self, centroid: db.Structure, label: str, value: float, results: db.Results): + prop = db.NumberProperty.make(label, self.model, value, self._properties) + results.add_property(prop.id()) + centroid.add_property(label, prop.id()) + prop.set_structure(centroid.id()) + print("struc", centroid.id().string(), " prop ", prop.id().string(), " ", label, " ", value) + + def _resolve_flask_to_compound_mapping(self, concentration_data, aggregate_id_list, + aggregate_type_list): + i = 0 + new_concentration_data = np.copy(concentration_data) + for a_id, a_type in zip(aggregate_id_list, aggregate_type_list): + if a_type == db.CompoundOrFlask.FLASK: + flask = db.Flask(a_id, self._flasks) + compounds_in_flask = flask.get_compounds() + for c_id in compounds_in_flask: + if c_id in aggregate_id_list: + j = aggregate_id_list.index(c_id) + new_concentration_data[j, :] += concentration_data[i, :] + i += 1 + return new_concentration_data + + def _disable_all_aggregates(self): + """ + Disable the exploration of all aggregates. 
+ """ + for compound in self._compounds.iterate_all_compounds(): + compound.link(self._compounds) + compound.disable_exploration() + for flask in self._flasks.iterate_all_flasks(): + flask.link(self._flasks) + flask.disable_exploration() + + def _get_reaction_centroid(self, r_id): + a_id = db.Reaction(r_id, self._reactions).get_reactants(db.Side.LHS)[0][0] + a_type = db.Reaction(r_id, self._reactions).get_reactant_types(db.Side.LHS)[0][0] + aggregate = get_compound_or_flask(a_id, a_type, self._compounds, self._flasks) + return db.Structure(aggregate.get_centroid(), self._structures) + + def _get_aggregate_centroid(self, a_id, a_type): + aggregate = get_compound_or_flask(a_id, a_type, self._compounds, self._flasks) + return db.Structure(aggregate.get_centroid(), self._structures) diff --git a/scine_puffin/jobs/templates/scine_connectivity_job.py b/scine_puffin/jobs/templates/scine_connectivity_job.py index 157c604..d5d2b71 100644 --- a/scine_puffin/jobs/templates/scine_connectivity_job.py +++ b/scine_puffin/jobs/templates/scine_connectivity_job.py @@ -1,12 +1,15 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" -from typing import Dict, Set +from typing import Dict, Set, Tuple, Optional, Union, List, Any import sys +import scine_database as db +import scine_utilities as utils + from .job import job_configuration_wrapper from .scine_job import ScineJob from scine_puffin.config import Configuration @@ -41,7 +44,7 @@ def run(self, manager, calculation, config: Configuration) -> bool: def required_programs(): return ["database", "molassembler", "readuct", "utils"] - def connectivity_settings_from_only_connectivity_settings(self): + def connectivity_settings_from_only_connectivity_settings(self) -> None: """ Overwrite default connectivity settings based on settings of configured Calculation and expect no other settings to be present. Throws if there are other settings present. @@ -60,7 +63,7 @@ def connectivity_settings_from_only_connectivity_settings(self): + " was/were not recognized." ) - def extract_connectivity_settings_from_dict(self, dictionary: Dict[str, bool]): + def extract_connectivity_settings_from_dict(self, dictionary: Dict[str, bool]) -> None: """ Overwrite default connectivity settings based on given dictionary and removes those from the dictionary. @@ -68,7 +71,9 @@ def extract_connectivity_settings_from_dict(self, dictionary: Dict[str, bool]): for key, value in self.connectivity_settings.items(): self.connectivity_settings[key] = dictionary.pop(key, value) - def make_bond_orders_from_calc(self, systems: dict, key: str): + def make_bond_orders_from_calc(self, systems: dict, key: str, + surface_indices: Optional[Union[List[int], Set[int]]] = None) \ + -> Tuple[utils.BondOrderCollection, Dict[str, utils.core.Calculator]]: """ Gives bond orders for the specified system based on the connectivity settings of this class. 
@@ -83,17 +88,21 @@ def make_bond_orders_from_calc(self, systems: dict, key: str): Dictionary of system names to calculators representing them key :: str Index into systems dictionary to get bond orders for + surface_indices :: Optional[Union[List[int], Set[int]]] + The indices of the atoms for which the rules of solid state atoms shall be applied. Returns ------- bond_orders :: utils.BondOrderCollection (Scine::Utilties::BondOrderCollection) The bond orders of the system + systems :: Dict[str, utils.core.Calculator] + Dictionary of system names to calculators representing them, + updated with the results of the single point calculation requesting bond orders. """ import scine_readuct as readuct - # Distance based bond orders if self.connectivity_settings["only_distance_connectivity"]: - bond_orders = self.distance_bond_orders(systems[key].structure) + bond_orders = self.distance_bond_orders(systems[key].structure, surface_indices) # Bond order calculation with readuct else: if not self.expected_results_check(systems, [key], ["energy", "bond_orders", "atomic_charges"])[0]: @@ -106,9 +115,11 @@ def make_bond_orders_from_calc(self, systems: dict, key: str): ) bond_orders = systems[key].get_results().bond_orders - return bond_orders + return bond_orders, systems - def make_graph_from_calc(self, systems: dict, key: str) -> str: + def make_graph_from_calc(self, systems: dict, key: str, + surface_indices: Optional[Union[List[int], Set[int]]] = None) \ + -> Tuple[str, Dict[str, utils.core.Calculator]]: """ Runs bond orders for the specified name in the dictionary of systems if not present already and return cbor graph for based on them. 
@@ -124,25 +135,95 @@ def make_graph_from_calc(self, systems: dict, key: str) -> str: Dictionary of system names to calculators representing them key :: str Index into systems dictionary to get bond orders for + surface_indices :: Optional[Union[List[int], Set[int]]] + The indices of the atoms for which the rules of solid state atoms shall be applied. Returns ------- graph_cbor :: str Serialized representation of interpreted molassembler molecule. + systems :: Dict[str, utils.core.Calculator] + Dictionary of system names to calculators representing them, + """ - import scine_utilities as utils + if surface_indices is None: + all_indices = self.surface_indices_all_structures() + if all_indices: + # if we have surface indices in any start structure + start_structures = [db.Structure(s, self._structures) for s in self._calculation.get_structures()] + n_start_atoms = sum(len(s.get_atoms()) for s in start_structures + if s.get_label() != db.Label.SURFACE_ADSORPTION_GUESS) + n_system_atoms = len(systems[key].structure) + if n_system_atoms == n_start_atoms: + surface_indices = all_indices + else: + for s in start_structures: + if len(s.get_atoms()) == n_system_atoms: + potential_indices = self.surface_indices(s) + if potential_indices: + surface_indices = potential_indices + break + else: + self.raise_named_exception(f"Start structures of calculation includes surface indices, " + f"but these could not propagated to the given system {key}") if self.connectivity_settings["only_distance_connectivity"]: - bond_orders = self.distance_bond_orders(systems[key].structure) + bond_orders = self.distance_bond_orders(systems[key].structure, surface_indices) else: bond_orders = systems[key].get_results().bond_orders if bond_orders is None: - bond_orders = self.make_bond_orders_from_calc(systems, key) + bond_orders, systems = self.make_bond_orders_from_calc(systems, key, surface_indices) + pbc_string = systems[key].settings.get(utils.settings_names.periodic_boundaries, "") + return 
masm_helper.get_cbor_graph( + systems[key].structure, + bond_orders, + self.connectivity_settings, + pbc_string, + surface_indices + ), systems + + def make_masm_result_from_calc(self, systems: dict, key: str, + unimportant_atoms: Optional[Union[List[int], Set[int]]]) \ + -> Tuple[Any, Dict[str, utils.core.Calculator]]: + """ + Gives Molassembler interpret result for the specified system based on the connectivity settings of this + class. + + Notes + ----- + * Requires run configuration + * May throw exception + + Parameters + ---------- + systems :: Dict[str, utils.core.Calculator] + Dictionary of system names to calculators representing them + key :: str + Index into systems dictionary to get bond orders for + unimportant_atoms :: Optional[Union[List[int], Set[int]]] + The indices of atoms for which no stereopermutators shall be determined. + Returns + ------- + masm_result :: masm.interpret.MoleculesResult (Scine::Molassembler::interpret::MoleculesResult) + The interpretation result + systems :: Dict[str, utils.core.Calculator] + Dictionary of system names to calculators representing them, + updated with the results of the single point calculation requesting bond orders. 
+ """ + bond_orders, systems = self.make_bond_orders_from_calc(systems, key, unimportant_atoms) pbc_string = systems[key].settings.get(utils.settings_names.periodic_boundaries, "") - return masm_helper.get_cbor_graph(systems[key].structure, bond_orders, self.connectivity_settings, pbc_string) + return masm_helper.get_molecules_result( + systems[key].structure, + bond_orders, + self.connectivity_settings, + pbc_string, + unimportant_atoms=unimportant_atoms + ), systems - def make_decision_lists_from_calc(self, systems: dict, key: str): + def make_decision_lists_from_calc(self, systems: dict, key: str, + surface_indices: Optional[Union[List[int], Set[int]]] = None) \ + -> Tuple[List[str], Dict[str, utils.core.Calculator]]: """ Calculates bond orders for the specified name in the dictionary of systems if not present already. @@ -159,72 +240,86 @@ def make_decision_lists_from_calc(self, systems: dict, key: str): Dictionary of system names to calculators representing them key :: str Index into systems dictionary to get bond orders for + surface_indices :: Optional[Union[List[int], Set[int]]] + The indices of the atoms for which the rules of solid state atoms shall be applied. + Returns ------- decision_lists :: List[str] Decision lists per molecule in structure. + systems :: Dict[str, utils.core.Calculator] + Dictionary of system names to calculators representing them, + updated with the results of a possible single point calculation requesting bond orders. 
""" - import scine_utilities as utils - if self.connectivity_settings["only_distance_connectivity"]: - bond_orders = self.distance_bond_orders(systems[key].structure) + bond_orders = self.distance_bond_orders(systems[key].structure, surface_indices) else: bond_orders = systems[key].get_results().bond_orders if bond_orders is None: - bond_orders = self.make_bond_orders_from_calc(systems, key) + bond_orders, systems = self.make_bond_orders_from_calc(systems, key, surface_indices) pbc_string = systems[key].settings.get(utils.settings_names.periodic_boundaries, "") return masm_helper.get_decision_lists( systems[key].structure, bond_orders, self.connectivity_settings, - pbc_string) + pbc_string, + surface_indices + ), systems - def surface_indices_all_structures(self): + def surface_indices_all_structures(self, start_structures: Optional[List[db.ID]] = None) -> Set[int]: """ - Get the combined surface indices of all structures of the configured calculation. Throws if a structure is - specified to be a surface but does not have surface_indices property. + Get the combined surface indices of all structures of the configured calculation except a + structure with the label db.Label.SURFACE_ADSORPTION_GUESS. + Throws if a structure is specified to be a surface but does not have surface_indices property. Notes ----- * Requires run configuration * May throw exception + Parameters + ---------- + start_structures :: Optional[List[db.ID]] + Optional list of the starting structure ids. If no list is given. The input + structures of the calculation are used. + Returns ------- surface_indices :: set A set of all surface indices over all structures combined assuming an atom ordering identical to the addition of all structures in their order within the calculation. 
""" - import scine_database as db - surface_indices = [] n_atoms = 0 - for sid in self._calculation.get_structures(): + if start_structures is None: + start_structures = self._calculation.get_structures() + for sid in start_structures: structure = db.Structure(sid, self._structures) + if structure.get_label() == db.Label.SURFACE_ADSORPTION_GUESS: + continue indices = self.surface_indices(structure) surface_indices += [index + n_atoms for index in indices] n_atoms += len(structure.get_atoms()) return set(surface_indices) - def surface_indices(self, structure) -> Set[int]: - import scine_database as db - + def surface_indices(self, structure: db.Structure) -> Set[int]: if "surface" in str(structure.get_label()).lower(): if not structure.has_property("surface_atom_indices"): self.raise_named_exception( "The structure is a surface, but has no property indicating " "the surface atom indices." ) - surface_atoms_prop = db.VectorProperty(structure.get_property("surface_atom_indices")) - surface_atoms_prop.link(self._properties) - surface_indices = set(surface_atoms_prop.get_data()) + surface_atoms_prop = db.VectorProperty(structure.get_property("surface_atom_indices"), self._properties) + data = surface_atoms_prop.get_data() + surface_indices = set([int(d) for d in data]) else: surface_indices = set() return surface_indices - def distance_bond_orders(self, structure): + def distance_bond_orders(self, structure: db.Structure, + surface_indices: Optional[Union[List[int], Set[int]]] = None) -> utils.BondOrderCollection: """ Construct bond order solely based on distance for either an AtomCollection or a Database Structure. @@ -237,32 +332,34 @@ def distance_bond_orders(self, structure): ---------- structure :: Union[utils.AtomCollection, db.Structure] Either an AtomCollection or a structure for which distance based bond orders are constructed. 
+ surface_indices :: Optional[Union[List[int], Set[int]]] + The indices of the atoms for which the rules of solid state atoms shall be applied. Returns ------- bond_orders :: utils.BondOrderCollection (Scine::Utilties::BondOrderCollection) The bond orders of the structure. """ - import scine_database as db - import scine_utilities as utils - if isinstance(structure, db.Structure): atoms = structure.get_atoms() - surface_indices = self.surface_indices(structure) + if surface_indices is None: + surface_indices = self.surface_indices(structure) elif isinstance(structure, utils.AtomCollection): atoms = structure - surface_indices = self.surface_indices_all_structures() + if surface_indices is None: + surface_indices = self.surface_indices_all_structures() else: self.raise_named_exception( "Unknown type of provided structure for distance bond orders." ) + return # actually unreached, just avoid lint errors model = self._calculation.get_model() # generate bond orders depending on model and surface atoms if model.periodic_boundaries and model.periodic_boundaries != "none": # PeriodicSystem handles everything pbc = utils.PeriodicBoundaries(model.periodic_boundaries) - ps = utils.PeriodicSystem(pbc, atoms, surface_indices) + ps = utils.PeriodicSystem(pbc, atoms, set(surface_indices)) bond_orders = ps.construct_bond_orders() elif surface_indices: # Use mixture of Nearest Neighbors and BondDetector @@ -271,7 +368,8 @@ def distance_bond_orders(self, structure): bond_orders = utils.BondDetector.detect_bonds(atoms) return bond_orders - def add_graph(self, structure, bond_orders): + def add_graph(self, structure: db.Structure, bond_orders: utils.BondOrderCollection, + surface_indices: Optional[Union[List[int], Set[int]]] = None) -> None: """ Add molassembler graph information to a Database structure based on the given bond orders. 
@@ -281,23 +379,30 @@ def add_graph(self, structure, bond_orders): Either an AtomCollection or a structure for which distance based bond orders are constructed. bond_orders :: utils.BondOrderCollection (Scine::Utilties::BondOrderCollection) The bond orders of the structure. + surface_indices :: Optional[Union[List[int], Set[int]]] + The indices of the atoms for which the rules of solid state atoms shall be applied. """ print("\nGenerating Molassembler graphs") if structure.has_graph("masm_cbor_graph"): sys.stderr.write("Warning: The structure had a graph already. This graph will be replaced.") + if surface_indices is None: + surface_indices = self.surface_indices(structure) + masm_helper.add_masm_info( structure, bond_orders, self.connectivity_settings, - list(self.surface_indices(structure)), + surface_indices ) # Print the graph representations to the output - print("Generated graph:") - print("masm_cbor_graph: " + structure.get_graph("masm_cbor_graph")) - print("masm_decision_list: " + structure.get_graph("masm_decision_list")) + if structure.has_graph("masm_cbor_graph"): + print("Generated graph:") + print("masm_cbor_graph: " + structure.get_graph("masm_cbor_graph")) + if structure.has_graph("masm_decision_list"): + print("masm_decision_list: " + structure.get_graph("masm_decision_list")) - def query_bond_orders(self, structure): + def query_bond_orders(self, structure: db.Structure) -> db.SparseMatrixProperty: """ Query the given Database structure for bond orders based on the model of the configured calculation @@ -312,11 +417,9 @@ def query_bond_orders(self, structure): A database structure to query. Returns ------- - db_bond_orders :: db.SpareMatrixProperty (Scine::Database::SparseMatrixProperty) + db_bond_orders :: db.SparseMatrixProperty (Scine::Database::SparseMatrixProperty) A database property holding bond orders. 
""" - import scine_database as db - # db bond orders bos = structure.query_properties("bond_orders", self._calculation.get_model(), self._properties) if len(bos) == 0 and not self.connectivity_settings["enforce_bond_order_model"]: @@ -331,7 +434,8 @@ def query_bond_orders(self, structure): return db_bond_orders @staticmethod - def bond_orders_from_db_bond_orders(structure, db_bond_orders): + def bond_orders_from_db_bond_orders(structure: db.Structure, db_bond_orders: db.SparseMatrixProperty) \ + -> utils.BondOrderCollection: """ A shortcut to construct a BondOrderCollection from a Database Property holding bond orders. Returns @@ -339,8 +443,6 @@ def bond_orders_from_db_bond_orders(structure, db_bond_orders): bond_orders :: utils.BondOrderCollection (Scine::Utilties::BondOrderCollection) The bond orders of the structure. """ - import scine_utilities as utils - atoms = structure.get_atoms() bond_orders = utils.BondOrderCollection(len(atoms)) bond_orders.matrix = db_bond_orders.get_data() diff --git a/scine_puffin/jobs/templates/scine_hessian_job.py b/scine_puffin/jobs/templates/scine_hessian_job.py index c425fe5..13de77f 100644 --- a/scine_puffin/jobs/templates/scine_hessian_job.py +++ b/scine_puffin/jobs/templates/scine_hessian_job.py @@ -1,11 +1,15 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" import numpy as np +import scine_database as db +import scine_utilities as utils + + from .job import job_configuration_wrapper from .scine_job import ScineJob from scine_puffin.config import Configuration @@ -31,7 +35,7 @@ def run(self, manager, calculation, config: Configuration) -> bool: def required_programs(): return ["database", "readuct", "utils"] - def store_hessian_data(self, system, structure): + def store_hessian_data(self, system: utils.core.Calculator, structure: db.Structure) -> None: """ Stores results from a Hessian calculation and Thermochemistry for the specified structure based on the given calculator. Does not perform checks. @@ -47,11 +51,15 @@ def store_hessian_data(self, system, structure): structure :: db.Structure (Scine::Database::Structure) A structure for which the property is saved. """ - import scine_utilities as utils - + results = system.get_results() + if results.energy is None: + self.raise_named_exception(f"{system.name()} is missing energy result") + return # unreachable only for linter if not structure.has_property("electronic_energy"): self.store_energy(system, structure) - results = system.get_results() + if results.hessian is None: + self.raise_named_exception(f"{system.name()} is missing Hessian result") + return # unreachable only for linter # Get normal modes and frequencies atoms = structure.get_atoms() modes_container = utils.normal_modes.calculate(results.hessian, atoms.elements, atoms.positions) @@ -94,6 +102,13 @@ def store_hessian_data(self, system, structure): ) thermo_container = results.thermochemistry + if thermo_container is None: + thermo_calculator = utils.ThermochemistryCalculator(results.hessian, atoms, structure.get_multiplicity(), + results.energy) + thermo_calculator.set_temperature(float(model.temperature)) + thermo_calculator.set_pressure(float(model.pressure)) + thermo_container = thermo_calculator.calculate() + self.store_property( self._properties, "gibbs_free_energy", diff --git 
a/scine_puffin/jobs/templates/scine_job.py b/scine_puffin/jobs/templates/scine_job.py index efcf900..5e7a1d3 100644 --- a/scine_puffin/jobs/templates/scine_job.py +++ b/scine_puffin/jobs/templates/scine_job.py @@ -1,15 +1,19 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ -from typing import List, Tuple, Union +from typing import List, Tuple, Optional + +import scine_database as db +import scine_utilities as utils from .job import Job, job_configuration_wrapper from scine_puffin.config import Configuration from scine_puffin.utilities.scine_helper import SettingsManager, update_model from scine_puffin.utilities.program_helper import ProgramHelper +from scine_puffin.utilities.transfer_helper import TransferHelper class ScineJob(Job): @@ -22,14 +26,12 @@ class ScineJob(Job): def __init__(self): super().__init__() - self.name = "ScineJob" # to be overwritten by child class + self.name = self.__class__.__name__ self.own_expected_results = [] # to be overwritten by child class # to be added by child class: self.properties_to_transfer = [ "surface_atom_indices", "slab_dict", - "slab_formula", - "primitive_lattice", ] self._fallback_error = "Error: " + self.name + " failed with an unspecified error." @@ -60,7 +62,7 @@ def required_programs() -> List[str]: """See Job.required_programs()""" raise NotImplementedError - def create_helpers(self, structure) -> Tuple[SettingsManager, Union[ProgramHelper, None]]: + def create_helpers(self, structure: db.Structure) -> Tuple[SettingsManager, Optional[ProgramHelper]]: """ Creates a Scine SettingsManager and ProgramHelper based on the configured job and the given structure. 
The ProgramHelper is None if no ProgramHelper is specified for the specified program or no program was @@ -77,11 +79,11 @@ def create_helpers(self, structure) -> Tuple[SettingsManager, Union[ProgramHelpe Returns ------- - helper_tuple :: Tuple[SettingsManager, Union[ProgramHelper, None] + helper_tuple :: Tuple[SettingsManager, Optional[ProgramHelper] A tuple of the SettingsManager for Scine Calculators and ProgramHelper if available. """ model = self._calculation.get_model() - program = model.program if model.program != "any" else "" + program = model.program if model.program.lower() != "any" else "" settings_manager = SettingsManager(model.method_family, program) program_helper = ProgramHelper.get_correct_helper(program, self._manager, structure, self._calculation) return settings_manager, program_helper @@ -98,7 +100,7 @@ def throw_if_not_successful( success: bool, systems: dict, keys: List[str], - expected_results: Union[List[str], None] = None, + expected_results: Optional[List[str]] = None, sub_task_error_line: str = "", ) -> None: """ @@ -117,7 +119,7 @@ def throw_if_not_successful( The dictionary holding calculators. keys :: List[str] The list of keys of the systems dict to be checked. - expected_results :: Union[List[str], None] + expected_results :: Optional[List[str]] The results to be required to be present in systems to qualify as successful calculations. If None is given, this defaults to the expected results of the class, see expected_results(). sub_task_error_line :: str @@ -139,7 +141,7 @@ def calculation_postprocessing( success: bool, systems: dict, keys: List[str], - expected_results: Union[List[str], None] = None, + expected_results: Optional[List[str]] = None, ): """ Performs a verification protocol that a Scine Calculation was successful. If not throws an exception, @@ -159,7 +161,7 @@ def calculation_postprocessing( The dictionary holding calculators. keys :: List[str] The list of keys of the systems dict to be checked. 
- expected_results :: Union[List[str], None] + expected_results :: Optional[List[str]] The results to be required to be present in systems to qualify as successful calculations. If None is given, this defaults to the expected results of the class, see expected_results(). """ @@ -198,7 +200,7 @@ def expected_results_check( self, systems: dict, keys: List[str], - expected_results: Union[List[str], None] = None, + expected_results: Optional[List[str]] = None, ) -> Tuple[bool, str]: """ Checks the results of the given systems based on the expected results. If the expected results are not given, @@ -216,7 +218,7 @@ def expected_results_check( The dictionary holding calculators. keys :: List[str] The list of keys of the systems dict to be checked. - expected_results :: Union[List[str], None] + expected_results :: Optional[List[str]] The results to be required to be present in systems to qualify as successful calculations. If None is given, this defaults to the expected results of the class, see expected_results(). @@ -231,6 +233,8 @@ def expected_results_check( for key in keys: if key not in systems: return False, (key + " is missing in systems!") + if systems[key] is None: + return False, "" # check if desired results are present if not systems[key].has_results(): return False, ("System '" + key + "' is missing results!") @@ -240,7 +244,7 @@ def expected_results_check( return False, (expected + " is missing in results!") return True, "" - def store_energy(self, system, structure): + def store_energy(self, system: utils.core.Calculator, structure: db.Structure) -> None: """ Stores an 'electronic_energy' property for the given structure based on the energy in the results of the given system. Does not perform checks. 
@@ -266,31 +270,39 @@ def store_energy(self, system, structure): structure, ) - def transfer_properties(self, old_structure, new_structure): + def transfer_properties(self, old_structure: db.Structure, new_structure: db.Structure, + transfer_helper: Optional[TransferHelper] = None) -> None: """ Copies property IDs from one structure to another one based on the specified properties in the class member. + Notes + ----- + * Requires run configuration + Parameters ---------- old_structure :: db.Structure (Scine::Database::Structure) The structure holding the properties. If a specified property is not present for the structure, no error is given. new_structure :: db.Structure (Scine::Database::Structure) - The structure for which the property is to be added. + The structure for which the properties are to be added. + transfer_helper :: Optional[TransferHelper] + An optional helper for more difficult transfer task. Otherwise, the specified properties are just copied. """ properties_to_transfer = list(set(self.properties_to_transfer)) # make sure no duplicates - for prop in properties_to_transfer: - if old_structure.has_property(prop): - prop_id = old_structure.get_property(prop) - new_structure.set_property(prop, prop_id) + if transfer_helper is None: + for prop in properties_to_transfer: + TransferHelper.simple_transfer(old_structure, new_structure, prop) + else: + transfer_helper.transfer_properties(old_structure, new_structure, properties_to_transfer) def sp_postprocessing( self, success: bool, systems: dict, keys: List[str], - structure, - program_helper: Union[ProgramHelper, None], + structure: db.Structure, + program_helper: Optional[ProgramHelper], ): """ Performs a verification and results saving protocol for a Scine Single Point Calculation. 
@@ -361,3 +373,21 @@ def sp_postprocessing( if program_helper is not None: program_helper.calculation_postprocessing(self._calculation, structure) + + def get_calculation(self) -> db.Calculation: + """ + Getter for the current calculation. Throws if not configured. + + Notes + ----- + * Requires run configuration + * May throw Exception + + Returns + ------- + calculation :: db.Calculation (Scine::Database::Calculation) + The current calculation being carried out. + """ + if self._calculation is None: + self.raise_named_exception("Job is not configured and does not hold a calculation right now") + return self._calculation diff --git a/scine_puffin/jobs/templates/scine_observers.py b/scine_puffin/jobs/templates/scine_observers.py index 2929ef6..d258b6c 100644 --- a/scine_puffin/jobs/templates/scine_observers.py +++ b/scine_puffin/jobs/templates/scine_observers.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/jobs/templates/scine_optimization_job.py b/scine_puffin/jobs/templates/scine_optimization_job.py index ebd7152..9e32b6b 100644 --- a/scine_puffin/jobs/templates/scine_optimization_job.py +++ b/scine_puffin/jobs/templates/scine_optimization_job.py @@ -1,11 +1,15 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" from typing import List, Union +import scine_database as db +import scine_utilities as utils + + from .job import job_configuration_wrapper from .scine_job import ScineJob from scine_puffin.config import Configuration @@ -32,9 +36,9 @@ def run(self, manager, calculation, config: Configuration) -> bool: def required_programs(): return ["database", "readuct", "utils"] - def determine_new_label(self, structure, ignore_user_label: bool = False): + def determine_new_label(self, structure: db.Structure, graph: str, ignore_user_label: bool = False) -> db.Label: """ - Derive the label of the optimized structure based on the given structure. + Derive the label of the optimized structure based on the given structure and its Molassembler graph. Notes ----- @@ -45,6 +49,8 @@ def determine_new_label(self, structure, ignore_user_label: bool = False): ---------- structure :: db.Structure The structure to be optimized + graph :: str + The graph of the structure ignore_user_label :: bool Whether the user label of the given structure shall be ignored. 
If True, an input structure 'user_guess' will get an optimized structure with 'minimum_optimized' @@ -54,31 +60,51 @@ def determine_new_label(self, structure, ignore_user_label: bool = False): new_label :: db.Label The label of the optimized structure """ - import scine_database as db - label = structure.get_label() + graph_is_split = ";" in graph if label == db.Label.MINIMUM_GUESS or label == db.Label.MINIMUM_OPTIMIZED: - new_label = db.Label.MINIMUM_OPTIMIZED - elif label == db.Label.USER_GUESS or label == db.Label.USER_OPTIMIZED: - new_label = db.Label.USER_OPTIMIZED + if graph_is_split: + new_label = db.Label.COMPLEX_OPTIMIZED + else: + new_label = db.Label.MINIMUM_OPTIMIZED elif label == db.Label.SURFACE_GUESS or label == db.Label.SURFACE_OPTIMIZED: - new_label = db.Label.SURFACE_OPTIMIZED + if graph_is_split: + new_label = db.Label.SURFACE_COMPLEX_OPTIMIZED + else: + new_label = db.Label.SURFACE_OPTIMIZED elif label == db.Label.COMPLEX_GUESS or label == db.Label.COMPLEX_OPTIMIZED: - new_label = db.Label.COMPLEX_OPTIMIZED + if graph_is_split: + new_label = db.Label.COMPLEX_OPTIMIZED + else: + new_label = db.Label.MINIMUM_OPTIMIZED + elif label == db.Label.USER_OPTIMIZED: + if graph_is_split: + new_label = db.Label.USER_COMPLEX_OPTIMIZED + else: + new_label = db.Label.USER_OPTIMIZED + elif label == db.Label.USER_GUESS: + if graph_is_split: + if structure.has_property("surface_atom_indices"): + new_label = db.Label.USER_SURFACE_COMPLEX_OPTIMIZED + else: + new_label = db.Label.USER_COMPLEX_OPTIMIZED + else: + if structure.has_property("surface_atom_indices"): + new_label = db.Label.USER_SURFACE_OPTIMIZED + else: + new_label = db.Label.USER_OPTIMIZED else: - error = ( - "Unknown label '" - + str(label) - + "' of input structure: '" - + str(structure.id()) - + "'\n" - ) + error = f"Unknown label '{str(label)}' of input structure: '{str(structure.id())}'\n" self.raise_named_exception(error) + return # for type checking if ignore_user_label and new_label == 
db.Label.USER_OPTIMIZED: - new_label = db.Label.MINIMUM_OPTIMIZED + if graph_is_split: + new_label = db.Label.COMPLEX_OPTIMIZED + else: + new_label = db.Label.MINIMUM_OPTIMIZED return new_label - def create_new_structure(self, calculator, label): + def create_new_structure(self, calculator: utils.core.Calculator, label: db.Label) -> db.Structure: """ Add a new structure to the database based on the given calculator and label. @@ -93,16 +119,13 @@ def create_new_structure(self, calculator, label): label :: db.Label The label of the new structure """ - import scine_database as db - import scine_utilities as utils - # New structure new_structure = db.Structure() new_structure.link(self._structures) new_structure.create( calculator.structure, calculator.settings[utils.settings_names.molecular_charge], - calculator.settings[utils.settings_names.spin_multiplicity], + calculator.settings.get(utils.settings_names.spin_multiplicity, 0), self._calculation.get_model(), label, ) @@ -113,11 +136,11 @@ def optimization_postprocessing( success: bool, systems: dict, keys: List[str], - old_structure, - new_label, + old_structure: db.Structure, + new_label: db.Label, program_helper: Union[ProgramHelper, None], expected_results: Union[List[str], None] = None, - ): + ) -> db.Structure: """ Checks after an optimization whether everything went well and saves information to database. 
@@ -142,6 +165,8 @@ def optimization_postprocessing( The optional helper of the employed program for postprocessing program_helper :: Union[List[str], None] The expected results for the calculators, if not given, assumed from invoking Job class + expected_results :: Union[List[str], None] + The expected results for the calculators, if not given, assumed from invoking Job class """ # postprocessing of results with sanity checks diff --git a/scine_puffin/jobs/templates/scine_react_job.py b/scine_puffin/jobs/templates/scine_react_job.py index 0ebc39b..3acd3a5 100644 --- a/scine_puffin/jobs/templates/scine_react_job.py +++ b/scine_puffin/jobs/templates/scine_react_job.py @@ -1,16 +1,20 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ -from typing import Any, Dict, List, Tuple, Union, Optional +from math import ceil +from typing import Any, Dict, List, Tuple, Union, Optional, Iterator, Set import numpy as np import sys +import os from copy import deepcopy -import scine_molassembler as masm -from .job import job_configuration_wrapper +import scine_database as db +import scine_utilities as utils + +from .job import job_configuration_wrapper, breakable from .scine_connectivity_job import ConnectivityJob from .scine_hessian_job import HessianJob from .scine_optimization_job import OptimizationJob @@ -35,13 +39,19 @@ def __init__(self): self.job_key = "job" self.rc_opt_system_name = "rcopt" self.single_point_key = "sp" + self.no_irc_structure_matches_start = False # to be extended by child: self.settings: Dict[str, Dict[str, Any]] = { self.job_key: { "imaginary_wavenumber_threshold": 0.0, + "spin_propensity_check_for_unimolecular_reaction": True, + "spin_propensity_energy_range_to_save": 200.0, + "spin_propensity_optimize_all": True, + 
"spin_propensity_energy_range_to_optimize": 500.0, "spin_propensity_check": 2, "store_full_mep": False, "store_all_structures": False, + "n_surface_atom_threshold": 1, }, self.rc_key: { "minimal_spin_multiplicity": False, @@ -85,6 +95,8 @@ def __init__(self): self.complexation_criterion = -12.0 / 2625.5 # kj/mol self.check_charges = True self.systems = {} + self._component_maps: Dict[str, List[int]] = {} + self.products_component_map: Optional[List[int]] = None @job_configuration_wrapper def run(self, manager, calculation, config: Configuration) -> bool: @@ -95,9 +107,13 @@ def run(self, manager, calculation, config: Configuration) -> bool: def required_programs(): return ["database", "molassembler", "readuct", "utils"] - def observed_readuct_call(self, call_str: str, systems, input_names, **kwargs): - import scine_readuct as readuct + def clear(self) -> None: + self.systems = {} + super().clear() + def observed_readuct_call(self, call_str: str, systems: dict, input_names: List[str], **kwargs) \ + -> Tuple[dict, bool]: + import scine_readuct as readuct observers = [] observer_functions = [] model = self._calculation.get_model() @@ -113,9 +129,13 @@ def observed_readuct_call(self, call_str: str, systems, input_names, **kwargs): observer.finalize(self._manager, charge, multiplicity) return ret - def reactive_complex_preparations( - self, - ) -> Tuple[SettingsManager, Union[ProgramHelper, None]]: + def observed_readuct_call_with_throw(self, call_str: str, systems: dict, input_names: List[str], + expected_results: List[str], error_msg: str, **kwargs) -> dict: + systems, success = self.observed_readuct_call(call_str, systems, input_names, **kwargs) + self.throw_if_not_successful(success, systems, input_names, expected_results, error_msg) + return systems + + def reactive_complex_preparations(self) -> Tuple[SettingsManager, Union[ProgramHelper, None]]: """ Determine settings for this task based on settings of configured calculation, construct a reactive complex from the 
structures of the configured calculation, build a Scine Calculator for it and construct the @@ -131,9 +151,7 @@ def reactive_complex_preparations( settings_manager, program_helper :: Tuple[SettingsManager, Union[ProgramHelper, None]] A database property holding bond orders. """ - import scine_utilities as utils - import scine_database as db - + import scine_molassembler as masm # preprocessing of structure self.ref_structure = self.check_structures() settings_manager, program_helper = self.create_helpers(self.ref_structure) @@ -146,6 +164,8 @@ def reactive_complex_preparations( self.systems = dict() for i, structure_id in enumerate(self._calculation.get_structures()): structure = db.Structure(structure_id, self._structures) + if structure.get_label() == db.Label.SURFACE_ADSORPTION_GUESS: + continue name = "reactant_{:02d}".format(i) xyz_name = name + ".xyz" utils.io.write(xyz_name, structure.get_atoms()) @@ -184,17 +204,16 @@ def reactive_complex_preparations( self.systems[self.rc_key], self._calculation.get_settings()) # Calculate bond orders and graph of reactive complex and compare to database graph of start structures - reactive_complex_graph = self.make_graph_from_calc(self.systems, self.rc_key) + reactive_complex_graph, self.systems = self.make_graph_from_calc(self.systems, self.rc_key) if not masm.JsonSerialization.equal_molecules(reactive_complex_graph, self.start_graph): - self.raise_named_exception( - "Reactive complex graph differs from combined start structure graphs." - ) + print("Reactive complex graph differs from combined start structure graphs.") + self.start_graph = reactive_complex_graph return settings_manager, program_helper - def check_structures(self, start_structures: Union[List, None] = None): + def check_structures(self, start_structures: Union[List[db.ID], None] = None) -> db.Structure: """ Perform sanity check whether we only have 1 or 2 structures in the configured calculation. 
Return a possible - reference structure (largest one) for the construction of a ProgramHelper. + reference structure (the largest one) for the construction of a ProgramHelper. Notes ----- @@ -203,15 +222,14 @@ def check_structures(self, start_structures: Union[List, None] = None): Parameters ---------- - start_structures :: List[Scine::Database::Structure] - If given, this structure list is used instead of the list given in self._calculation.get_structures(). + start_structures :: List[db.ID] + If given, this structure id list is used instead of the list given in self._calculation.get_structures(). Returns ------- ref_structure :: db.Structure (Scine::Database::Structure) The largest structure of the calculation. """ - import scine_database as db if start_structures is None: start_structures = self._calculation.get_structures() if len(start_structures) == 0: @@ -223,13 +241,20 @@ def check_structures(self, start_structures: Union[List, None] = None): s2 = db.Structure(start_structures[1], self._structures) # choose larger structure as reference ref_id = start_structures[0] if len(s1.get_atoms()) >= len(s2.get_atoms()) else start_structures[1] + elif len(start_structures) == 3 and self._includes_label(start_structures, [db.Label.SURFACE_ADSORPTION_GUESS]): + # the given reaction has 3 structure, with one already representing the reactive complex + # the reactive complex is therefore a good ref_Id + for s in start_structures: + structure = db.Structure(s, self._structures) + if structure.get_label() == db.Label.SURFACE_ADSORPTION_GUESS: + ref_id = s else: self.raise_named_exception( "Reactive complexes built from more than 2 structures are not supported." ) return db.Structure(ref_id, self._structures) - def sort_settings(self, task_settings: dict): + def sort_settings(self, task_settings: dict) -> None: """ Take settings of configured calculation and save them in class member. Throw exception for unknown settings. 
@@ -258,7 +283,7 @@ def sort_settings(self, task_settings: dict): break # found right task, leave inner loop else: self.raise_named_exception( - "The key '{}' was not recognized.".format(key) + f"The key '{key}' was not recognized." ) if "ircopt" in self.settings.keys() and "output" in self.settings["ircopt"]: @@ -266,7 +291,8 @@ def sort_settings(self, task_settings: dict): "Cannot specify a separate output system for the optimization of the IRC end points" ) - def save_initial_graphs_and_charges(self, settings_manager: SettingsManager, structures: List): + def save_initial_graphs_and_charges(self, settings_manager: SettingsManager, structures: List[db.Structure]) \ + -> None: """ Save the graphs and charges of the reactants. @@ -282,8 +308,12 @@ def save_initial_graphs_and_charges(self, settings_manager: SettingsManager, str The reactant structures. """ graphs = [] - if len(structures) < 3: + if len(structures) < 3 or any(s.get_label() == db.Label.SURFACE_ADSORPTION_GUESS for s in structures): for i, s in enumerate(structures): + if s.get_label() == db.Label.SURFACE_ADSORPTION_GUESS: + # the given reaction has 3 structure, with one already representing the reactive complex + # the reactive complex should be skipped in this sanity check + continue decision_list = self._decision_list_from_structure(s) graph = self._cbor_graph_from_structure(s) if decision_list is None: @@ -316,14 +346,15 @@ def save_initial_graphs_and_charges(self, settings_manager: SettingsManager, str self.start_decision_lists))) ) self.start_graph = ";".join(graphs) - self.determine_pes_of_rc(settings_manager, *structures) + self.determine_pes_of_rc(settings_manager, *[s for s in structures + if s.get_label() != db.Label.SURFACE_ADSORPTION_GUESS]) else: # should not be reachable self.raise_named_exception( "Reactive complexes built from more than 2 structures are not supported." 
) - def _cbor_graph_from_structure(self, structure) -> str: + def _cbor_graph_from_structure(self, structure: db.Structure) -> str: """ Retrieve masm_cbor_graph from a database structure and throws error if none present. @@ -339,8 +370,8 @@ def _cbor_graph_from_structure(self, structure) -> str: self.raise_named_exception(f"Missing graph in structure {str(structure.id())}.") return structure.get_graph("masm_cbor_graph") - @ staticmethod - def _decision_list_from_structure(structure) -> Optional[str]: + @staticmethod + def _decision_list_from_structure(structure: db.Structure) -> Optional[str]: """ Retrieve masm_decision_list from a database structure. Returns ``None`` if none present. @@ -357,7 +388,7 @@ def _decision_list_from_structure(structure) -> Optional[str]: return None return structure.get_graph("masm_decision_list") - def build_reactive_complex(self, settings_manager: SettingsManager): + def build_reactive_complex(self, settings_manager: SettingsManager) -> utils.AtomCollection: """ Aligns the structure(s) to form a reactive complex and returns the AtomCollection. In case of multiple structures, the active site settings are modified to reflect the correct index in the supermolecule. 
@@ -378,9 +409,6 @@ def build_reactive_complex(self, settings_manager: SettingsManager): reactive_complex :: utils.AtomCollection (Scine::Utilities::AtomCollection) The atoms of the reactive complex """ - import scine_database as db - import scine_utilities as utils - start_structure_ids = self._calculation.get_structures() start_structures = [db.Structure(sid, self._structures) for sid in start_structure_ids] self.save_initial_graphs_and_charges(settings_manager, start_structures) @@ -393,7 +421,7 @@ def build_reactive_complex(self, settings_manager: SettingsManager): return atoms if len(start_structures) == 2: - # Intermolecular reactions reactions require in situ generation of the reactive complex + # Intermolecular reactions require in situ generation of the reactive complex s0 = start_structures[0] s1 = start_structures[1] @@ -438,12 +466,19 @@ def build_reactive_complex(self, settings_manager: SettingsManager): self.random_displace_atoms(atoms, self.settings[self.rc_key]["displacement"]) # breaks symmetry return atoms + if len(start_structures) == 3: + # the given reaction has 3 structure, with one already representing the reactive complex + for s in start_structures: + if s.get_label() == db.Label.SURFACE_ADSORPTION_GUESS: + return s.get_atoms() + # should not be reachable self.raise_named_exception( "Reactive complexes built from more than 2 structures are not supported." ) - def determine_pes_of_rc(self, settings_manager: SettingsManager, s0, s1=None): + def determine_pes_of_rc(self, settings_manager: SettingsManager, s0: db.Structure, + s1: Optional[db.Structure] = None) -> None: """ Set charge and spin multiplicity within the settings_manager based on the reaction type (uni- vs. bimolecular) and the given settings for the reactive complex. 
@@ -525,7 +560,8 @@ def _orient_coordinates(self, coord1: np.ndarray, coord2: np.ndarray) -> np.ndar coord1 -= np.array([rc_settings["x_spread"], 0.0, 0.0]) return np.concatenate((coord1, coord2), axis=0) - def random_displace_atoms(self, atoms, displacement: float = 0.05): + @staticmethod + def random_displace_atoms(atoms: utils.AtomCollection, displacement: float = 0.05) -> None: """ Apply small seeded random displacement based on setting """ @@ -534,7 +570,7 @@ def random_displace_atoms(self, atoms, displacement: float = 0.05): coords += displacement * (np.random.rand(*coords.shape) - 0.5) * 2.0 / np.sqrt(3.0) atoms.positions = coords - def setup_automatic_mode_selection(self, name: str): + def setup_automatic_mode_selection(self, name: str) -> None: """ A settings sanity check, which adds the settings for automatic mode selection or doesn't based on the given user settings. @@ -545,7 +581,7 @@ def setup_automatic_mode_selection(self, name: str): The name of the subtask for which the automatic mode selection is added. """ if "automatic_mode_selection" not in self.settings[name] and all( - "_follow_mode" not in key for key in self.settings[name] + "_follow_mode" not in key for key in self.settings[name] ): if self.exploration_key + "_lhs_list" in self.settings[self.exploration_key]: self.settings[name]["automatic_mode_selection"] = ( @@ -568,15 +604,15 @@ def n_imag_frequencies(self, name: str) -> int: name :: str The name of the system which holds Hessian results. 
""" - import scine_utilities as utils - atoms = self.systems[name].structure modes_container = utils.normal_modes.calculate(self.systems[name].get_results().hessian, atoms) wavenumbers = modes_container.get_wave_numbers() return np.count_nonzero(np.array(wavenumbers) < self.settings[self.job_key]["imaginary_wavenumber_threshold"]) - def get_graph_charges_multiplicities(self, name: str, total_charge: int): + def get_graph_charges_multiplicities(self, name: str, total_charge: int, total_system_name: Optional[str] = None, + split_index: Optional[int] = None) \ + -> Tuple[List[utils.AtomCollection], str, List[int], List[int], List[str]]: """ Runs bond orders for the specified name in the dictionary of systems, constructs the resulting graphs and splits the system into @@ -596,6 +632,13 @@ def get_graph_charges_multiplicities(self, name: str, total_charge: int): Index into systems dictionary to calculate bond orders for total_charge :: str The charge of the system + total_system_name :: str + The name of the total system which can be specified in case this method is called for a partial system. + This can enable to assign the indices of the total system to the indices of the partial system. + split_index :: int + The index of the system in the total system which is split. This is used to assign the indices of the total + system to the indices of the partial system. Both total_system_name and split_index must be specified or + neither must be specified. Returns ------- @@ -606,26 +649,35 @@ def get_graph_charges_multiplicities(self, name: str, total_charge: int): charges :: List[int] Charges of the molecules. multiplicities :: List[int] - Minimal multiplicities of the molecules. 
+ Multiplicities of the molecules, total multiplicity before split influences these returned values based + on a buff spread over all split structures, these values have to be checked with spin propensity checks decision_lists :: List[str] Molassembler decision lists for free dihedrals """ import scine_readuct as readuct - import scine_utilities as utils - - bond_orders = self.make_bond_orders_from_calc(self.systems, name) + from scine_puffin.utilities.reaction_transfer_helper import ReactionTransferHelper + + all_surface_indices = self.surface_indices_all_structures() + if total_system_name is None: + surface_indices: Union[Set[int], List[int]] = all_surface_indices + elif total_system_name not in self._component_maps: + self.raise_named_exception(f"Total system name '{total_system_name}' not found in component maps") + return utils.AtomCollection(), "", [], [], [] # For type checking + elif split_index is None: + self.raise_named_exception(f"Split index must be given, " + f"if total system name '{total_system_name}' is specified") + return utils.AtomCollection(), "", [], [], [] # For type checking + else: + split_surfaces_indices = \ + ReactionTransferHelper.map_total_indices_to_split_structure_indices( + all_surface_indices, self._component_maps[total_system_name]) + surface_indices = split_surfaces_indices[split_index] - pbc_string = self.systems[name].settings.get(utils.settings_names.periodic_boundaries, "") - masm_results = masm_helper.get_molecules_result( - self.systems[name].structure, - bond_orders, - self.connectivity_settings, - pbc_string, - ) + masm_results, self.systems = self.make_masm_result_from_calc(self.systems, name, surface_indices) split_structures = masm_results.component_map.apply(self.systems[name].structure) - decision_lists = [masm_helper.get_decision_list_from_molecule( - m, a) for m, a in zip(masm_results.molecules, split_structures)] + decision_lists = [masm_helper.get_decision_list_from_molecule(m, a) + for m, a in 
zip(masm_results.molecules, split_structures)] # Get cbor graphs graphs = [] @@ -633,7 +685,7 @@ def get_graph_charges_multiplicities(self, name: str, total_charge: int): graphs.append(masm_helper.get_cbor_graph_from_molecule(molecule)) # Determine partial charges, charges per molecules and number of electrons per molecule - bond_orders = self.make_bond_orders_from_calc(self.systems, name) + bond_orders, self.systems = self.make_bond_orders_from_calc(self.systems, name, surface_indices) partial_charges = self.systems[name].get_results().atomic_charges if partial_charges is None: self.systems, success = readuct.run_single_point_task( @@ -643,13 +695,42 @@ def get_graph_charges_multiplicities(self, name: str, total_charge: int): success, self.systems, [name], ["energy", "atomic_charges"] ) partial_charges = self.systems[name].get_results().atomic_charges + # TODO replace with propert setter if we have on in utils, this does not work self.systems[name].get_results().bond_orders = bond_orders charges, n_electrons, _ = self._integrate_charges(masm_results.component_map, partial_charges, split_structures, total_charge) - # This assumes minimal multiplicity, product multiplicities are again checked later around this multiplicity - multiplicities = [nel % 2 + 1 for nel in n_electrons] + # Assign multiplicities where we try to spread the buff + # (i.e. 
multiplicity difference before to singlet / duplet multiplicity) + # --> if before 3 -> give one structure (largest) triplet, before 5 --> give each a triplet + # this ensures that the spin propensity checks later can cover as much as possible + # this should work with any multiplicity and any number of split structures + multiplicity_before = self.systems[name].settings[utils.settings_names.spin_multiplicity] + total_electrons_were_even = multiplicity_before % 2 != 0 + min_multiplicity = 1 if total_electrons_were_even else 2 + buff = (multiplicity_before - min_multiplicity) / 2.0 + n_structures = len(split_structures) + if n_structures == 1: + multiplicities = [multiplicity_before] + elif not buff: + multiplicities = [nel % 2 + 1 for nel in n_electrons] + else: + buff_per_structure = int(ceil(buff / n_structures)) + sorted_structures = sorted(split_structures, reverse=True) # sort by size, the largest first + # sort electrons just like structures + sorted_n_electrons = [n for _, n in sorted(zip(sorted_structures, n_electrons), reverse=True)] + # determine real index of the sorted electrons + sorting_indices = [n for _, n in sorted(zip(sorted_structures, list(range(n_structures))), reverse=True)] + multiplicities_array = np.zeros(n_structures, dtype=int) + for index, nel in zip(sorting_indices, sorted_n_electrons): + is_even = nel % 2 == 0 + multiplicity = 1 if is_even else 2 + if buff: + multiplicity += 2 * buff_per_structure + buff -= 1 + multiplicities_array[index] = multiplicity + multiplicities = list(multiplicities_array) # Sort everything according to graphs and if these are equal according to charges and then multiplicities graphs, charges, multiplicities, decision_lists, structure_order = ( @@ -659,16 +740,18 @@ def get_graph_charges_multiplicities(self, name: str, total_charge: int): charges, multiplicities, decision_lists, - range(0, len(split_structures))))) + range(len(split_structures))))) ) graph_string = ";".join(graphs) ordered_structures = 
[split_structures[i] for i in structure_order] + new_component_map = [structure_order.index(i) for i in list(masm_results.component_map)] + self._component_maps[name] = new_component_map return ordered_structures, graph_string, charges, multiplicities, decision_lists @staticmethod - def _custom_round(number: float, threshold=0.5) -> float: + def _custom_round(number: float, threshold: float = 0.5) -> float: """ Rounding number up or down depending on the threshold. To round down, delta must be smaller than the threshold. @@ -743,7 +826,7 @@ def _distribute_charge( The updated list of guessed charges where the sum equals the total charge of the supersystem. """ residual = self._calculate_residual(summed_partial_charges, charge_guess) - while (sum(charge_guess) != total_charge): + while sum(charge_guess) != total_charge: charge_diff = sum(charge_guess) - total_charge # too many electrons, add a charge if charge_diff < 0.0: @@ -786,7 +869,6 @@ def _integrate_charges(self, component_map: List[int], partial_charges: List[flo the determined charges per non-bonded molecule in the supersystem. """ - import scine_utilities as utils charges = [] n_electrons = [] for i in range(len(split_structures)): @@ -815,7 +897,7 @@ def _integrate_charges(self, component_map: List[int], partial_charges: List[flo residual = self._calculate_residual(summed_partial_charges, updated_charges) return updated_charges, n_electrons, residual - def check_for_barrierless_reaction(self): + def check_for_barrierless_reaction(self) -> Union[Tuple[str, List[str]], Tuple[None, None]]: """ Optimizes the reactive complex, comparing the result to the start structures determining if a barrierless reaction occurred. @@ -829,6 +911,7 @@ def check_for_barrierless_reaction(self): Molassembler decision lists for free dihedrals of the reaction product if there was any. 
""" + import scine_molassembler as masm # Check for barrierless reaction leading to new graphs if self.rc_opt_system_name not in self.systems: # Skip if already done print("Running Reactive Complex Optimization") @@ -844,9 +927,12 @@ def check_for_barrierless_reaction(self): [], "Reactive complex optimization failed.\n", ) - _, rc_opt_graph, _, _, rc_opt_decision_lists = \ + _, rc_opt_graph, _, _, rc_opt_decision_lists = \ self.get_graph_charges_multiplicities(self.rc_opt_system_name, sum(self.start_charges)) + print("Optimized Reactive Complex Graph:") + print(rc_opt_graph) + if not masm.JsonSerialization.equal_molecules(self.start_graph, rc_opt_graph): return rc_opt_graph, rc_opt_decision_lists return None, None @@ -923,6 +1009,7 @@ def irc_sanity_checks_and_analyze_sides( start_names :: Optional[List[str]] A list of the access keys to the starting materials in the system map. """ + import scine_molassembler as masm if len(inputs) != 2: self.raise_named_exception( "Requires to pass 2 systems to the IRC sanity check" @@ -962,8 +1049,9 @@ def irc_sanity_checks_and_analyze_sides( # Analyze separated forward molecules forward_graphs = [] forward_decision_lists = [] - for name, charge in zip(forward_names, forward_charges): - s, g, _, _, d = self.get_graph_charges_multiplicities(name, charge) + for i, (name, charge) in enumerate(zip(forward_names, forward_charges)): + s, g, _, _, d = self.get_graph_charges_multiplicities(name, charge, + total_system_name=inputs[0], split_index=i) if len(s) > 1: self._calculation.set_comment(self.name + ": IRC results keep decomposing (more than once).") return None, None @@ -983,8 +1071,9 @@ def irc_sanity_checks_and_analyze_sides( # Analyze separated backward molecules backward_graphs = [] backward_decision_lists = [] - for name, charge in zip(backward_names, backward_charges): - s, g, _, _, d = self.get_graph_charges_multiplicities(name, charge) + for i, (name, charge) in enumerate(zip(backward_names, backward_charges)): + s, g, _, 
_, d = self.get_graph_charges_multiplicities(name, charge, + total_system_name=inputs[1], split_index=i) if len(s) > 1: self._calculation.set_comment(self.name + ": IRC results keep decomposing (more than once).") return None, None @@ -1018,75 +1107,109 @@ def irc_sanity_checks_and_analyze_sides( or forward_charges != backward_charges) if not found_new_structures: self._calculation.set_comment(self.name + ": IRC forward and backward have identical structures.") + self._save_ts_for_restart(db.Label.TS_OPTIMIZED) return None, None + compare_decision_lists = True # Do not expect matching charges if reactive complex charge differs from sum of start structure charges - if masm.JsonSerialization.equal_molecules(forward_graph, self.start_graph)\ + if masm.JsonSerialization.equal_molecules(forward_graph, self.start_graph) \ and (not check_charges or forward_charges == self.start_charges): product_names = backward_names self.step_direction = "backward" + self.products_component_map = self._component_maps[inputs[1]] + compare_decision_lists = False elif masm.JsonSerialization.equal_molecules(backward_graph, self.start_graph) and ( - not check_charges or backward_charges == self.start_charges + not check_charges or backward_charges == self.start_charges ): product_names = forward_names self.step_direction = "forward" + self.products_component_map = self._component_maps[inputs[0]] elif ';' in self.start_graph: rc_opt_graph, _ = self.check_for_barrierless_reaction() print("Barrierless Check Graph:") print(rc_opt_graph) if rc_opt_graph is None: - self._calculation.set_comment(self.name + ": No IRC structure matches starting structure.") - return None, None - if masm.JsonSerialization.equal_molecules(forward_graph, rc_opt_graph): + print(self.name + ": No IRC structure matches starting structure.") + product_names = forward_names + # Step direction must be forward to guarantee working logic downstream + self.step_direction = "forward" + self.products_component_map = 
self._component_maps[inputs[0]] + # Trigger to set 'start_names' as 'backward_names' + compare_decision_lists = False + self.no_irc_structure_matches_start = True + elif masm.JsonSerialization.equal_molecules(forward_graph, rc_opt_graph): self.step_direction = "backward" product_names = backward_names + self.products_component_map = self._component_maps[inputs[1]] self.lhs_barrierless_reaction = True elif masm.JsonSerialization.equal_molecules(backward_graph, rc_opt_graph): self.step_direction = "forward" product_names = forward_names + self.products_component_map = self._component_maps[inputs[0]] self.lhs_barrierless_reaction = True else: - self._calculation.set_comment(self.name + ": No IRC structure matches starting structure.") - return None, None + print(self.name + ": No IRC structure matches starting structure.") + product_names = forward_names + # Step direction must be forward to guarantee working logic downstream + self.step_direction = "forward" + self.products_component_map = self._component_maps[inputs[0]] + # Trigger to set 'start_names' as 'backward_names' + compare_decision_lists = False + self.no_irc_structure_matches_start = True else: - self._calculation.set_comment(self.name + ": No IRC structure matches starting structure.") - return None, None - - # Compare decision lists of start structures: - original_decision_lists = self.start_decision_lists - if self.step_direction == "backward": - new_decision_lists = forward_decision_lists + print(self.name + ": No IRC structure matches starting structure.") + product_names = forward_names + # Step direction must be forward to guarantee working logic downstream + self.step_direction = "forward" + self.products_component_map = self._component_maps[inputs[0]] + # Trigger to set 'start_names' as 'backward_names' + compare_decision_lists = False + self.no_irc_structure_matches_start = True + + if not compare_decision_lists: + # ensures that we save the start structures + decision_lists_match = False else: - 
new_decision_lists = backward_decision_lists - decision_lists_match: bool = True - for new, orig in zip(new_decision_lists, original_decision_lists): - if not masm.JsonSerialization.equal_decision_lists(new, orig): - decision_lists_match = False - break + # Compare decision lists of start structures: + original_decision_lists = self.start_decision_lists + if self.step_direction == "backward": + new_decision_lists = forward_decision_lists + else: + new_decision_lists = backward_decision_lists + decision_lists_match = True + for new, orig in zip(new_decision_lists, original_decision_lists): + if not masm.JsonSerialization.equal_decision_lists(new, orig): + decision_lists_match = False + break if not decision_lists_match: if self.step_direction == "backward": start_names = forward_names else: + # Important, if no_irc_structure_matches_start! start_names = backward_names else: start_names = None # additional check for double ended methods if self.end_graph: if ( - masm.JsonSerialization.equal_molecules(forward_graph, self.start_graph) - and masm.JsonSerialization.equal_molecules(backward_graph, self.end_graph) - and (not check_charges or forward_charges == self.start_charges) + masm.JsonSerialization.equal_molecules(forward_graph, self.start_graph) + and masm.JsonSerialization.equal_molecules(backward_graph, self.end_graph) + and (not check_charges or forward_charges == self.start_charges) ): product_names = backward_names elif ( - masm.JsonSerialization.equal_molecules(forward_graph, self.end_graph) - and masm.JsonSerialization.equal_molecules(backward_graph, self.start_graph) - and (not check_charges or backward_charges == self.start_charges) + masm.JsonSerialization.equal_molecules(forward_graph, self.end_graph) + and masm.JsonSerialization.equal_molecules(backward_graph, self.start_graph) + and (not check_charges or backward_charges == self.start_charges) ): product_names = forward_names else: - self._calculation.set_comment(self.name + ": IRC does not match double 
ended method") - return None, None + print(self.name + ": IRC does not match double-ended method") + # IRC points do not match end points of double ended method, + # hence the IRC points are forwarded for post-processing. + product_names = backward_names + start_names = forward_names + self.step_direction = "backward" # Check if complexations need to be tracked forward_complexation_energy = 0.0 for name in forward_names: @@ -1106,16 +1229,17 @@ def irc_sanity_checks_and_analyze_sides( self.rhs_complexation = True else: self.lhs_complexation = True + return product_names, start_names def optimize_structures( - self, - name_stub: str, - structures, - structure_charges: List[int], - structure_multiplicities: List[int], - calculator_settings: dict, - stop_on_error: Optional[bool] = True + self, + name_stub: str, + structures: List[utils.AtomCollection], + structure_charges: List[int], + structure_multiplicities: List[int], + calculator_settings: dict, + stop_on_error: bool = True ) -> List[str]: """ For each given product AtomCollection: @@ -1141,7 +1265,7 @@ def optimize_structures( The spin multiplicities of the structures. calculator_settings :: dict The general settings for the Scine calculator. Charge and spin multiplicity will be overwritten. - stop_on_error :: Optional[bool] + stop_on_error :: bool If set to False, skip unsuccessful calculations and replace calculator with None Returns @@ -1150,9 +1274,8 @@ def optimize_structures( A list of the access keys to the structures in the system map. 
""" import scine_readuct as readuct - import scine_utilities as utils - structure_names = [] + method_family = self._calculation.get_model().method_family # Generate structure systems for i, structure in enumerate(structures): name = f"{name_stub}_{i:02d}" @@ -1166,10 +1289,11 @@ def optimize_structures( # generate calculator new = utils.core.load_system_into_calculator( name + ".xyz", - self._calculation.get_model().method_family, + method_family, **structure_calculator_settings, ) self.systems[name] = new + self._add_propensity_systems(name) except RuntimeError as e: if stop_on_error: raise e @@ -1178,42 +1302,54 @@ def optimize_structures( print("Product Opt Settings:") print(self.settings["opt"], "\n") + required_properties = ["energy"] + if not self.connectivity_settings['only_distance_connectivity']: + required_properties.append("bond_orders") # Optimize structures, if they have more than one atom; otherwise just run a single point calculation for structure in structure_names: if self.systems[structure] is None: continue try: - self.systems, success = readuct.run_single_point_task( - self.systems, - [structure], - spin_propensity_check=self.settings[self.job_key]["spin_propensity_check"], - require_bond_orders=True, - ) - if len(self.systems[structure].structure) > 1: - print("Optimizing " + structure + ":\n") - self.systems, success = self.observed_readuct_call( - 'run_opt_task', self.systems, [structure], **self.settings["opt"] - ) - self.throw_if_not_successful( - success, - self.systems, - [structure], - ["energy"], - f"{name_stub.capitalize()} optimization failed:\n", - ) + if not self.settings[self.job_key]["spin_propensity_check"]: self.systems, success = readuct.run_single_point_task( self.systems, [structure], - spin_propensity_check=self.settings[self.job_key]["spin_propensity_check"], - require_bond_orders=True, + require_bond_orders=not self.connectivity_settings['only_distance_connectivity'], ) - self.throw_if_not_successful( - success, - 
self.systems, - [structure], - ["energy", "bond_orders"], - f"{name_stub.capitalize()} optimization failed:\n", - ) + self.throw_if_not_successful(success, self.systems, [structure], required_properties, + f"{name_stub.capitalize()} single point failed:\n") + else: + self._spin_propensity_single_points(structure, f"{name_stub.capitalize()} single point failed:\n") + if len(self.systems[structure].structure) > 1: + if len(structure_names) == 1 and len(self._calculation.get_structures()) == 1 and \ + not self.settings[self.job_key]["spin_propensity_check_for_unimolecular_reaction"]: + # optimize only base multiplicity + self.systems = self.observed_readuct_call_with_throw( + 'run_opt_task', self.systems, [structure], required_properties, + f"{name_stub.capitalize()} optimization failed:\n", **self.settings["opt"] + ) + # still do propensity SP to store close energy multiplicities in DB + self._spin_propensity_single_points(structure, + f"{name_stub.capitalize()} optimization failed:\n") + elif not self.settings[self.job_key]["spin_propensity_optimize_all"]: + prev_lowest = None + lowest_name, _ = self._get_propensity_names_within_range( + structure, self.settings[self.job_key]["spin_propensity_energy_range_to_optimize"] + ) + while lowest_name != prev_lowest: + print("Optimizing " + lowest_name + ":\n") + self.systems = self.observed_readuct_call_with_throw( + 'run_opt_task', self.systems, [lowest_name], required_properties, + f"{name_stub.capitalize()} optimization failed:\n", **self.settings["opt"] + ) + self._spin_propensity_single_points(structure, + f"{name_stub.capitalize()} optimization failed:\n") + lowest_name, _ = self._get_propensity_names_within_range( + structure, self.settings[self.job_key]["spin_propensity_energy_range_to_optimize"] + ) + else: + self._spin_propensity_optimizations(structure, + f"{name_stub.capitalize()} optimization failed:\n") except RuntimeError as e: if stop_on_error: raise e @@ -1221,8 +1357,103 @@ def optimize_structures( 
self.systems[structure] = None return structure_names + def _add_propensity_systems(self, name: str) -> None: + for shift_name, multiplicity in self._propensity_iterator(name): + if shift_name == name: + continue + self.systems[shift_name] = self.systems[name].clone() + self.systems[shift_name].delete_results() # make sure results of clone are empty + if utils.settings_names.spin_mode in self.systems[shift_name].settings: + dc = self.systems[shift_name].settings.descriptor_collection + if isinstance(dc[utils.settings_names.spin_mode], + utils.OptionListDescriptor): + for suitable in ["unrestricted", "restricted_open_shell", "any"]: + if suitable in dc[utils.settings_names.spin_mode].options: + self.systems[shift_name].settings[utils.settings_names.spin_mode] = suitable + break + else: + self.systems[shift_name].settings[utils.settings_names.spin_mode] = "any" + self.systems[shift_name].settings[utils.settings_names.spin_multiplicity] = multiplicity + + def _propensity_iterator(self, name: str) -> Iterator[Tuple[str, int]]: + from scine_utilities import settings_names + + propensity_limit = self.settings[self.job_key]["spin_propensity_check"] + for shift in range(-propensity_limit, propensity_limit + 1): + multiplicity = self.systems[name].settings[settings_names.spin_multiplicity] + shift * 2 + if multiplicity > 0: + shift_name = f"{name}_multiplicity_shift_{shift}" if shift else name + yield shift_name, multiplicity + + def _spin_propensity_single_points(self, name: str, error_msg: str) -> None: + import scine_readuct as readuct + info = f"Single point calculations of {name}" + if self.settings[self.job_key]["spin_propensity_check"]: + info += " with potential spin propensities" + info += ":\n" + print(info) + total_success = 0 + for shift_name, _ in self._propensity_iterator(name): + if self.systems.get(shift_name) is None: + continue + if self.systems[shift_name].get_results().energy is not None: + # we already have an energy for this system + total_success += 1 
+ continue + self.systems, success = readuct.run_single_point_task( + self.systems, + [shift_name], + require_bond_orders=not self.connectivity_settings['only_distance_connectivity'], + stop_on_error=False + ) + if success: + total_success += 1 + else: + self.systems[shift_name] = None + if not total_success: + self.throw_if_not_successful(False, self.systems, [name], ["energy"], error_msg) + + def _spin_propensity_optimizations(self, name: str, error_msg: str) -> None: + info = f"Optimizing {name}" + if self.settings[self.job_key]["spin_propensity_check"]: + info += " with potential spin propensities" + info += ":\n" + print(info) + total_success = 0 + lowest_name, allowed_names = self._get_propensity_names_within_range( + name, + self.settings[self.job_key]["spin_propensity_energy_range_to_optimize"] + ) + all_names = [lowest_name] + allowed_names + for shift_name, _ in self._propensity_iterator(name): + if self.systems.get(shift_name) is None or shift_name not in all_names: + continue + self.systems, success = self.observed_readuct_call( + 'run_opt_task', self.systems, [shift_name], stop_on_error=False, **self.settings["opt"] + ) + if success: + total_success += 1 + else: + self.systems[shift_name] = None + if not total_success: + self.throw_if_not_successful(False, self.systems, [name], ["energy"], error_msg) + + def _save_ts_for_restart(self, ts_label: db.Label) -> None: + """ + Saves the output system of 'tsopt' (hence must already be finished) + as a restart information after some additional single points. 
+ + Notes + ----- + * Requires run configuration + """ + ts_name = self.output("tsopt")[0] + # do propensity single_points for TS and save data + _, ts = self._store_ts_with_propensity_info(ts_name, None, ts_label) + self._calculation.set_restart_information("TS", ts.id()) + def generate_spline( - self, tsopt_task_name: str, n_fit_points: int = 23, degree: int = 3 + self, tsopt_task_name: str, n_fit_points: int = 23, degree: int = 3 ): """ Using the transition state, IRC and IRC optimization outputs generates @@ -1247,9 +1478,6 @@ def generate_spline( spline :: utils.bsplines.TrajectorySpline The fitted spline of the elementary step trajectory. """ - import scine_utilities as utils - import os - rpi = utils.bsplines.ReactionProfileInterpolation() def read_trj(fname): @@ -1273,12 +1501,17 @@ def read_trj(fname): else: self.raise_named_exception("Could not determine elementary step direction.") + ts_calc = self.systems[self.output(tsopt_task_name)[0]] + ts_energy = ts_calc.get_results().energy + fpath = os.path.join( self.work_dir, f"irc_{rev_dir}", f"irc_{rev_dir}.opt.trj.xyz" ) if os.path.isfile(fpath): trj, energies = read_trj(fpath) for pos, e in zip(reversed(trj), reversed(energies)): + if e > ts_energy: + continue rpi.append_structure(utils.AtomCollection(trj.elements, pos), e) fpath = os.path.join( @@ -1287,6 +1520,8 @@ def read_trj(fname): if os.path.isfile(fpath): trj, energies = read_trj(fpath) for pos, e in zip(reversed(trj), reversed(energies)): + if e > ts_energy: + continue rpi.append_structure(utils.AtomCollection(trj.elements, pos), e) else: raise RuntimeError( @@ -1308,6 +1543,8 @@ def read_trj(fname): if os.path.isfile(fpath): trj, energies = read_trj(fpath) for pos, e in zip(trj, energies): + if e > ts_energy: + continue rpi.append_structure(utils.AtomCollection(trj.elements, pos), e) else: raise RuntimeError( @@ -1318,6 +1555,8 @@ def read_trj(fname): if os.path.isfile(fpath): trj, energies = read_trj(fpath) for pos, e in zip(trj, energies): + if 
e > ts_energy: + continue rpi.append_structure(utils.AtomCollection(trj.elements, pos), e) # Get spline @@ -1325,14 +1564,14 @@ def read_trj(fname): return spline def store_start_structures( - self, - start_structure_names: List[str], - program_helper: Union[ProgramHelper, None], - tsopt_task_name: str, - start_structures: Optional[List[Any]] = None + self, + start_structure_names: List[str], + program_helper: Union[ProgramHelper, None], + tsopt_task_name: str, + start_structures: Optional[List[db.ID]] = None ): """ - Store the new start systems system in the database. + Store the new start systems in the database. Notes ----- @@ -1355,10 +1594,33 @@ def store_start_structures( start_structure_ids :: List[scine_database.ID] A list of the database IDs of the start structures. """ - import scine_database as db + import scine_molassembler as masm + from scine_puffin.utilities.reaction_transfer_helper import ReactionTransferHelper if start_structures is None: start_structures = self._calculation.get_structures() + # get start name + if self.step_direction == "forward": + start_name = self.output("irc")[1] + elif self.step_direction == "backward": + start_name = self.output("irc")[0] + else: + self.raise_named_exception("Could not determine elementary step direction.") + return # unreachable, just for linter + if start_name not in self._component_maps: + self.raise_named_exception("Could not find component map for start structures.") + return # unreachable, just for linter + + # check for surface indices + all_indices = self.surface_indices_all_structures(start_structures) + split_surfaces_indices = \ + ReactionTransferHelper.map_total_indices_to_split_structure_indices( + all_indices, self._component_maps[start_name]) + models = [db.Structure(sid, self._structures).get_model() + for sid in start_structures] + start_model = models[0] + if not all(model == start_model for model in models): + self.raise_named_exception("React job with mixed model input structures") # 
Update model to make sure there are no 'any' values left update_model( @@ -1368,14 +1630,16 @@ def store_start_structures( ) start_structure_ids = [] - for name in start_structure_names: + for i, name in enumerate(start_structure_names): + surface_indices = split_surfaces_indices[i] if split_surfaces_indices is not None else None # Check if the new structures are actually duplicates duplicate: Optional[db.ID] = None - dl = ';'.join(self.make_decision_lists_from_calc(self.systems, name)) - graph = self.make_graph_from_calc(self.systems, name) + dl = ';'.join(self.make_decision_lists_from_calc(self.systems, name, surface_indices)[0]) + graph, self.systems = self.make_graph_from_calc(self.systems, name, surface_indices) for initial_id in start_structures: - initial_structure = db.Structure(initial_id) - initial_structure.link(self._structures) + initial_structure = db.Structure(initial_id, self._structures) + if not initial_structure.has_graph('masm_cbor_graph'): + continue initial_graph = initial_structure.get_graph("masm_cbor_graph") if not masm.JsonSerialization.equal_molecules(initial_graph, graph): continue @@ -1388,10 +1652,12 @@ def store_start_structures( aggregate.link(self._compounds) existing_structures = aggregate.get_structures() for existing_structure_id in existing_structures: - existing_structure = db.Structure(existing_structure_id) - existing_structure.link(self._structures) + existing_structure = db.Structure(existing_structure_id, self._structures) if existing_structure.get_label() in \ - [db.Label.DUPLICATE, db.Label.MINIMUM_GUESS, db.Label.USER_GUESS]: + [db.Label.DUPLICATE, db.Label.MINIMUM_GUESS, db.Label.USER_GUESS, + db.Label.SURFACE_GUESS, db.Label.SURFACE_ADSORPTION_GUESS]: + continue + if existing_structure.get_model() != start_model: continue existing_structure_dl = existing_structure.get_graph("masm_decision_list") if masm.JsonSerialization.equal_decision_lists(dl, existing_structure_dl): @@ -1403,28 +1669,27 @@ def 
store_start_structures( start_structure_ids.append(duplicate) continue - new_structure = self.create_new_structure(self.systems[name], db.Label.MINIMUM_OPTIMIZED) - self.transfer_properties(self.ref_structure, new_structure) + label = self._determine_new_label_based_on_graph_and_surface_indices(graph, surface_indices) + new_structure = self.create_new_structure(self.systems[name], label) + for initial_id in start_structures: + initial_structure = db.Structure(initial_id, self._structures) + if not initial_structure.has_graph('masm_cbor_graph'): + continue + if initial_structure.get_model() != new_structure.get_model(): + continue + initial_graph = initial_structure.get_graph("masm_cbor_graph") + if masm.JsonSerialization.equal_molecules(initial_graph, graph): + self.transfer_properties(initial_structure, new_structure) + if program_helper is not None: + program_helper.calculation_postprocessing(self._calculation, initial_structure, new_structure) + bond_orders, self.systems = self.make_bond_orders_from_calc(self.systems, name, surface_indices) self.store_energy(self.systems[name], new_structure) - self.store_property( - self._properties, - "bond_orders", - "SparseMatrixProperty", - self.systems[name].get_results().bond_orders.matrix, - self._calculation.get_model(), - self._calculation, - new_structure, - ) - self.add_graph(new_structure, self.systems[name].get_results().bond_orders) - if ";" in graph: - new_structure.set_label(db.Label.COMPLEX_OPTIMIZED) - if program_helper is not None: - program_helper.calculation_postprocessing(self._calculation, self.ref_structure, new_structure) + self.store_bond_orders(bond_orders, new_structure) + self.add_graph(new_structure, bond_orders, surface_indices) start_structure_ids.append(new_structure.id()) return start_structure_ids - def save_barrierless_reaction(self, product_graph: str, program_helper: Optional[ProgramHelper]): - import scine_database as db + def save_barrierless_reaction_from_rcopt(self, product_graph: str, 
program_helper: Optional[ProgramHelper]) -> None: self.lhs_barrierless_reaction = True print("Barrierless product Graph:") print(product_graph) @@ -1434,44 +1699,60 @@ def save_barrierless_reaction(self, product_graph: str, program_helper: Optional db_results = self._calculation.get_results() db_results.clear() # Save RHS of barrierless step - rhs_complex_label = self.rc_opt_system_name - rhs_complex_system = self.systems[rhs_complex_label] - if ";" in product_graph: - rhs_complex = self.create_new_structure(rhs_complex_system, db.Label.COMPLEX_OPTIMIZED) - else: - rhs_complex = self.create_new_structure(rhs_complex_system, db.Label.MINIMUM_OPTIMIZED) - db_results.add_structure(rhs_complex.id()) - self.transfer_properties(self.ref_structure, rhs_complex) - self.store_energy(self.systems[rhs_complex_label], rhs_complex) - bond_orders = self.make_bond_orders_from_calc(self.systems, rhs_complex_label) - self.store_property( - self._properties, - "bond_orders", - "SparseMatrixProperty", - bond_orders.matrix, - self._calculation.get_model(), - self._calculation, - rhs_complex, - ) - self.add_graph(rhs_complex, bond_orders) - if program_helper is not None: - program_helper.calculation_postprocessing(self._calculation, self.ref_structure, rhs_complex) + rhs_complex_id = self._save_complex_to_db(self.rc_opt_system_name, program_helper) + db_results.add_structure(rhs_complex_id) # Save step - new_step = db.ElementaryStep() - new_step.link(self._elementary_steps) - new_step.create(self._calculation.get_structures(), [rhs_complex.id()]) + new_step = db.ElementaryStep(db.ID(), self._elementary_steps) + new_step.create(self._calculation.get_structures(), [rhs_complex_id]) new_step.set_type(db.ElementaryStepType.BARRIERLESS) db_results.add_elementary_step(new_step.id()) self._calculation.set_comment(self.name + ": Barrierless reaction found.") self._calculation.set_results(self._calculation.get_results() + db_results) + def _save_complex_to_db(self, complex_name: str, 
program_helper: Optional[ProgramHelper]) -> db.ID: + """ + Saves structure with given name in systems map as a new structure in the database together with + energy, bond orders, and graph. + The label is determined based on the generated graph. Both of which rely on the fact that the given complex + is the supersystem of all start structures. + See `_determine_new_label_based_on_graph` for more details. + + Notes + ----- + * Requires run configuration + * May throw exception + + Parameters + ---------- + complex_name :: str + The name of the complex system in the systems map + program_helper :: Union[ProgramHelper, None] + The ProgramHelper which might also want to do postprocessing + Returns + ------- + complex_structure_id :: db.ID + The id of the added structure + """ + complex_system = self.systems[complex_name] + complex_graph, self.systems = self.make_graph_from_calc(self.systems, complex_name) + structure_label = self._determine_new_label_based_on_graph(complex_system, complex_graph) + complex_structure = self.create_new_structure(complex_system, structure_label) + bond_orders, self.systems = self.make_bond_orders_from_calc(self.systems, complex_name) + self.transfer_properties(self.ref_structure, complex_structure) + self.store_energy(self.systems[complex_name], complex_structure) + self.store_bond_orders(bond_orders, complex_structure) + self.add_graph(complex_structure, bond_orders) + if program_helper is not None: + program_helper.calculation_postprocessing(self._calculation, self.ref_structure, complex_structure) + return complex_structure.id() + def react_postprocessing( self, product_names: List[str], program_helper: Union[ProgramHelper, None], tsopt_task_name: str, - reactant_structur_ids: List - ): + reactant_structure_ids: List[db.ID] + ) -> Tuple[List[db.ID], List[db.ID], db.ElementaryStep]: """ Carries out a verification protocol after the calculation context has been closed, clears database result and then fills it with the found products, TS, 
and elementary step and all properties that can be associated @@ -1490,11 +1771,10 @@ def react_postprocessing( The ProgramHelper which might also want to do postprocessing tsopt_task_name :: str The name of the task where the TS was output - reactant_structur_ids :: List[scine_database.ID] + reactant_structure_ids :: List[scine_database.ID] A list of all structure IDs for the reactants. """ - import scine_database as db - import scine_utilities as utils + from scine_puffin.utilities.reaction_transfer_helper import ReactionTransferHelper if not product_names: # should not be reachable @@ -1515,111 +1795,77 @@ def react_postprocessing( ) """ Save products """ - new_label = db.Label.MINIMUM_OPTIMIZED + new_labels = self._determine_product_labels_of_single_compounds(product_names) + # check for surface indices + assert self.products_component_map is not None + all_indices = self.surface_indices_all_structures(self._calculation.get_structures()) + split_surfaces_indices = \ + ReactionTransferHelper.map_total_indices_to_split_structure_indices( + all_indices, self.products_component_map) + end_structures = [] - for product in product_names: - new_structure = self.create_new_structure(self.systems[product], new_label) - self.transfer_properties(self.ref_structure, new_structure) - self.store_energy(self.systems[product], new_structure) - self.store_property( - self._properties, - "bond_orders", - "SparseMatrixProperty", - self.systems[product].get_results().bond_orders.matrix, - self._calculation.get_model(), - self._calculation, - new_structure, - ) - self.add_graph(new_structure, self.systems[product].get_results().bond_orders) + single_molecule_mode: bool = len(product_names) == 1 and len(self._calculation.get_structures()) == 1 and \ + not self.settings[self.job_key]["spin_propensity_check_for_unimolecular_reaction"] + for i, (label, product) in enumerate(zip(new_labels, product_names)): + surface_indices = split_surfaces_indices[i] + new_structure = 
self._store_structure_with_propensity_check(product, label, + enforce_to_save_base_name=single_molecule_mode, + surface_indices=surface_indices) if program_helper is not None: program_helper.calculation_postprocessing(self._calculation, self.ref_structure, new_structure) - db_results.add_structure(new_structure.id()) end_structures.append(new_structure.id()) + """ transfer properties to products which requires to pass all structures""" + transfer_helper = ReactionTransferHelper(self, self._properties) + start_structures = [db.Structure(sid, self._structures) for sid in self._calculation.get_structures() + if db.Structure(sid, self._structures).get_label() != db.Label.SURFACE_ADSORPTION_GUESS] + product_structures = [db.Structure(sid, self._structures) for sid in end_structures] + transfer_helper.transfer_properties_between_multiple(start_structures, product_structures, + self.properties_to_transfer) """ Save TS """ - ts_calc = self.systems[self.output(tsopt_task_name)[0]] - new_ts = self.create_new_structure(ts_calc, db.Label.TS_OPTIMIZED) - self.transfer_properties(self.ref_structure, new_ts) - self.store_hessian_data(ts_calc, new_ts) - if program_helper is not None: - program_helper.calculation_postprocessing(self._calculation, self.ref_structure, new_ts) - db_results.add_structure(new_ts.id()) + ts_name = self.output(tsopt_task_name)[0] + # do propensity single_points for TS and save data + ts_calc, new_ts = self._store_ts_with_propensity_info(ts_name, program_helper, db.Label.TS_OPTIMIZED) """ Save Complexes """ if self.lhs_barrierless_reaction or self.lhs_complexation: if self.lhs_barrierless_reaction: lhs_complex_label = self.rc_opt_system_name - lhs_complex_graph, _ = self.check_for_barrierless_reaction() - lhs_complex_system = self.systems[lhs_complex_label] - if ';' in lhs_complex_graph: - lhs_complex = self.create_new_structure(lhs_complex_system, db.Label.COMPLEX_OPTIMIZED) - else: - lhs_complex = self.create_new_structure(lhs_complex_system, 
db.Label.MINIMUM_OPTIMIZED) - db_results.add_structure(lhs_complex.id()) + elif self.step_direction == "forward": + lhs_complex_label = "irc_backward" else: - lhs_complex_label = "irc_backward" if self.step_direction == "forward" else "irc_forward" - lhs_complex_system = self.systems[lhs_complex_label] - lhs_complex = self.create_new_structure(lhs_complex_system, db.Label.COMPLEX_OPTIMIZED) - bond_orders = self.make_bond_orders_from_calc(self.systems, lhs_complex_label) - self.transfer_properties(self.ref_structure, lhs_complex) - self.store_energy(self.systems[lhs_complex_label], lhs_complex) - self.store_property( - self._properties, - "bond_orders", - "SparseMatrixProperty", - bond_orders.matrix, - self._calculation.get_model(), - self._calculation, - lhs_complex, - ) - self.add_graph(lhs_complex, bond_orders) - # Keep track of the lhs structure calculation. - if program_helper is not None: - program_helper.calculation_postprocessing(self._calculation, self.ref_structure, lhs_complex) - db_results.add_structure(lhs_complex.id()) + lhs_complex_label = "irc_forward" + lhs_complex_id = self._save_complex_to_db(lhs_complex_label, program_helper) + db_results.add_structure(lhs_complex_id) if self.rhs_complexation: rhs_complex_label = "irc_forward" if self.step_direction == "forward" else "irc_backward" - bond_orders = self.make_bond_orders_from_calc(self.systems, rhs_complex_label) - rhs_complex_system = self.systems[rhs_complex_label] - rhs_complex = self.create_new_structure(rhs_complex_system, db.Label.COMPLEX_OPTIMIZED) - self.transfer_properties(self.ref_structure, rhs_complex) - self.store_energy(self.systems[rhs_complex_label], rhs_complex) - self.store_property( - self._properties, - "bond_orders", - "SparseMatrixProperty", - bond_orders.matrix, - self._calculation.get_model(), - self._calculation, - rhs_complex, - ) - self.add_graph(rhs_complex, bond_orders) - if program_helper is not None: - program_helper.calculation_postprocessing(self._calculation, 
self.ref_structure, rhs_complex) - db_results.add_structure(rhs_complex.id()) + rhs_complex_id = self._save_complex_to_db(rhs_complex_label, program_helper) + db_results.add_structure(rhs_complex_id) """ Save Steps """ - main_step_lhs = reactant_structur_ids + main_step_lhs = [rsid for rsid in reactant_structure_ids + if db.Structure(rsid, self._structures).get_label() != db.Label.SURFACE_ADSORPTION_GUESS] main_step_rhs = end_structures if self.lhs_barrierless_reaction or self.lhs_complexation: new_step = db.ElementaryStep() new_step.link(self._elementary_steps) - new_step.create(reactant_structur_ids, [lhs_complex.id()]) + new_step.create(reactant_structure_ids, [lhs_complex_id]) new_step.set_type(db.ElementaryStepType.BARRIERLESS) db_results.add_elementary_step(new_step.id()) - main_step_lhs = [lhs_complex.id()] + main_step_lhs = [lhs_complex_id] if self.rhs_complexation: new_step = db.ElementaryStep() new_step.link(self._elementary_steps) - new_step.create([rhs_complex.id()], end_structures) + new_step.create([rhs_complex_id], end_structures) new_step.set_type(db.ElementaryStepType.BARRIERLESS) db_results.add_elementary_step(new_step.id()) - main_step_rhs = [rhs_complex.id()] + main_step_rhs = [rhs_complex_id] new_step = db.ElementaryStep() new_step.link(self._elementary_steps) new_step.create(main_step_lhs, main_step_rhs) new_step.set_type(db.ElementaryStepType.REGULAR) new_step.set_transition_state(new_ts.id()) db_results.add_elementary_step(new_step.id()) + """ Save Reaction Path as a Spline""" spline = self.generate_spline(tsopt_task_name) new_step.set_spline(spline) @@ -1631,16 +1877,130 @@ def react_postprocessing( _ = self.save_mep_in_db(new_step, charge, multiplicity, model) """ Save new starting materials if there are any""" original_start_structures = self._calculation.get_structures() - for rid in reactant_structur_ids: + for rid in reactant_structure_ids: if rid not in original_start_structures: # TODO should duplicates be removed here? 
db_results.add_structure(rid) # intermediate function may have written directly to calculation - # results, therefore add to already existing + # results, therefore add to already existing self._calculation.set_results(self._calculation.get_results() + db_results) - return main_step_lhs, main_step_rhs + return main_step_lhs, main_step_rhs, new_step + + def _store_ts_with_propensity_info(self, ts_name: str, program_helper: Optional[ProgramHelper], + ts_label: db.Label) -> Tuple[utils.core.Calculator, db.Structure]: + # do propensity single_points for TS + self._add_propensity_systems(ts_name) + self._spin_propensity_single_points(ts_name, "Failed all spin propensity single points for TS, " + "which means we could not recalculate the TS system. " + "This points to a SCINE calculator error.") + new_ts = self._store_structure_with_propensity_check(ts_name, ts_label, + enforce_to_save_base_name=True) + self.transfer_properties(self.ref_structure, new_ts) + ts_calc = self.systems[ts_name] + self.store_hessian_data(ts_calc, new_ts) + if program_helper is not None: + program_helper.calculation_postprocessing(self._calculation, self.ref_structure, new_ts) + return ts_calc, new_ts + + def _store_structure_with_propensity_check(self, name: str, label: db.Label, enforce_to_save_base_name: bool, + surface_indices: Optional[Union[List[int], Set[int]]] = None) \ + -> db.Structure: + from scine_utilities import settings_names as sn + from scine_utilities import KJPERMOL_PER_HARTREE + + def create_impl(structure_name: str) -> db.Structure: + bond_orders, self.systems = self.make_bond_orders_from_calc(self.systems, structure_name, surface_indices) + new_structure = self.create_new_structure(self.systems[structure_name], label) + self.store_energy(self.systems[structure_name], new_structure) + self.store_bond_orders(bond_orders, new_structure) + self.add_graph(new_structure, bond_orders, surface_indices) + # Label can change based on graph after optimization + if label not in 
[db.Label.TS_OPTIMIZED, db.Label.TS_GUESS]: + new_graph = self._cbor_graph_from_structure(new_structure) + new_label = self._determine_new_label_based_on_graph_and_surface_indices(new_graph, surface_indices) + if label != new_label: + print("Propensity check led to new label of " + structure_name + ". Relabeling it.") + new_structure.set_label(new_label) + results = self._calculation.get_results() + results.add_structure(new_structure.id()) + self._calculation.set_results(results) + return new_structure + + lowest_name, names_to_save = self._get_propensity_names_within_range( + name, self.settings[self.job_key]["spin_propensity_energy_range_to_save"] + ) + spin_propensity_hit = lowest_name != name + # Printing information + if spin_propensity_hit: + print(f"Noticed spin propensity. Lowest energy spin multiplicity of {name} is " + f"{self.systems[lowest_name].settings[sn.spin_multiplicity]}") + if names_to_save: + print("Spin states with rel. energies to lowest state in kJ/mol which are also saved to the database:") + print("name | multiplicity | rel. 
energy") + base_energy = self.systems[lowest_name].get_results().energy + for n in names_to_save: + multiplicity = self.systems[n].settings[sn.spin_multiplicity] + energy = self.systems[n].get_results().energy + rel_energy = (energy - base_energy) * KJPERMOL_PER_HARTREE + print(f" {n} | {multiplicity} | {rel_energy}") + if enforce_to_save_base_name: + print(f"Still saving the base multiplicity of {self.systems[name].settings[sn.spin_multiplicity]} " + f"in the elementary step") + # overwrite names to simply save and write as product of elementary step + names_to_save += [lowest_name] + if name in names_to_save: + names_to_save.remove(name) + lowest_name = name + + # Saving information + name_to_structure_and_label_map = {} + for n in names_to_save: + # Store as Tuple[db.Structure, db.Label] + name_to_structure_and_label_map[n] = [create_impl(n)] + name_to_structure_and_label_map[n] += [name_to_structure_and_label_map[n][0].get_label()] + + name_to_structure_and_label_map[lowest_name] = [create_impl(lowest_name)] + name_to_structure_and_label_map[lowest_name] += [name_to_structure_and_label_map[lowest_name][0].get_label()] + + # Decide which structure to return + # Lowest name if no better spin state was found or if the lower spin state still has the same label as name + if not spin_propensity_hit or \ + name_to_structure_and_label_map[lowest_name][1] == label or \ + enforce_to_save_base_name: + return name_to_structure_and_label_map[lowest_name][0] + else: + return name_to_structure_and_label_map[name][0] - def save_mep_in_db(self, elementary_step, charge, multiplicity, model): + def store_bond_orders(self, bond_orders: utils.BondOrderCollection, structure: db.Structure) -> None: + self.store_property( + self._properties, + "bond_orders", + "SparseMatrixProperty", + bond_orders.matrix, + self._calculation.get_model(), + self._calculation, + structure, + ) + + def _get_propensity_names_within_range(self, name: str, allowed_energy_range: float) -> Tuple[str,
List[str]]: + energies: Dict[str, Optional[float]] = {} + for shift_name, _ in self._propensity_iterator(name): + calc = self.systems[shift_name] + energy = calc.get_results().energy if calc is not None else None + energies[shift_name] = energy + # get name with the lowest energy to save as product + lowest_name = min({k: v for k, v in energies.items() if v is not None}, key=energies.get) # type: ignore + lowest_energy = energies[lowest_name] + assert lowest_energy is not None + names_within_range: List[str] = [] + for k, v in energies.items(): + if v is not None and k != lowest_name and \ + abs(v - lowest_energy) * utils.KJPERMOL_PER_HARTREE < allowed_energy_range: + names_within_range.append(k) + return lowest_name, names_within_range + + def save_mep_in_db(self, elementary_step: db.ElementaryStep, charge: int, multiplicity: int, model: db.Model) \ + -> List[db.ID]: """ Store each point on the MEP as a structure in the database. Attaches `electronic_energy` properties for each point. @@ -1661,10 +2021,6 @@ def save_mep_in_db(self, elementary_step, charge, multiplicity, model): The model with which all energies in the elementary Step were calculated. """ - import scine_utilities as utils - import scine_database as db - import os - def read_trj(fname): trj = utils.io.read_trajectory(utils.io.TrajectoryFormat.Xyz, fname) energies = [] @@ -1746,3 +2102,278 @@ def generate_structure(atoms, charge, multiplicity, model): elementary_step.set_path(structure_ids) return structure_ids + + def _includes_label(self, structure_id_list: List[db.ID], labels: List[db.Label]) -> bool: + """ + Returns if any structure in the list has any of the given labels. 
+ + Notes + ----- + * Requires run configuration + * May throw exception + + Parameters + ---------- + structure_id_list :: List[db.ID] + A list of structure ids + labels :: List[db.Label] + The required labels + """ + return self._label_locations(structure_id_list, labels)[0] is not None + + def _label_locations(self, structure_id_list: List[db.ID], labels: List[db.Label]) \ + -> Union[Tuple[int, int], Tuple[None, None]]: + """ + Returns the first index of the structure in the list that holds any of the given labels + and the index of the label. + Returns (None, None) if none of the given structures has any of the given labels. + + Notes + ----- + * Requires run configuration + * May throw exception + + Parameters + ---------- + structure_id_list :: List[db.ID] + A list of structure ids + labels :: List[db.Label] + The required labels + """ + for i, sid in enumerate(structure_id_list): + structure = db.Structure(sid, self._structures) + for j, label in enumerate(labels): + if structure.get_label() == label: + return i, j + return None, None + + def _determine_new_label_based_on_graph_and_surface_indices(self, graph_str: str, + surface_indices: Union[List[int], Set[int], None]) \ + -> db.Label: + graph_is_split = ";" in graph_str + no_surf_split_decision_label = db.Label.COMPLEX_OPTIMIZED if graph_is_split else db.Label.MINIMUM_OPTIMIZED + surf_split_decision_label = db.Label.SURFACE_COMPLEX_OPTIMIZED if graph_is_split else db.Label.SURFACE_OPTIMIZED + thresh = self.settings[self.job_key]["n_surface_atom_threshold"] + if surface_indices is not None and len(surface_indices) > thresh: + return surf_split_decision_label + return no_surf_split_decision_label + + def _determine_new_label_based_on_graph(self, calculator: utils.core.Calculator, graph_str: str) -> db.Label: + """ + Determines label for a product structure of the given react job based on the given graph and the labels + of the starting structures.
+ Crucially, this method only works if + - the given structure is a superstructure of all start structures + For multiple split structures we require a mapping information on the atom level between the start structures + and the individual products. If this is the case, the labels must be assigned to all products at once. + See `_determine_product_labels_of_single_compounds` for that, which will however not work on complexes. + + Notes + ----- + * Requires run configuration + * May throw exception + + Parameters + ---------- + calculator :: Core::Calculator + The calculator holding the structure + graph_str :: str + The cbor graph of one or more molecules (separated by ';') + Returns + ------- + label :: db.Label + The correct label for the new structure corresponding to the given graph + """ + graph_is_split = ";" in graph_str + no_surf_split_decision_label = db.Label.COMPLEX_OPTIMIZED if graph_is_split else db.Label.MINIMUM_OPTIMIZED + surf_split_decision_label = db.Label.SURFACE_COMPLEX_OPTIMIZED if graph_is_split else db.Label.SURFACE_OPTIMIZED + start_structure_ids = self._calculation.get_structures() + if not self._includes_label(start_structure_ids, [db.Label.SURFACE_OPTIMIZED, + db.Label.USER_SURFACE_OPTIMIZED, + db.Label.SURFACE_COMPLEX_OPTIMIZED, + db.Label.USER_SURFACE_COMPLEX_OPTIMIZED]): + # no surface present in inputs + return no_surf_split_decision_label + # we had a surface in the inputs + start_structures = [db.Structure(s, self._structures) for s in start_structure_ids] + adsorb_guess_index, _ = self._label_locations(start_structure_ids, [db.Label.SURFACE_ADSORPTION_GUESS]) + if adsorb_guess_index is not None: + # eliminate adsorb guess from start structure considerations + start_structures = [s for i, s in enumerate(start_structures) if i != adsorb_guess_index] + n_start_atoms = sum(len(s.get_atoms()) for s in start_structures) + if len(calculator.structure) == n_start_atoms: + # we got no split in react job, structure must still be a surface + 
return surf_split_decision_label + raise RuntimeError(f"Could not deduced the label for the new structure {graph_str} " + f"based on start structures {[str(s) for s in start_structure_ids]}") + + def _determine_product_labels_of_single_compounds(self, names: List[str], + component_map: Optional[List[int]] = None) -> List[db.Label]: + """ + Determines labels of all individual product structures of the given react job based on the labels of the + starting structures. + Crucially, this method only works if + - each specified system in the `names` holds only a compound + - the `products_component_map` has been evaluated in the IRC check + For complex structures this method does not work, because we require the graph for that, which requires + the knowledge about individual surface atoms. + See `_determine_new_label_based_on_graph` for that, which will however not work on only a partial structure + of the initial start structure combination for surfaces. + + Notes + ----- + * Requires run configuration + * May throw exception + + Parameters + ---------- + names :: List[str] + The list of system names of the products in the systems map + component_map :: Optional[List[int]] + The component map of the given systems, take product_component_map if None + + Returns + ------- + labels :: List[db.Label] + The correct labels for the new structures + """ + if self.products_component_map is None and component_map is None: + self.raise_named_exception(f"Could not deduce the labels for the new structures {names}") + if component_map is None: + component_map = self.products_component_map + assert component_map is not None # for type check + surface_indices = self.surface_indices_all_structures() + if not surface_indices: + # we don't have a surface --> all compounds and no user input because products + return [db.Label.MINIMUM_OPTIMIZED] * len(names) + # sanity checks + n_product_atoms = sum(len(self.systems[name].structure) for name in names) + if any(index >= n_product_atoms for 
index in surface_indices): + self.raise_named_exception("Surface indices include invalid numbers for the given products") + if len(component_map) != n_product_atoms: + self.raise_named_exception("Invalid product component map for the given products") + product_surface_atoms = [0] * len(names) + for index in surface_indices: + product_surface_atoms[component_map[index]] += 1 + # do not categorize if only single surface atom, but assume this is a transfer from the surface to the product + thresh = self.settings[self.job_key]["n_surface_atom_threshold"] + return [db.Label.SURFACE_OPTIMIZED if n > thresh else db.Label.MINIMUM_OPTIMIZED for n in product_surface_atoms] + + def _tsopt_hess_irc_ircopt(self, tsguess_system_name: str, settings_manager: SettingsManager) \ + -> Tuple[List[str], Optional[List[str]]]: + """ + Takes a TS guess and carries out: + * TS optimization + * Hessian calculation and check for valid TS + * IRC calculation + * random displacement of IRC points + * Optimization with faster converging optimizer than Steepest Descent to arrive at true minima + + Parameters + ---------- + tsguess_system_name : str + The name of the system holding the TS guess + settings_manager : SettingsManager + The settings manager + """ + import scine_readuct as readuct + inputs = [tsguess_system_name] + """ TSOPT JOB """ + self.setup_automatic_mode_selection("tsopt") + print("TSOpt Settings:") + print(self.settings["tsopt"], "\n") + self.systems, success = self.observed_readuct_call( + 'run_tsopt_task', self.systems, inputs, **self.settings["tsopt"]) + self.throw_if_not_successful( + success, + self.systems, + self.output("tsopt"), + ["energy"], + "TS optimization failed:\n", + ) + + """ TS HESSIAN """ + inputs = self.output("tsopt") + self.systems, success = readuct.run_hessian_task(self.systems, inputs) + self.throw_if_not_successful( + success, + self.systems, + inputs, + ["energy", "hessian", "thermochemistry"], + "TS Hessian calculation failed.\n", + ) + + if 
self.n_imag_frequencies(inputs[0]) != 1: + self._save_ts_for_restart(db.Label.TS_GUESS) + self.raise_named_exception(f"Error: {self.name} failed with message: " + f"TS has incorrect number of imaginary frequencies.") + + """ IRC JOB """ + # IRC (only a few steps to allow decent graph extraction) + print("IRC Settings:") + print(self.settings["irc"], "\n") + self.systems, success = self.observed_readuct_call( + 'run_irc_task', self.systems, inputs, **self.settings["irc"]) + + """ IRC OPT JOB """ + # Run a small energy minimization after initial IRC + inputs = self.output("irc") + print("IRC Optimization Settings:") + print(self.settings["ircopt"], "\n") + for i in inputs: + atoms = self.systems[i].structure + self.random_displace_atoms(atoms) + self.systems[i].positions = atoms.positions + self.systems, success = self.observed_readuct_call( + 'run_opt_task', self.systems, [inputs[0]], **self.settings["ircopt"]) + self.systems, success = self.observed_readuct_call( + 'run_opt_task', self.systems, [inputs[1]], **self.settings["ircopt"]) + + """ Check whether we have a valid IRC """ + initial_charge = settings_manager.calculator_settings[utils.settings_names.molecular_charge] + product_names, start_names = self.irc_sanity_checks_and_analyze_sides( + initial_charge, self.check_charges, inputs, settings_manager.calculator_settings) + if product_names is None: # IRC did not pass checks, reason has been set as comment, complete job + self.verify_connection() + self.capture_raw_output() + update_model( + self.systems[self.output("tsopt")[0]], + self._calculation, + self.config, + ) + raise breakable.Break + return product_names, start_names + + def _tsopt_hess_irc_ircopt_postprocessing(self, tsguess_system_name: str, settings_manager: SettingsManager, + program_helper: Optional[ProgramHelper]) -> None: + """ + Takes a TS guess and carries out: + * TS optimization + * Hessian calculation and check for valid TS + * IRC calculation + * random displacement of IRC points + * 
Faster optimization to arrive at true minima + * Checks for the validity of the IRC and saving the results + + Notes + ----- + All but last step are done in `_tsopt_hess_irc_ircopt` + + Parameters + ---------- + tsguess_system_name : str + The name of the system holding the TS guess + settings_manager : SettingsManager + The settings manager + program_helper : Optional[ProgramHelper] + The program helper + """ + product_names, start_names = self._tsopt_hess_irc_ircopt(tsguess_system_name, settings_manager) + """ Store new starting material conformer(s) """ + if start_names is not None: + start_structures = self.store_start_structures( + start_names, program_helper, "tsopt") + else: + start_structures = self._calculation.get_structures() + + self.react_postprocessing(product_names, program_helper, "tsopt", start_structures) diff --git a/scine_puffin/jobs/turbomole_bond_orders.py b/scine_puffin/jobs/turbomole_bond_orders.py index 5eaa522..9b4b6c9 100644 --- a/scine_puffin/jobs/turbomole_bond_orders.py +++ b/scine_puffin/jobs/turbomole_bond_orders.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/jobs/turbomole_geometry_optimization.py b/scine_puffin/jobs/turbomole_geometry_optimization.py index 7049e9b..7509fd0 100644 --- a/scine_puffin/jobs/turbomole_geometry_optimization.py +++ b/scine_puffin/jobs/turbomole_geometry_optimization.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" @@ -161,6 +161,8 @@ def run(self, manager, calculation, config: Configuration) -> bool: job = calculation.get_job() # New label + # TODO: These labels are not necessarily correct; during the optimization, a + # complex could be created label = structure.get_label() if label == db.Label.MINIMUM_GUESS or label == db.Label.MINIMUM_OPTIMIZED: new_label = db.Label.MINIMUM_OPTIMIZED diff --git a/scine_puffin/jobs/turbomole_hessian.py b/scine_puffin/jobs/turbomole_hessian.py index fec0799..703eec6 100644 --- a/scine_puffin/jobs/turbomole_hessian.py +++ b/scine_puffin/jobs/turbomole_hessian.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/jobs/turbomole_single_point.py b/scine_puffin/jobs/turbomole_single_point.py index 2558148..4aa9ca2 100644 --- a/scine_puffin/jobs/turbomole_single_point.py +++ b/scine_puffin/jobs/turbomole_single_point.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/programs/__init__.py b/scine_puffin/programs/__init__.py index 4d46073..d85c6e5 100644 --- a/scine_puffin/programs/__init__.py +++ b/scine_puffin/programs/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details.
""" diff --git a/scine_puffin/programs/ams.py b/scine_puffin/programs/ams.py new file mode 100644 index 0000000..100ddb8 --- /dev/null +++ b/scine_puffin/programs/ams.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. +""" + +import os +from typing import List + +from .program import Program +from scine_puffin.config import Configuration + + +class Ams(Program): + """ + Setup of the AMS program via the Scine AMS_wrapper + """ + + def install(self, repo_dir: str, install_dir: str, ncores: int): + if self.root: + pass + if self.source: + self.scine_module_install(repo_dir, install_dir, ncores) + else: + raise RuntimeError + + def check_install(self): + raise NotImplementedError + + def setup_environment(self, config: Configuration, env_paths: dict, env_vars: dict): + if self.root: + env_vars["SCMLICENSE"] = os.getenv("SCMLICENSE") + if all(os.getenv(var) is None for var in ["AMSHOME", "AMSBIN", "AMS_BINARY_PATH"]): + if os.path.exists(os.path.join(self.root, "bin", "ams")): + env_vars["AMSBIN"] = os.path.join(self.root, "bin") + env_vars["AMSHOME"] = os.path.join(self.root) + env_vars["AMS_BINARY_PATH"] = os.path.join(self.root, "bin") + elif self.source: + pass + else: + raise RuntimeError + + def available_models(self) -> List[str]: + return ["DFT", "DFTB3", "DFTB2", "DFTB0", "GFN1", "GFN0", "REAXFF", "MLPOTENTIAL"] diff --git a/scine_puffin/programs/core.py b/scine_puffin/programs/core.py new file mode 100644 index 0000000..72b3207 --- /dev/null +++ b/scine_puffin/programs/core.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. 
+""" + +from typing import List + +from .program import Program +from scine_puffin.config import Configuration + + +class Core(Program): + """ + Scine: Core -- installation and verification class + """ + + def install(self, repo_dir: str, install_dir: str, ncores: int): + if self.root: + raise NotImplementedError + elif self.source: + self.scine_module_install(repo_dir, install_dir, ncores) + else: + raise RuntimeError + + def check_install(self): + raise NotImplementedError + + def setup_environment(self, config: Configuration, env_paths: dict, env_vars: dict): + if self.root: + raise NotImplementedError + elif self.source: + pass + else: + raise RuntimeError + + def available_models(self) -> List[str]: + return [] diff --git a/scine_puffin/programs/cp2k.py b/scine_puffin/programs/cp2k.py index 56786e9..8075af4 100644 --- a/scine_puffin/programs/cp2k.py +++ b/scine_puffin/programs/cp2k.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/programs/database.py b/scine_puffin/programs/database.py index d3555fc..0a4912f 100644 --- a/scine_puffin/programs/database.py +++ b/scine_puffin/programs/database.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" diff --git a/scine_puffin/programs/deprecated/rdkit.py.depr b/scine_puffin/programs/deprecated/rdkit.py.depr index eeaa816..ab9ae93 100644 --- a/scine_puffin/programs/deprecated/rdkit.py.depr +++ b/scine_puffin/programs/deprecated/rdkit.py.depr @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/programs/gaussian.py b/scine_puffin/programs/gaussian.py index 2c7bf0f..3abf466 100644 --- a/scine_puffin/programs/gaussian.py +++ b/scine_puffin/programs/gaussian.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/programs/kinetx.py b/scine_puffin/programs/kinetx.py index 376bbf8..fab58ab 100644 --- a/scine_puffin/programs/kinetx.py +++ b/scine_puffin/programs/kinetx.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/programs/molassembler.py b/scine_puffin/programs/molassembler.py index cc1ee2f..11235f9 100644 --- a/scine_puffin/programs/molassembler.py +++ b/scine_puffin/programs/molassembler.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. 
+Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/programs/mrcc.py b/scine_puffin/programs/mrcc.py new file mode 100644 index 0000000..4e71cde --- /dev/null +++ b/scine_puffin/programs/mrcc.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. +""" + +from typing import List + +from .program import Program +from scine_puffin.config import Configuration + + +class Mrcc(Program): + """ + Setup of the MRCC program + """ + + def install(self, repo_dir: str, install_dir: str, ncores: int): + if self.root: + pass + elif self.source: + raise NotImplementedError + else: + raise RuntimeError + + def check_install(self): + raise NotImplementedError + + def setup_environment(self, config: Configuration, env_paths: dict, env_vars: dict): + if self.root: + # MRCC_BINARY_PATH needs to be set in order for MRCC to execute. + env_vars["MRCC_BINARY_PATH"] = self.root + elif self.source: + pass + else: + raise RuntimeError + + def available_models(self) -> List[str]: + return ["DFT", "HF", "CC", "MP2"] diff --git a/scine_puffin/programs/orca.py b/scine_puffin/programs/orca.py index b73b8e4..8f0f00d 100644 --- a/scine_puffin/programs/orca.py +++ b/scine_puffin/programs/orca.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" diff --git a/scine_puffin/programs/parrot.py b/scine_puffin/programs/parrot.py new file mode 100644 index 0000000..a31c84c --- /dev/null +++ b/scine_puffin/programs/parrot.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. +""" + +from typing import List + +from .program import Program +from scine_puffin.config import Configuration + + +class Parrot(Program): + """ + Parrot -- Machine Learning Potentials for SCINE + """ + + def install(self, repo_dir: str, install_dir: str, _: int): + if self.root: + raise RuntimeError + elif self.source: + self.pip_module_source_install(repo_dir, install_dir) + else: + self.pip_package_install('scine_parrot', install_dir) + + def check_install(self): + raise NotImplementedError + + def setup_environment(self, _: Configuration, __: dict, ___: dict): + pass + + def available_models(self) -> List[str]: + return ['lmlp', 'ani', 'm3gnet'] + + @staticmethod + def initialize(): + import scine_parrot # noqa: F401 , pylint: disable=unused-import diff --git a/scine_puffin/programs/program.py b/scine_puffin/programs/program.py index 40e26be..3ec9fff 100644 --- a/scine_puffin/programs/program.py +++ b/scine_puffin/programs/program.py @@ -1,10 +1,12 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" +from pkgutil import iter_modules from typing import List +from warnings import warn import git import os import subprocess @@ -51,6 +53,12 @@ def install(self, repo_dir: str, install_dir: str, ncores: int): """ raise NotImplementedError + @staticmethod + def initialize(): + """ + Executed at Puffin start, run once for each available program + """ + def check_install(self): """ A small function checking if the program was installed/located correctly @@ -146,7 +154,71 @@ def scine_module_install(self, repo_dir: str, install_dir: str, ncores: int, args.append("-DPYTHON_EXECUTABLE=" + sys.executable) if self.settings["cmake_flags"]: args += self.settings["cmake_flags"].split(" ") + if "sphinx" not in (name for loader, name, ispkg in iter_modules()): + warn("Sphinx is not installed, skipping Scine documentation build") + args.append("-DSCINE_BUILD_DOCS=OFF") args.append("..") subprocess.run(args, env=env, check=True) subprocess.run(["make", "-j" + str(ncores), "install"], env=env, check=True) os.chdir(initial_dir) + + def pip_module_source_install(self, repo_dir: str, install_dir: str): + initial_dir = os.getcwd() + + # Handle repository + if os.path.exists(repo_dir): + repository = git.Repo(repo_dir) + try: + repository.remotes.origin.pull() + repository.git.submodule("update", "--init") + except BaseException: + try: + repository.git.checkout("master") + except git.exc.GitCommandError: # type: ignore[misc] + repository.git.checkout("main") + repository.git.submodule("update", "--init") + repository.remotes.origin.pull() + repository.git.submodule("update", "--init") + finally: + repository.git.checkout(self.version) + repository.remotes.origin.pull() + repository.git.submodule("update", "--init") + else: + repository = git.Repo.clone_from(self.source, repo_dir) + repository.git.checkout(self.version) + repository.git.submodule("update", "--init") + + build_dir = os.path.join(repo_dir, "build") + if build_dir and not os.path.exists(build_dir): + 
os.makedirs(build_dir) + os.chdir(build_dir) + self.pip_package_install('../.', install_dir) + os.chdir(initial_dir) + + @staticmethod + def pip_package_install(package: str, install_dir: str): + env = os.environ.copy() + suffix = ( + 'python' + str(sys.version_info.major) + '.' + + str(sys.version_info.minor) + '/site-packages' + ) + if "PYTHONPATH" in env.keys(): + env["PYTHONPATH"] = ( + os.path.join(install_dir, 'lib', suffix) + + ":" + os.path.join(install_dir, 'lib64', suffix) + + ":" + env["PYTHONPATH"] + ) + else: + env["PYTHONPATH"] = ( + os.path.join(install_dir, 'lib', suffix) + + ":" + os.path.join(install_dir, 'lib64', suffix) + ) + subprocess.run( + [ + sys.executable, '-m' + 'pip', 'install', package, + '--no-cache', + '--prefix', install_dir + ], + env=env, + check=True + ) diff --git a/scine_puffin/programs/readuct.py b/scine_puffin/programs/readuct.py index 420fbbd..cd2efcd 100644 --- a/scine_puffin/programs/readuct.py +++ b/scine_puffin/programs/readuct.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/programs/rms.py b/scine_puffin/programs/rms.py new file mode 100644 index 0000000..9e3aec0 --- /dev/null +++ b/scine_puffin/programs/rms.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. +""" + +from typing import List, Optional + +from .program import Program +from scine_puffin.config import Configuration + + +class Rms(Program): + """ + The reaction mechanism simulator. 
See https://github.com/ReactionMechanismGenerator/ReactionMechanismSimulator.jl + + See the install scripts in the "scripts" directory for details. Note that the installation requires Conda. + """ + + def install(self, repo_dir: str, install_dir: str, ncores: int): + if self.root: + pass + elif self.source: + raise NotImplementedError("RMS must be installed manually. See rms.py for details.") + else: + raise NotImplementedError("RMS must be installed manually. See rms.py for details.") + + def check_install(self): + self.assert_install() + + @staticmethod + def assert_install(): + if not Rms.is_installed(): + raise ModuleNotFoundError('RMS was not installed correctly. It must be preinstalled in a conda' + ' environment. An installation script is provided in scripts/rms/build_rms.sh.' + ' More information on the RMS installation process is provided on' + ' http://reactionmechanismgenerator.github.io/RMG-Py/users/rmg/installation/' + 'anacondaDeveloper.html') + + @staticmethod + def is_installed(): + try: + # pylint: disable=unused-import + import julia # noqa: F401 + from julia import ReactionMechanismSimulator # noqa: F401 + import diffeqpy # noqa: F401 + # pylint: enable=unused-import + except ImportError as e: + print("Julia, pyrms or diffeqpy could not be imported. 
The error message was:\n" + str(e)) + return False + return True + + def setup_environment(self, config: Configuration, env_paths: dict, env_vars: dict): + if self.root: + raise NotImplementedError + elif self.source: + raise NotImplementedError + else: + raise RuntimeError + + def available_models(self) -> List[str]: + return [] + + +class JuliaPrecompiler(object): + def __new__(cls): + if not hasattr(cls, 'instance'): + cls.instance = super(JuliaPrecompiler, cls).__new__(cls) + cls.instance.julia_is_precompiled = False + cls.instance.root: Optional[str] = None + return cls.instance + + def set_root(self, root: str): + # pylint: disable=attribute-defined-outside-init + self.root = root + # pylint: enable=attribute-defined-outside-init + + def compile_julia(self): + import os + # Try to load the system image if the file already exists. + if self.root: + if ".so" not in self.root or not os.path.exists(self.root): + raise RuntimeError("The shared library file for RMS was not found. Install RMS through the installation" + "scripts in the scripts directory and activate the conda environment after" + " installation.") + # pylint: disable=import-error + from julia import Julia # noqa: F401 + _ = Julia(sysimage=self.root) + # pylint: enable=import-error + else: + print("Compiling Julia on the fly. This may take a while!") + # If the system image is not available we resort to compiling it on the fly. This is potentially very slow. 
+ # pylint: disable=import-error + from julia.api import Julia + _ = Julia(compiled_modules=False) + # pylint: enable=import-error + + # pylint: disable=attribute-defined-outside-init + self.julia_is_precompiled = True + # pylint: enable=attribute-defined-outside-init + + def ensure_is_compiled(self): + if not self.julia_is_precompiled: + self.compile_julia() diff --git a/scine_puffin/programs/serenity.py b/scine_puffin/programs/serenity.py index 9f1099d..a742fea 100644 --- a/scine_puffin/programs/serenity.py +++ b/scine_puffin/programs/serenity.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/programs/sparrow.py b/scine_puffin/programs/sparrow.py index 098120a..7a7b40c 100644 --- a/scine_puffin/programs/sparrow.py +++ b/scine_puffin/programs/sparrow.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/programs/swoose.py b/scine_puffin/programs/swoose.py index 80ce11b..630889b 100644 --- a/scine_puffin/programs/swoose.py +++ b/scine_puffin/programs/swoose.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" diff --git a/scine_puffin/programs/turbomole.py b/scine_puffin/programs/turbomole.py index f695f7d..765d158 100644 --- a/scine_puffin/programs/turbomole.py +++ b/scine_puffin/programs/turbomole.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/programs/utils.py b/scine_puffin/programs/utils.py index 94a8206..1c15794 100644 --- a/scine_puffin/programs/utils.py +++ b/scine_puffin/programs/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/programs/xtb.py b/scine_puffin/programs/xtb.py index db078bf..a94cf99 100644 --- a/scine_puffin/programs/xtb.py +++ b/scine_puffin/programs/xtb.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/tests/__init__.py b/scine_puffin/tests/__init__.py index 347efce..cc9acd3 100644 --- a/scine_puffin/tests/__init__.py +++ b/scine_puffin/tests/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" diff --git a/scine_puffin/tests/conftest.py b/scine_puffin/tests/conftest.py new file mode 100644 index 0000000..9799b52 --- /dev/null +++ b/scine_puffin/tests/conftest.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. +""" + + +import pytest +from .db_setup import get_clean_db + + +@pytest.fixture(scope='session', autouse=True) +def precondition(): + try: + _ = get_clean_db() + except RuntimeError as e: + pytest.exit(f'{str(e)}\nFirst start database before running unittests.') diff --git a/scine_puffin/tests/cp2k/__init__.py b/scine_puffin/tests/cp2k/__init__.py index 347efce..cc9acd3 100644 --- a/scine_puffin/tests/cp2k/__init__.py +++ b/scine_puffin/tests/cp2k/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/tests/cp2k/cp2k_test.py b/scine_puffin/tests/cp2k/cp2k_test.py index d520ad8..13f8d4a 100644 --- a/scine_puffin/tests/cp2k/cp2k_test.py +++ b/scine_puffin/tests/cp2k/cp2k_test.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" diff --git a/scine_puffin/tests/daemon_test.py b/scine_puffin/tests/daemon_test.py index 5abbbfa..82cc6f3 100644 --- a/scine_puffin/tests/daemon_test.py +++ b/scine_puffin/tests/daemon_test.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/tests/db_setup.py b/scine_puffin/tests/db_setup.py index c395eaa..779bd5d 100644 --- a/scine_puffin/tests/db_setup.py +++ b/scine_puffin/tests/db_setup.py @@ -1,13 +1,13 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ # Standard library imports import os -from typing import Optional +from typing import Any, Optional def get_test_db_credentials(name: str = "puffin_unittests"): @@ -25,7 +25,7 @@ def get_test_db_credentials(name: str = "puffin_unittests"): Returns ------- result :: db.Credentials - The credentials to access the test database. + The credentials to access the test database. """ import scine_database as db ip = os.environ.get('TEST_MONGO_DB_IP', "127.0.0.1") @@ -63,7 +63,7 @@ def get_clean_db(name: str = "puffin_unittests"): return manager -def add_structure(manager, xyz_path, label, charge: int = 0, multiplicity: int = 1): +def add_structure(manager, xyz_path, label, charge: int = 0, multiplicity: int = 1, model: Optional[Any] = None): """ Generates a Structure in the database according to the specifications given as arguments. 
@@ -80,6 +80,9 @@ def add_structure(manager, xyz_path, label, charge: int = 0, multiplicity: int = The charge of the structure multiplicity :: int The multiplicity of the structure + model :: db.Model, optional + The model of the structure to be generated. Take db.Model("dftb3", "dftb3", "") with program "Sparrow" + as default. Returns ------- @@ -87,11 +90,11 @@ def add_structure(manager, xyz_path, label, charge: int = 0, multiplicity: int = The generated Structure linked to its collection """ import scine_database as db - import scine_utilities as utils + if model is None: + model = db.Model("dftb3", "dftb3", "") + model.program = "sparrow" structures = manager.get_collection("structures") - atoms, _ = utils.io.read(xyz_path) - structure = db.Structure.make(atoms, charge, multiplicity, structures) - structure.set_label(label) + structure = db.Structure.make(xyz_path, charge, multiplicity, model, label, structures) return structure @@ -125,13 +128,16 @@ def add_calculation(manager, model, job, structures, settings: Optional[dict] = if settings is None: settings = {} calculations = manager.get_collection("calculations") + if model.program.lower() == "any": + model.program = "sparrow" calculation = db.Calculation.make(model, job, structures, calculations) calculation.set_settings(utils.ValueCollection(settings)) calculation.set_status(db.Status.NEW) return calculation -def add_compound_and_structure(manager, xyz_file: str = "proline_acid.xyz"): +def add_compound_and_structure(manager, xyz_file: str = "proline_acid.xyz", charge: int = 0, multiplicity: int = 1, + label: Optional[Any] = None, model: Optional[Any] = None) -> Any: """ Generates a Compound with one structure according to the given xyz_file. @@ -141,6 +147,15 @@ def add_compound_and_structure(manager, xyz_file: str = "proline_acid.xyz"): The manager of the database to create data in. 
xyz_file :: str The xyz file name for the structure that is added + charge :: int + The charge of the structure + multiplicity :: int + The spin multiplicity of the structure + label :: db.Label, optional + The label of the structure to be generated. + model :: db.Model, optional + The model of the structure to be generated. Take db.Model("dftb3", "dftb3", "") with program "Sparrow" + as default. Returns ------- compound :: db.Compound @@ -150,13 +165,15 @@ def add_compound_and_structure(manager, xyz_file: str = "proline_acid.xyz"): from .resources import resource_path compounds = manager.get_collection("compounds") path = os.path.join(resource_path(), xyz_file) - structure = add_structure(manager, path, db.Label.MINIMUM_OPTIMIZED) + if label is None: + label = db.Label.MINIMUM_OPTIMIZED + structure = add_structure(manager, path, label, charge, multiplicity, model) new_compound = db.Compound.make([structure.id()], compounds) structure.set_compound(new_compound.id()) return new_compound -def add_flask_and_structure(manager, xyz_file: str = "proline_acid.xyz"): +def add_flask_and_structure(manager, xyz_file: str = "proline_acid.xyz", model: Optional[Any] = None): """ Generates a Flask with one structure according to the given xyz_file. 
@@ -175,7 +192,7 @@ def add_flask_and_structure(manager, xyz_file: str = "proline_acid.xyz"): from .resources import resource_path flasks = manager.get_collection("flasks") path = os.path.join(resource_path(), xyz_file) - structure = add_structure(manager, path, db.Label.COMPLEX_OPTIMIZED) + structure = add_structure(manager, path, db.Label.COMPLEX_OPTIMIZED, model=model) new_flask = db.Flask.make([structure.id()], [], flasks) structure.set_compound(new_flask.id()) return new_flask diff --git a/scine_puffin/tests/jobs/__init__.py b/scine_puffin/tests/jobs/__init__.py index 347efce..cc9acd3 100644 --- a/scine_puffin/tests/jobs/__init__.py +++ b/scine_puffin/tests/jobs/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/tests/jobs/test_conformers.py b/scine_puffin/tests/jobs/test_conformers.py index 93dbb0b..6a3c121 100644 --- a/scine_puffin/tests/jobs/test_conformers.py +++ b/scine_puffin/tests/jobs/test_conformers.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/tests/jobs/test_gaussian_cm5_charges_job.py b/scine_puffin/tests/jobs/test_gaussian_cm5_charges_job.py index 1ef1d8f..5ddaad5 100644 --- a/scine_puffin/tests/jobs/test_gaussian_cm5_charges_job.py +++ b/scine_puffin/tests/jobs/test_gaussian_cm5_charges_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. 
-Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -32,6 +32,7 @@ def test_water(self): water = os.path.join(resource_path(), "water.xyz") structure = add_structure(self.manager, water, db.Label.USER_OPTIMIZED) model = db.Model('dft', 'PBEPBE', '6-31G') + model.program = "gaussian" job = db.Job('gaussian_charge_model_5') calculation = add_calculation(self.manager, model, job, [structure.id()]) diff --git a/scine_puffin/tests/jobs/test_graph.py b/scine_puffin/tests/jobs/test_graph.py index 28f1149..42b7d4c 100644 --- a/scine_puffin/tests/jobs/test_graph.py +++ b/scine_puffin/tests/jobs/test_graph.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/tests/jobs/test_kinetx_kinetic_modeling_job.py b/scine_puffin/tests/jobs/test_kinetx_kinetic_modeling_job.py index 9353d8a..b63ca10 100644 --- a/scine_puffin/tests/jobs/test_kinetx_kinetic_modeling_job.py +++ b/scine_puffin/tests/jobs/test_kinetx_kinetic_modeling_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" @@ -87,11 +87,7 @@ def test_concentrations(self): structures = self.manager.get_collection("structures") compounds = self.manager.get_collection("compounds") flasks = self.manager.get_collection("flasks") - results = calculation.get_results() assert properties.count(dumps({})) == n_compounds * 3 + len(all_reaction_ids) * 3 - assert len(results.property_ids) == n_compounds * 3 + len(all_reaction_ids) * 3 - assert len(results.structure_ids) == 0 - assert len(results.elementary_step_ids) == 0 for c in all_compounds: assert not c.explore() for i, s_id in enumerate(all_structure_ids): diff --git a/scine_puffin/tests/jobs/test_orca_geometry_optimization_job.py b/scine_puffin/tests/jobs/test_orca_geometry_optimization_job.py index c407e88..ebfd74b 100644 --- a/scine_puffin/tests/jobs/test_orca_geometry_optimization_job.py +++ b/scine_puffin/tests/jobs/test_orca_geometry_optimization_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -32,6 +32,7 @@ def test_energy(self): water = os.path.join(resource_path(), "water.xyz") structure = add_structure(self.manager, water, db.Label.USER_GUESS) model = db.Model('dft', 'pbe', 'def2-SVP') + model.program = "orca" job = db.Job('orca_geometry_optimization') calculation = add_calculation(self.manager, model, job, [structure.id()]) diff --git a/scine_puffin/tests/jobs/test_rms_input_file_creator.py b/scine_puffin/tests/jobs/test_rms_input_file_creator.py new file mode 100644 index 0000000..9efc16a --- /dev/null +++ b/scine_puffin/tests/jobs/test_rms_input_file_creator.py @@ -0,0 +1,119 @@ +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. 
+See LICENSE.txt for details. +""" + +import os + +import scine_database as db + +from ..testcases import ( + JobTestCase, +) +from ..db_setup import ( + add_compound_and_structure, + add_reaction, +) + + +class RMSInputFileCreatorTest(JobTestCase): + def test_phase_entry(self): + """ + Idea of the test: Check if the created dictionaries have the expected format. + """ + import scine_utilities as utils + from ...utilities.rms_input_file_creator import create_rms_phase_entry + aggregate_list = ["63d12175fc016ecdcf53d4e9", "63d12175fc016ecdcf53d4e1"] + + phase_list = create_rms_phase_entry(aggregate_list, [3.0, 2.0], [0.0, 1.0], "Some-Solvent") + r = utils.MOLAR_GAS_CONSTANT + reference = [{'Species': [ + {'name': '63d12175fc016ecdcf53d4e9', + 'radicalelectrons': 0, + 'thermo': { + 'polys': [{ + 'Tmax': 5000.0, + 'Tmin': 1.0, + 'coefs': [0.0, 0.0, 0.0, 0.0, 0.0, 3.0/r, 0.0/r], + 'type': 'NASApolynomial'}], + 'type': 'NASA'}, + 'type': 'Species'}, + {'name': '63d12175fc016ecdcf53d4e1', + 'radicalelectrons': 0, + 'thermo': { + 'polys': [{'Tmax': 5000.0, + 'Tmin': 1.0, + 'coefs': [0.0, 0.0, 0.0, 0.0, 0.0, 2.0/r, 1.0/r], + 'type': 'NASApolynomial'}], + 'type': 'NASA'}, 'type': 'Species'}, + {'name': 'Some-Solvent', + 'radicalelectrons': 0, + 'thermo': { + 'polys': [{'Tmax': 5000.0, + 'Tmin': 1.0, + 'coefs': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + 'type': 'NASApolynomial'}], + 'type': 'NASA'}, 'type': 'Species'}], + 'name': 'phase'}] + assert reference == phase_list + + def test_create_rms_reaction_entry(self): + """ + Idea of the test: Check if the created dictionaries have the expected format. 
+ """ + from ...utilities.rms_input_file_creator import create_rms_reaction_entry + n_compounds = 2 + all_compounds = [add_compound_and_structure(self.manager) for _ in range(n_compounds)] + reactions = self.manager.get_collection("reactions") + c_ids = [c.id() for c in all_compounds] + all_reaction_ids = [ + add_reaction(self.manager, [c_ids[0]], [c_ids[1]]).id(), + ] + ea = [0.00] + a = [0.1] + n = [0] + + reactants = [] + for r_id in all_reaction_ids: + r = db.Reaction(r_id, reactions) + reactants.append(([a_id.string() for a_id in r.get_reactants(db.Side.BOTH)[0]], + [a_id.string() for a_id in r.get_reactants(db.Side.BOTH)[1]])) + reaction_list = create_rms_reaction_entry(a, n, ea, reactants) + reference = [{ + 'kinetics': { + 'A': a[0], + 'Ea': ea[0], + 'n': n[0], + 'type': 'Arrhenius'}, + 'products': [c_ids[1].string()], + 'reactants': [c_ids[0].string()], + 'type': 'ElementaryReaction'}] + assert reaction_list == reference + + def test_create_rms_yml_file(self): + """ + Idea of the test: Check if the rms yaml input file can be created without problems. 
+ """ + from ...utilities.rms_input_file_creator import create_rms_yml_file + n_compounds = 2 + all_compounds = [add_compound_and_structure(self.manager) for _ in range(n_compounds)] + reactions = self.manager.get_collection("reactions") + c_ids = [c.id() for c in all_compounds] + all_reaction_ids = [ + add_reaction(self.manager, [c_ids[0]], [c_ids[1]]).id(), + ] + ea = [0.00] + a = [0.1] + n = [0] + h = [9.0, 38.0] + s = [0.03, 0.69] + + reactants = [] + for r_id in all_reaction_ids: + r = db.Reaction(r_id, reactions) + reactants.append(([a_id.string() for a_id in r.get_reactants(db.Side.BOTH)[0]], + [a_id.string() for a_id in r.get_reactants(db.Side.BOTH)[1]])) + file_name = "puffin_test_chem.rms" + create_rms_yml_file([c.id().string() for c in all_compounds], h, s, a, n, ea, reactants, file_name, + "Some-Solvent", 0.5, None) + os.remove(file_name) diff --git a/scine_puffin/tests/jobs/test_rms_kinetic_modeling_job.py b/scine_puffin/tests/jobs/test_rms_kinetic_modeling_job.py new file mode 100644 index 0000000..3870e40 --- /dev/null +++ b/scine_puffin/tests/jobs/test_rms_kinetic_modeling_job.py @@ -0,0 +1,417 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. 
+""" + +from typing import Union + +from ..testcases import ( + JobTestCase, + skip_without +) +from ..db_setup import ( + add_calculation, + add_compound_and_structure, + add_reaction, + add_flask_and_structure +) + + +class RMSKineticModelingTest(JobTestCase): + + @skip_without('database', "julia", "diffeqpy") + def test_concentrations_ideal_gas(self): + from scine_puffin.jobs.rms_kinetic_modeling import RmsKineticModeling + import scine_database as db + + n_compounds = 5 + all_compounds = [add_compound_and_structure(self.manager) for _ in range(n_compounds - 1)] + flask = add_flask_and_structure(self.manager) + all_compounds.append(flask) + all_structure_ids = [c.get_centroid() for c in all_compounds] + c_ids = [c.id() for c in all_compounds] + all_reaction_ids = [ + add_reaction(self.manager, [c_ids[0]], [c_ids[2]]).id(), + add_reaction(self.manager, [c_ids[1]], [c_ids[3]]).id(), + add_reaction(self.manager, [c_ids[2], c_ids[3]], [c_ids[0], c_ids[1]]).id(), + add_reaction(self.manager, [c_ids[0], c_ids[1]], [c_ids[4], c_ids[4]]).id() + ] + ea = [0.00, 0.00, 0.00, 0.00] + a = [0.1, 0.05, 0.02, 0.02] + entropies = [0.5, 0.5, 0.5, 0.5, 0.5] + enthalpies = [-2e+2, -1.98e+2, -1.97e+2, -1.99e+2, -2.1e+2] + n = [0, 0, 0, 0] + start_concentrations = [0.5, 0.4, 0.0, 0.0, 0.0] + reference_data = [0.20530677, 0.15515132, 0.20505846, 0.15521392, 0.17926953] + reference_max = [0.5, 0.4, 0.20505868, 0.15745688, 0.19571166] + reference_flux = [0.31232202, 0.26808923, 0.20617824, 0.16194545, 0.21228757] + + model = db.Model('FAKE', '', '') + job = db.Job('rms_kinetic_modeling') + settings = { + "solver": "CVODE_BDF", + "ea": ea, + "arrhenius_prefactors": a, + "arrhenius_temperature_exponents": n, + "start_concentrations": start_concentrations, + "reaction_ids": [str(oid) for oid in all_reaction_ids], + "aggregate_ids": [str(oid) for oid in c_ids], + "aggregate_types": [db.CompoundOrFlask.COMPOUND for _ in range(4)] + [db.CompoundOrFlask.FLASK], + "entropies": entropies, + 
"enthalpies": enthalpies, + "energy_model_program": "DUMMY", + "phase_type": "ideal_gas", + "max_time": 36000.0, + "absolute_tolerance": 1e-20, + "relative_tolerance": 1e-6, + "reactor_pressure": 1E+5, + } + calculation = add_calculation(self.manager, model, job, all_structure_ids, settings) + # Run calculation/job + config = self.get_configuration() + job = RmsKineticModeling() + job.prepare(config["daemon"]["job_dir"], calculation.id()) + self.run_job(job, calculation, config) + + for c in all_compounds: + assert not c.explore() + all_structure_ids = [c.get_centroid() for c in all_compounds] + structures = self.manager.get_collection("structures") + properties = self.manager.get_collection("properties") + for i, s_id in enumerate(all_structure_ids): + structure = db.Structure(s_id, structures) + assert structure.has_property("final_concentration") + assert structure.has_property("max_concentration") + assert structure.has_property("concentration_flux") + final_concentration = db.NumberProperty(structure.get_properties("final_concentration")[0], properties) + max_concentration = db.NumberProperty(structure.get_properties("max_concentration")[0], properties) + concentration_flux = db.NumberProperty(structure.get_properties("concentration_flux")[0], properties) + self.assertAlmostEqual(final_concentration.get_data(), reference_data[i], delta=1e-2) + self.assertAlmostEqual(max_concentration.get_data(), reference_max[i], delta=1e-2) + self.assertAlmostEqual(concentration_flux.get_data(), reference_flux[i], delta=1e-1) + + @skip_without('database', "julia", "diffeqpy") + def test_concentrations_ideal_dilute_solution(self): + from scine_puffin.jobs.rms_kinetic_modeling import RmsKineticModeling + import scine_database as db + + n_compounds = 5 + all_compounds = [add_compound_and_structure(self.manager) for _ in range(n_compounds - 1)] + flask = add_flask_and_structure(self.manager) + all_compounds.append(flask) + all_structure_ids = [c.get_centroid() for c in 
all_compounds] + c_ids = [c.id() for c in all_compounds] + all_reaction_ids = [ + add_reaction(self.manager, [c_ids[0]], [c_ids[2]]).id(), + add_reaction(self.manager, [c_ids[1]], [c_ids[3]]).id(), + add_reaction(self.manager, [c_ids[2], c_ids[3]], [c_ids[0], c_ids[1]]).id(), + add_reaction(self.manager, [c_ids[0], c_ids[1]], [c_ids[4], c_ids[4]]).id() + ] + ea = [0.00, 0.00, 0.00, 3.00] + a = [1.1e+3, 1.05e+3, 1.02e+4, 1.02] + entropies = [0.5, 0.5, 0.5, 0.5, 0.5] + enthalpies = [-2e+2, -1.98e+2, -1.97e+2, -1.99e+2, -2.1e+2] + n = [0, 0, 0, 0] + start_concentrations = [0.5, 0.4, 0.0, 0.0, 0.0] + reference_data = [0.20531956, 0.15521179, 0.20514741, 0.15525519, 0.17906605, 14.3] + reference_max = [0.5, 0.4, 0.22863946, 0.22209669, 0.17906605, 14.3] + reference_flux = [0.38552628, 0.37854594, 0.29599325, 0.28901291, 0.17906606] + + model = db.Model('FAKE', '', '') + t = 430.15 + model.temperature = t + model.solvent = "water" + job = db.Job('rms_kinetic_modeling') + settings = { + "solver": "CVODE_BDF", + "ea": ea, + "arrhenius_prefactors": a, + "arrhenius_temperature_exponents": n, + "start_concentrations": start_concentrations, + "reaction_ids": [str(oid) for oid in all_reaction_ids], + "aggregate_ids": [str(oid) for oid in c_ids], + "aggregate_types": [db.CompoundOrFlask.COMPOUND for _ in range(4)] + [db.CompoundOrFlask.FLASK], + "entropies": entropies, + "enthalpies": enthalpies, + "energy_model_program": "DUMMY", + "phase_type": "ideal_dilute_solution", + "max_time": 36000.0, + "absolute_tolerance": 1e-20, + "relative_tolerance": 1e-9, + "reactor_pressure": 1E+5, + "reactor_solvent": "water", + "diffusion_limited": False, + } + calculation = add_calculation(self.manager, model, job, all_structure_ids, settings) + # Run calculation/job + config = self.get_configuration() + job = RmsKineticModeling() + job.prepare(config["daemon"]["job_dir"], calculation.id()) + self.run_job(job, calculation, config) + + for c in all_compounds: + assert not c.explore() + 
all_structure_ids = [c.get_centroid() for c in all_compounds] + structures = self.manager.get_collection("structures") + properties = self.manager.get_collection("properties") + for prop in properties.iterate_all_properties(): + prop.link(properties) + assert abs(float(prop.get_model().temperature) - t) < 1e-9 + + for i, s_id in enumerate(all_structure_ids): + structure = db.Structure(s_id, structures) + assert structure.has_property("final_concentration") + assert structure.has_property("max_concentration") + assert structure.has_property("concentration_flux") + final_concentration = db.NumberProperty(structure.get_properties("final_concentration")[0], properties) + max_concentration = db.NumberProperty(structure.get_properties("max_concentration")[0], properties) + concentration_flux = db.NumberProperty(structure.get_properties("concentration_flux")[0], properties) + self.assertAlmostEqual(final_concentration.get_data(), reference_data[i], delta=1e-2) + self.assertAlmostEqual(max_concentration.get_data(), reference_max[i], delta=1e-2) + self.assertAlmostEqual(concentration_flux.get_data(), reference_flux[i], delta=1e-1) + + @skip_without('database', "julia", "diffeqpy") + def test_sensitivity_analysis(self): + from scine_puffin.jobs.rms_kinetic_modeling import RmsKineticModeling + import scine_database as db + + n_compounds = 15 + all_compounds = [add_compound_and_structure(self.manager) for _ in range(n_compounds)] + all_structure_ids = [c.get_centroid() for c in all_compounds] + c_ids = [c.id() for c in all_compounds] + all_reaction_ids = [ + add_reaction(self.manager, [c_ids[8]], [c_ids[11], c_ids[12]]).id(), + add_reaction(self.manager, [c_ids[5]], [c_ids[8]]).id(), + add_reaction(self.manager, [c_ids[1], c_ids[1]], [c_ids[5]]).id(), + add_reaction(self.manager, [c_ids[9]], [c_ids[13], c_ids[14], c_ids[1]]).id(), + add_reaction(self.manager, [c_ids[4]], [c_ids[2]]).id(), + add_reaction(self.manager, [c_ids[4]], [c_ids[9]]).id(), + add_reaction(self.manager, 
[c_ids[0], c_ids[1]], [c_ids[2]]).id(), + add_reaction(self.manager, [c_ids[0], c_ids[1]], [c_ids[4]]).id(), + add_reaction(self.manager, [c_ids[5]], [c_ids[7]]).id(), + add_reaction(self.manager, [c_ids[6]], [c_ids[10], c_ids[1]]).id(), + add_reaction(self.manager, [c_ids[5]], [c_ids[6]]).id(), + add_reaction(self.manager, [c_ids[2]], [c_ids[9]]).id(), + add_reaction(self.manager, [c_ids[4]], [c_ids[3], c_ids[1]]).id(), + add_reaction(self.manager, [c_ids[3], c_ids[1]], [c_ids[2]]).id() + ] + ea = [0.0, + 141258.005874334, + 22350.1045931669, + 0.0, + 37769.2845750187, + 87707.0192024107, + 24118.3178788118, + 23942.6512074803, + 149593.511487877, + 0.0, + 191188.014352586, + 132868.656300405, + 0.0, + 32749.0495510235] + a = [8817012463061.74 for _ in all_reaction_ids] + n = [0 for _ in all_reaction_ids] + entropies = [483.928799667987, + 294.419261898161, + 652.415361315592, + 477.014094417863, + 675.345450235711, + 515.52359453181, + 422.822929205339, + 417.145780635592, + 507.650951023459, + 718.709871540267, + 280.378644164628, + 305.918813708162, + 277.578619144421, + 225.609287171713, + 404.146435972299] + enthalpies = [-1161186535.6783, + -506407081.521706, + -1667622787.30424, + -1161198092.3675, + -1667613260.10378, + -1012822836.15114, + -1012816820.21241, + -1012951947.21609, + -1012883102.19312, + -1667578869.90783, + -506390045.122877, + -509583214.885545, + -503295550.040687, + -303469253.644786, + -857659812.764682] + start_concentrations = [1.0, 1.0] + [0.0 for _ in range(len(all_compounds) - 2)] + + # reference from working run. 
+ reference_final = [7.48691760e-02, 8.98107191e-01, 2.21462075e-03, 8.25022273e-01, 2.43288613e-03, + 4.82799549e-02, 2.35394815e-14, 2.90416198e-05, 1.17193433e-09, 2.98648216e-05, + 2.58125417e-10, 2.98720961e-04, 2.98720961e-04, 9.54311790e-02, 9.54311790e-02] + reference_max = [1.00000000e+00, 1.00000000e+00, 1.37526691e-02, 9.12090645e-01, 2.78100405e-02, + 4.83372810e-02, 2.35394815e-14, 2.90416198e-05, 1.17193433e-09, 2.98823158e-05, + 2.58125417e-10, 2.98720961e-04, 2.98720961e-04, 9.54311790e-02, 9.54311790e-02] + # reference_flux = [2.29282693e+02, 5.42155028e+02, 2.29083511e+02, 2.32554989e+02, + # 2.32849987e+02, 4.00383137e+01, 8.35968464e-10, 2.90416198e-05, + # 5.87767688e-04, 3.36835658e-01, 5.77819508e-10, 2.89045556e-04, + # 2.89045556e-04, 2.41374632e-01, 2.41374632e-01] + reference_flux = [3.02126416e+02, 7.58527988e+02, 2.64552233e+02, 2.33897598e+02, + 2.71567599e+02, 1.11175726e+02, 1.05605466e-09, 2.90416198e-05, + 6.02153590e-04, 2.48637533e-01, 7.97905708e-10, 3.03431457e-04, + 3.03431457e-04, 1.53176486e-01, 1.53176486e-01] + + reference_c_max_sens = [1.83714115e-01, 1.96540904e-01, 3.48053816e-02, 1.92587203e-01, 5.71615968e-02, + 8.24115883e-02, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.37171546e-04, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 8.70804545e-02, 8.70804545e-02] + reference_c_max_ea_sens = [0.00000000e+00, 0.00000000e+00, 1.71595760e-03, 1.37171547e-04, 0.00000000e+00, + 1.37171571e-04, 1.13038940e-02, 1.21804830e-02, 0.00000000e+00, 0.00000000e+00, + 0.00000000e+00, 0.00000000e+00, 7.49768373e-04, 1.08392802e-02] + reference_c_final_ea_sens = [0.00000000e+00, 0.00000000e+00, 1.66605840e-10, 1.66397229e-10, 0.00000000e+00, + 8.47292592e-09, 1.66605840e-10, 1.66605840e-10, 0.00000000e+00, 0.00000000e+00, + 0.00000000e+00, 0.00000000e+00, 1.66466396e-10, 1.72988956e-10] + reference_c_flux_ea_sens = [0.00000000e+00, 0.00000000e+00, 4.659885053e-06, 8.02638585e-03, 0.00000000e+00, + 6.81609605e-03, 
1.87939504e-05, 3.30799624e-05, 0.00000000e+00, 0.00000000e+00, + 0.00000000e+00, 0.00000000e+00, 4.94748506e-06, 3.84500387e-05] + reference_var_final = [1.77284235e-03, 6.28070022e-03, 2.45146925e-06, 3.28166800e-03, + 2.95284267e-06, 1.51282096e-03, 1.91576991e-27, 5.49343670e-10, + 2.74160649e-17, 4.54564355e-10, 4.33879991e-20, 5.81191146e-08, + 5.81191146e-08, 8.41080667e-04, 8.41080667e-04] + reference_var_max = [0.00000000e+00, 0.00000000e+00, 8.90760701e-05, 2.47119289e-03, + 2.66124536e-04, 1.52723745e-03, 1.91576991e-27, 5.49343670e-10, + 2.74160649e-17, 4.55322785e-10, 4.33879991e-20, 5.81191146e-08, + 5.81191146e-08, 8.41080667e-04, 8.41080667e-04] + reference_var_flux = [2847.6573183771347, 17956.701741841487, 2183.194068349496, 1706.3903416116216, + 2300.5616220011425, 385.15557455260193, 5.93208300e-18, 5.49343669e-10, + 6.42188978e-06, 1.75102380e-03, 5.04266808e-18, 5.28459041e-06, + 5.28459041e-06, 0.0002534526815016325, 0.0002534526815016325] + + model = db.Model('FAKE', '', '') + t = 430.15 + model.temperature = t + model.solvent = "water" + job = db.Job('rms_kinetic_modeling') + settings = { + "solver": "CVODE_BDF", + "ea": ea, + "arrhenius_prefactors": a, + "arrhenius_temperature_exponents": n, + "start_concentrations": start_concentrations, + "reaction_ids": [str(oid) for oid in all_reaction_ids], + "aggregate_ids": [str(oid) for oid in c_ids], + "aggregate_types": [db.CompoundOrFlask.COMPOUND for _ in all_compounds], + "entropies": entropies, + "enthalpies": enthalpies, + "energy_model_program": "DUMMY", + "phase_type": "ideal_dilute_solution", + "max_time": 100.0, + "absolute_tolerance": 1e-20, + "relative_tolerance": 1e-9, + "reactor_pressure": 1E+5, + "reactor_solvent": "water", + "diffusion_limited": False, + "sensitivity_analysis": "morris", + "ea_lower_uncertainty": [1e+4 for _ in all_reaction_ids], + "ea_upper_uncertainty": [1e+4 for _ in all_reaction_ids], + "enthalpy_lower_uncertainty": [5e+3 for _ in c_ids], + 
"enthalpy_upper_uncertainty": [5e+3 for _ in c_ids], + "sample_size": 2, + "local_sensitivities": True, + "save_oaat_var": True, + "enforce_mass_balance": False, + "screen_global_sens_size": 0 + } + calculation = add_calculation(self.manager, model, job, all_structure_ids, settings) + # Run calculation/job + config = self.get_configuration() + config["resources"]["cores"] = 2 + job = RmsKineticModeling() + job.force_parallel = True + job.prepare(config["daemon"]["job_dir"], calculation.id()) + self.run_job(job, calculation, config) + + for c in all_compounds: + assert not c.explore() + all_structure_ids = [c.get_centroid() for c in all_compounds] + structures = self.manager.get_collection("structures") + properties = self.manager.get_collection("properties") + reactions = self.manager.get_collection("reactions") + compounds = self.manager.get_collection("compounds") + flasks = self.manager.get_collection("flasks") + for prop in properties.iterate_all_properties(): + prop.link(properties) + assert abs(float(prop.get_model().temperature) - t) < 1e-9 + + for i, s_id in enumerate(all_structure_ids): + structure = db.Structure(s_id, structures) + assert structure.has_property("final_concentration") + assert structure.has_property("max_concentration") + assert structure.has_property("concentration_flux") + assert structure.has_property("max_free_energy_sensitivity_oaat_flux") + assert structure.has_property("max_free_energy_sensitivity_oaat_max") + assert structure.has_property("var_final_c") + assert structure.has_property("var_max_c") + assert structure.has_property("var_flux_c") + assert structure.has_property("max_free_energy_sensitivity_morris_mu_c_max") + assert structure.has_property("max_free_energy_sensitivity_morris_mu_star_c_max") + assert structure.has_property("max_free_energy_sensitivity_morris_sigma_c_max") + assert structure.has_property("max_free_energy_sensitivity_morris_mu_c_final") + assert 
structure.has_property("max_free_energy_sensitivity_morris_mu_star_c_final") + assert structure.has_property("max_free_energy_sensitivity_morris_sigma_c_final") + assert structure.has_property("morris_mean_c_max") + assert structure.has_property("morris_mean_c_final") + assert structure.has_property("morris_mean_c_flux") + assert structure.has_property("morris_var_c_max") + assert structure.has_property("morris_var_c_final") + assert structure.has_property("morris_var_c_flux") + final_concentration = db.NumberProperty(structure.get_properties("final_concentration")[0], properties) + max_concentration = db.NumberProperty(structure.get_properties("max_concentration")[0], properties) + concentration_flux = db.NumberProperty(structure.get_properties("concentration_flux")[0], properties) + c_max_sensitivity = db.NumberProperty(structure.get_properties("max_free_energy_sensitivity_oaat_max")[0], + properties) + var_final = db.NumberProperty(structure.get_properties("var_final_c")[0], properties) + var_max = db.NumberProperty(structure.get_properties("var_max_c")[0], properties) + var_flux = db.NumberProperty(structure.get_properties("var_flux_c")[0], properties) + self.assertAlmostEqual(final_concentration.get_data(), reference_final[i], delta=1e-2) + self.assertAlmostEqual(max_concentration.get_data(), reference_max[i], delta=1e-3) + self.assertAlmostEqual(concentration_flux.get_data(), reference_flux[i], + delta=1e-2 * max(1.0, reference_flux[i])) + self.assertAlmostEqual(c_max_sensitivity.get_data(), reference_c_max_sens[i], delta=1e-3) + self.assertAlmostEqual(var_final.get_data(), reference_var_final[i], delta=1e-4) + self.assertAlmostEqual(var_max.get_data(), reference_var_max[i], delta=1e-4) + self.assertAlmostEqual(var_flux.get_data(), reference_var_flux[i], + delta=1e-2 * max(1.0, reference_var_flux[i])) + + for r_str_id, ref_max_sens, ref_final_sens, ref_flux_sens in zip( + settings["reaction_ids"], reference_c_max_ea_sens, reference_c_final_ea_sens, 
reference_c_flux_ea_sens): + reaction = db.Reaction(db.ID(r_str_id), reactions) + a_id = reaction.get_reactants(db.Side.BOTH)[0][0] + a: Union[db.Compound, db.Flask] = db.Compound(a_id, compounds) + if not a.exists(): + a = db.Flask(a_id, flasks) + centroid = db.Structure(a.get_centroid(), structures) + for ref, label in zip([ref_max_sens, ref_final_sens, ref_flux_sens], ["max", "final", "flux"]): + prop_label = r_str_id + "_reaction_barrier_sensitivity_oaat_" + label + assert centroid.has_property(prop_label) + ea_max_sens_prop = db.NumberProperty(centroid.get_properties(prop_label)[0], properties) + self.assertAlmostEqual(ea_max_sens_prop.get_data(), ref, delta=1e-2) + + settings["sensitivity_analysis"] = "sobol" + calculation = add_calculation(self.manager, model, db.Job("rms_kinetic_modeling"), all_structure_ids, settings) + # Run calculation/job + job = RmsKineticModeling() + job.prepare(config["daemon"]["job_dir"], calculation.id()) + self.run_job(job, calculation, config) + + for i, s_id in enumerate(all_structure_ids): + structure = db.Structure(s_id, structures) + assert structure.has_property("max_free_energy_sensitivity_sobol_s1_c_max") + assert structure.has_property("max_free_energy_sensitivity_sobol_st_c_max") + assert structure.has_property("max_free_energy_sensitivity_sobol_s1_c_final") + assert structure.has_property("max_free_energy_sensitivity_sobol_st_c_final") + assert structure.has_property("sobol_mean_c_max") + assert structure.has_property("sobol_mean_c_final") + assert structure.has_property("sobol_mean_c_flux") + assert structure.has_property("sobol_var_c_max") + assert structure.has_property("sobol_var_c_final") + assert structure.has_property("sobol_var_c_flux") diff --git a/scine_puffin/tests/jobs/test_scine_afir.py b/scine_puffin/tests/jobs/test_scine_afir.py index 6644a2b..8380fb3 100644 --- a/scine_puffin/tests/jobs/test_scine_afir.py +++ b/scine_puffin/tests/jobs/test_scine_afir.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- 
coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -77,14 +77,14 @@ def run_by_label(self, input_label, expected_label): @skip_without('database', 'readuct', 'molassembler') def test_user_guess(self): import scine_database as db - self.run_by_label(db.Label.USER_GUESS, db.Label.USER_OPTIMIZED) + self.run_by_label(db.Label.USER_GUESS, db.Label.USER_COMPLEX_OPTIMIZED) @skip_without('database', 'readuct', 'molassembler') def test_minimum_guess(self): import scine_database as db - self.run_by_label(db.Label.MINIMUM_GUESS, db.Label.MINIMUM_OPTIMIZED) + self.run_by_label(db.Label.MINIMUM_GUESS, db.Label.COMPLEX_OPTIMIZED) @skip_without('database', 'molassembler', 'readuct') def test_surface_guess(self): import scine_database as db - self.run_by_label(db.Label.SURFACE_GUESS, db.Label.SURFACE_OPTIMIZED) + self.run_by_label(db.Label.SURFACE_GUESS, db.Label.SURFACE_COMPLEX_OPTIMIZED) diff --git a/scine_puffin/tests/jobs/test_scine_bond_orders_job.py b/scine_puffin/tests/jobs/test_scine_bond_orders_job.py index 7b6911f..88d212e 100644 --- a/scine_puffin/tests/jobs/test_scine_bond_orders_job.py +++ b/scine_puffin/tests/jobs/test_scine_bond_orders_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" diff --git a/scine_puffin/tests/jobs/test_scine_bspline_optimization.py b/scine_puffin/tests/jobs/test_scine_bspline_optimization.py index f59c72f..784e9ed 100644 --- a/scine_puffin/tests/jobs/test_scine_bspline_optimization.py +++ b/scine_puffin/tests/jobs/test_scine_bspline_optimization.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -21,12 +21,14 @@ from ..resources import resource_path -class ScineBsplineOptimizationTest(JobTestCase): +class ScineBsplineOptimizationJobTest(JobTestCase): @skip_without('database', 'readuct', 'molassembler') def test_energy_starting_from_complex(self): # import Job from scine_puffin.jobs.scine_bspline_optimization import ScineBsplineOptimization import scine_database as db + self.tearDown() + self.setUp() # Amine-addition of the proline derivative to the aldehyde group of propanal. # These structures where originally optimized with dftb3 @@ -80,12 +82,22 @@ def test_energy_starting_from_complex(self): "opt_bfgs_use_trust_radius": True, "opt_bfgs_trust_radius": 0.4, "imaginary_wavenumber_threshold": -30, - "bspline_num_integration_points": 40 + "bspline_num_integration_points": 40, + "spin_propensity_check": 0 } calculation = add_calculation(self.manager, model, job, [reactant_one_guess.id(), product_guess.id()], settings) + graph_one = json.load(open(os.path.join(resource_path(), "proline_acid_propanal_complex.json"), "r")) + graph_two = json.load(open(os.path.join(resource_path(), "proline_acid_propanal_product.json"), "r")) + # add some graphs because we will need them later. 
It is not important whether they actually match the + # structure + reactant_one_guess.set_graph("masm_cbor_graph", graph_one["masm_cbor_graph"]) + product_guess.set_graph("masm_cbor_graph", graph_two["masm_cbor_graph"]) + original_elementary_step = db.ElementaryStep.make([reactant_one_guess.id()], [product_guess.id()], + self.manager.get_collection("elementary_steps")) + calculation.set_auxiliaries({"elementary-step-id": original_elementary_step.id()}) # Run calculation/job config = self.get_configuration() @@ -105,14 +117,18 @@ def test_energy_starting_from_complex(self): complex_structure.link(structures) selection = {"label": "ts_optimized"} assert structures.count(json.dumps(selection)) == 1 - assert properties.count(json.dumps({})) == 14 - assert elementary_steps.count(json.dumps({})) == 2 + assert properties.count(json.dumps({})) == 15 + # 1 initial step + 2 steps from the calculation (1 x complex fragmentation, 1 x regular). + print("N-Steps", elementary_steps.count(json.dumps({}))) + assert elementary_steps.count(json.dumps({})) == 3 results = calculation.get_results() - assert len(results.property_ids) == 10 + assert len(results.property_ids) == 11 assert len(results.structure_ids) == 5 assert len(results.elementary_step_ids) == 2 # The regular elementary step should be the last one in the list. - new_elementary_step = db.ElementaryStep(results.elementary_step_ids[-1], elementary_steps) + # The first step is the complex fragmentation, second is the regular one, third is the embedding for the + # LHS. 
+ new_elementary_step = db.ElementaryStep(results.elementary_step_ids[1], elementary_steps) new_ts = db.Structure(new_elementary_step.get_transition_state(), structures) assert new_ts.has_property('electronic_energy') energy_props = new_ts.get_properties("electronic_energy") @@ -132,7 +148,7 @@ def test_energy_starting_from_complex(self): assert structure.has_property('bond_orders') energy_props = structure.get_properties("electronic_energy") energy = db.NumberProperty(energy_props[0], properties) - self.assertAlmostEqual(energy.get_data(), all_energies[i], delta=1e-1) + self.assertAlmostEqual(energy.get_data(), all_energies[i], delta=1e-3) @skip_without('database', 'readuct', 'molassembler') def test_collapsing_spline_ends(self): @@ -193,18 +209,27 @@ def test_collapsing_spline_ends(self): "opt_bfgs_trust_radius": 0.4, "imaginary_wavenumber_threshold": -30, "bspline_num_integration_points": 40, - "sp_expect_charge_separation": True + "sp_expect_charge_separation": True, } calculation = add_calculation(self.manager, model, job, [reactant_one_guess.id(), product_guess.id()], settings) + graph_one = json.load(open(os.path.join(resource_path(), "proline_acid.json"), "r")) + graph_two = json.load(open(os.path.join(resource_path(), "propanal.json"), "r")) + # add some graphs because we will need them later. 
It is not important whether they actually match the structure + reactant_one_guess.set_graph("masm_cbor_graph", graph_one["masm_cbor_graph"]) + product_guess.set_graph("masm_cbor_graph", graph_two["masm_cbor_graph"]) + original_elementary_step = db.ElementaryStep.make([reactant_one_guess.id()], [product_guess.id()], + self.manager.get_collection("elementary_steps")) + calculation.set_auxiliaries({"elementary-step-id": original_elementary_step.id()}) # Run calculation/job config = self.get_configuration() job = ScineBsplineOptimization() job.prepare(config["daemon"]["job_dir"], calculation.id()) self.run_job(job, calculation, config) + assert calculation.get_status() == db.Status.COMPLETE @skip_without('database', 'readuct', 'molassembler') def test_charges(self): @@ -271,6 +296,92 @@ def test_charges(self): calculation = add_calculation(self.manager, model, job, [reactant_one_guess.id(), product_guess.id()], settings) + graph_one = json.load(open(os.path.join(resource_path(), "proline_acid.json"), "r")) + graph_two = json.load(open(os.path.join(resource_path(), "propanal.json"), "r")) + # add some graphs because we will need them later. 
It is not important whether they actually match the structure + reactant_one_guess.set_graph("masm_cbor_graph", graph_one["masm_cbor_graph"]) + product_guess.set_graph("masm_cbor_graph", graph_two["masm_cbor_graph"]) + original_elementary_step = db.ElementaryStep.make([reactant_one_guess.id()], [product_guess.id()], + self.manager.get_collection("elementary_steps")) + calculation.set_auxiliaries({"elementary-step-id": original_elementary_step.id()}) + + # Run calculation/job + config = self.get_configuration() + job = ScineBsplineOptimization() + job.prepare(config["daemon"]["job_dir"], calculation.id()) + self.run_job(job, calculation, config) + assert calculation.get_status() == db.Status.COMPLETE + + @skip_without('database', 'readuct', 'molassembler') + def test_complex_to_complex(self): + # import Job + from scine_puffin.jobs.scine_bspline_optimization import ScineBsplineOptimization + import scine_database as db + + reactant_one_path = os.path.join(resource_path(), "acetal_methanol_complex.xyz") + reactant_one_guess = add_structure(self.manager, reactant_one_path, db.Label.MINIMUM_GUESS) + product_path = os.path.join(resource_path(), "methanol_enamine_complex.xyz") + product_guess = add_structure(self.manager, product_path, db.Label.MINIMUM_GUESS) + + model = db.Model('pm6', 'pm6', 'none') + job = db.Job('scine_bspline_optimization') + settings = { + "tsopt_convergence_max_iterations": 100, + "tsopt_convergence_step_max_coefficient": 0.002, + "tsopt_convergence_step_rms": 0.001, + "tsopt_convergence_gradient_max_coefficient": 0.0002, + "tsopt_convergence_gradient_rms": 0.0001, + "tsopt_convergence_requirement": 3, + "tsopt_convergence_delta_value": 1e-06, + "tsopt_optimizer": "bofill", + "tsopt_geoopt_coordinate_system": "cartesianWithoutRotTrans", + "tsopt_bofill_trust_radius": 0.1, + "tsopt_bofill_follow_mode": 0, + "irc_convergence_max_iterations": 75, + "irc_sd_factor": 2.0, + "irc_irc_initial_step_size": 0.7, + "irc_stop_on_error": False, + 
"irc_convergence_step_max_coefficient": 0.002, + "irc_convergence_step_rms": 0.001, + "irc_convergence_gradient_max_coefficient": 0.0002, + "irc_convergence_gradient_rms": 0.0001, + "irc_convergence_delta_value": 1e-06, + "irc_irc_coordinate_system": "cartesianWithoutRotTrans", + "ircopt_convergence_max_iterations": 1000, + "ircopt_convergence_step_max_coefficient": 0.002, + "ircopt_convergence_step_rms": 0.001, + "ircopt_convergence_gradient_max_coefficient": 0.0002, + "ircopt_convergence_gradient_rms": 0.0001, + "ircopt_convergence_requirement": 3, + "ircopt_convergence_delta_value": 1e-06, + "ircopt_geoopt_coordinate_system": "cartesianWithoutRotTrans", + "ircopt_bfgs_use_trust_radius": True, + "ircopt_bfgs_trust_radius": 0.2, + "opt_convergence_max_iterations": 1000, + "opt_convergence_step_max_coefficient": 0.002, + "opt_convergence_step_rms": 0.001, + "opt_convergence_gradient_max_coefficient": 0.0002, + "opt_convergence_gradient_rms": 0.0001, + "opt_convergence_requirement": 3, + "opt_convergence_delta_value": 1e-06, + "opt_geoopt_coordinate_system": "cartesianWithoutRotTrans", + "opt_bfgs_use_trust_radius": True, + "opt_bfgs_trust_radius": 0.4, + "sp_expect_charge_separation": True, + "spin_propensity_check": 0, + } + + calculation = add_calculation(self.manager, model, job, + [reactant_one_guess.id(), product_guess.id()], + settings) + graph_one = json.load(open(os.path.join(resource_path(), "proline_acid.json"), "r")) + graph_two = json.load(open(os.path.join(resource_path(), "propanal.json"), "r")) + # add some graphs because we will need them later. 
It is not important whether they actually match the structure + reactant_one_guess.set_graph("masm_cbor_graph", graph_one["masm_cbor_graph"]) + product_guess.set_graph("masm_cbor_graph", graph_two["masm_cbor_graph"]) + original_elementary_step = db.ElementaryStep.make([reactant_one_guess.id()], [product_guess.id()], + self.manager.get_collection("elementary_steps")) + calculation.set_auxiliaries({"elementary-step-id": original_elementary_step.id()}) # Run calculation/job config = self.get_configuration() diff --git a/scine_puffin/tests/jobs/test_scine_dissociation_cut_job.py b/scine_puffin/tests/jobs/test_scine_dissociation_cut_job.py index f4e66f2..46a9a0a 100644 --- a/scine_puffin/tests/jobs/test_scine_dissociation_cut_job.py +++ b/scine_puffin/tests/jobs/test_scine_dissociation_cut_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" @@ -48,14 +48,14 @@ def _setup_and_execute(self, dissociations: List[int], charge_propensity_check: reactant_guess.set_graph(key, value) model = db.Model('dftb3', 'dftb3', '') - job = db.Job('scine_dissociation_cut') + db_job = db.Job('scine_dissociation_cut') settings = { "dissociations": dissociations, "charge_propensity_check": charge_propensity_check, "max_scf_iterations": 1000, } - calculation = add_calculation(self.manager, model, job, [reactant_guess.id()], settings) + calculation = add_calculation(self.manager, model, db_job, [reactant_guess.id()], settings) # Run calculation/job config = self.get_configuration() diff --git a/scine_puffin/tests/jobs/test_scine_geometry_optimization_job.py b/scine_puffin/tests/jobs/test_scine_geometry_optimization_job.py index c2628b1..b8908d4 100644 --- a/scine_puffin/tests/jobs/test_scine_geometry_optimization_job.py +++ b/scine_puffin/tests/jobs/test_scine_geometry_optimization_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" @@ -31,6 +31,10 @@ def run_by_label(self, input_label, expected_label): water = os.path.join(resource_path(), "water.xyz") structure = add_structure(self.manager, water, input_label) model = db.Model('dftb3', 'dftb3', '') + if input_label == db.Label.SURFACE_GUESS: + prop = db.VectorProperty.make("surface_atom_indices", model, [0, 1], + self.manager.get_collection("properties")) + structure.add_property("surface_atom_indices", prop.id()) job = db.Job('scine_geometry_optimization') calculation = add_calculation(self.manager, model, job, [structure.id()]) diff --git a/scine_puffin/tests/jobs/test_scine_geometry_validation_job.py b/scine_puffin/tests/jobs/test_scine_geometry_validation_job.py new file mode 100644 index 0000000..cfc6834 --- /dev/null +++ b/scine_puffin/tests/jobs/test_scine_geometry_validation_job.py @@ -0,0 +1,587 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. 
+""" + +import os +import numpy as np +import pytest + +from ..testcases import ( + JobTestCase, + skip_without +) + +from ..db_setup import ( + add_calculation, + add_structure +) + +from ..resources import resource_path + + +class ScineGeometryValidationJobTest(JobTestCase): + + @skip_without('database', 'readuct') + def test_non_valid_minimum(self): + from scine_puffin.jobs.scine_geometry_validation import ScineGeometryValidation + import scine_database as db + + # Setup DB for calculation + water = os.path.join(resource_path(), "water_distorted.xyz") + structure = add_structure(self.manager, water, db.Label.MINIMUM_OPTIMIZED) + structure.set_graph( + "masm_cbor_graph", + "pGFhgaVhYQBhYwJhb4GCAAFhcqNhbIKBAIEBYmxygYIAAWFzgYIAAWFzAWFjD2FnomFFgoMAAgCDAQIAYVqDAQEIYXaDAQIB") + structure.set_graph("masm_decision_list", "") + + model = db.Model('dftb3', 'dftb3', '') + model.program = "sparrow" + job = db.Job('scine_geometry_validation') + calculation = add_calculation(self.manager, model, job, [structure.id()]) + + # Run calculation/job + config = self.get_configuration() + job = ScineGeometryValidation() + job.prepare(config["daemon"]["job_dir"], calculation.id()) + success = job.run(self.manager, calculation, config) + assert not success + + # Check comment of calculation + ref_comment = "\nError: Scine Geometry Validation Job failed with message:\n" + \ + "Structure could not be validated to be a minimum. Hessian information is stored anyway." 
+ assert calculation.get_comment() == ref_comment + + # Check results + assert calculation.get_status() == db.Status.FAILED + assert structure.has_property("electronic_energy") + assert structure.has_property("hessian") + assert structure.has_property("normal_modes") + assert structure.has_property("frequencies") + assert structure.has_property("gibbs_free_energy") + assert structure.has_property("gibbs_energy_correction") + energy_props = structure.get_properties("electronic_energy") + hessian_props = structure.get_properties("hessian") + normal_modes_props = structure.get_properties("normal_modes") + frequencies_props = structure.get_properties("frequencies") + gibbs_free_energy_props = structure.get_properties("gibbs_free_energy") + gibbs_energy_correction_props = structure.get_properties("gibbs_energy_correction") + + assert len(energy_props) == 1 + assert len(hessian_props) == 1 + assert len(normal_modes_props) == 1 + assert len(frequencies_props) == 1 + assert len(gibbs_free_energy_props) == 1 + assert len(gibbs_energy_correction_props) == 1 + results = calculation.get_results() + assert len(results.property_ids) == 6 + assert energy_props[0] in results.property_ids + assert hessian_props[0] in results.property_ids + assert frequencies_props[0] in results.property_ids + assert normal_modes_props[0] in results.property_ids + assert gibbs_free_energy_props[0] in results.property_ids + assert gibbs_energy_correction_props[0] in results.property_ids + + # Check generated properties + # Energy + properties = self.manager.get_collection("properties") + energy = db.NumberProperty(energy_props[0]) + energy.link(properties) + self.assertAlmostEqual(energy.get_data(), -3.8236653931331333, delta=1e-1) + # Hessian + ref_hessian = np.array([ + [1.32061502e+00, 1.55335561e+00, -9.74047400e-08, -3.39090813e-01, 9.51106255e-02, 3.43230048e-07, + -9.81525023e-01, -1.64846785e+00, 3.31603713e-07, ], + [1.55335561e+00, 1.26247487e+00, -8.85166508e-08, 5.39764221e-02, 
1.95699661e-02, -7.51007690e-08, + -1.60733254e+00, -1.28204482e+00, -7.55222799e-08, ], + [-9.74047400e-08, -8.85166508e-08, -4.20492614e-01, -1.33637278e-06, 1.43254119e-06, -5.61467929e-02, + 1.43377752e-06, -1.34402454e-06, 4.76637885e-01, ], + [-3.39090813e-01, 5.39764221e-02, -1.33637278e-06, 6.61626880e-01, -4.82760057e-01, -2.63388409e-07, + -3.22535721e-01, 4.28784515e-01, -2.69506583e-07, ], + [9.51106255e-02, 1.95699661e-02, 1.43254119e-06, -4.82760057e-01, 3.05309881e-01, 3.24476937e-09, + 3.87647711e-01, -3.24879754e-01, 1.75076689e-08, ], + [3.43230048e-07, -7.51007690e-08, -5.61467929e-02, -2.63388409e-07, 3.24476937e-09, -1.58028581e-01, + -7.98416446e-08, 7.18560128e-08, 2.14175438e-01, ], + [-9.81525023e-01, -1.60733254e+00, 1.43377752e-06, -3.22535721e-01, 3.87647711e-01, -7.98416446e-08, + 1.30406122e+00, 1.21968557e+00, -6.20971299e-08, ], + [-1.64846785e+00, -1.28204482e+00, -1.34402454e-06, 4.28784515e-01, -3.24879754e-01, 7.18560128e-08, + 1.21968557e+00, 1.60692447e+00, 5.80145901e-08, ], + [3.31603713e-07, -7.55222799e-08, 4.76637885e-01, -2.69506583e-07, 1.75076689e-08, 2.14175438e-01, + -6.20971299e-08, 5.80145901e-08, -6.90811867e-01, ], + ]) + hessian_prop = db.DenseMatrixProperty(hessian_props[0]) + hessian_prop.link(properties) + hessian = hessian_prop.get_data() + assert hessian.shape == (9, 9) + assert np.allclose(ref_hessian, hessian, atol=1e-1) + # Normal modes + ref_normal_modes = np.array([ + [-6.57555073e-02, 1.26620092e-02, 4.40344617e-02, ], + [1.75882705e-03, -4.50720615e-03, 4.28132636e-02, ], + [6.25238992e-18, -4.82124061e-18, 2.08537396e-18, ], + [6.80766002e-01, -6.84858319e-01, -6.00657576e-02, ], + [3.90929440e-01, 4.12321086e-01, 4.06809503e-02, ], + [2.59753169e-16, -1.21379778e-17, -1.53607653e-17, ], + [3.63010525e-01, 4.83866667e-01, -6.38919610e-01, ], + [-4.18848354e-01, -3.40775505e-01, -7.20281511e-01, ], + [-4.33840517e-16, 1.20815437e-17, 2.94634947e-17, ], + ]) + normal_mode_prop = 
db.DenseMatrixProperty(normal_modes_props[0]) + normal_mode_prop.link(properties) + normal_modes = normal_mode_prop.get_data() + assert normal_modes.shape == (9, 3) + assert np.allclose(np.abs(ref_normal_modes), np.abs(normal_modes), atol=1e-1) + + # Frequencies + ref_frequencies = [-0.00054616, 0.00440581, 0.00628707] + frequencies_prop = db.VectorProperty(frequencies_props[0]) + frequencies_prop.link(properties) + frequencies = frequencies_prop.get_data() + assert len(frequencies) == 3 + self.assertAlmostEqual(ref_frequencies[0], frequencies[0], delta=1e-1) + self.assertAlmostEqual(ref_frequencies[1], frequencies[1], delta=1e-1) + self.assertAlmostEqual(ref_frequencies[2], frequencies[2], delta=1e-1) + + # Gibbs free energy + gibbs_free_energy_prop = db.NumberProperty(gibbs_free_energy_props[0]) + gibbs_free_energy_prop.link(properties) + gibbs_free_energy = gibbs_free_energy_prop.get_data() + self.assertAlmostEqual(gibbs_free_energy, -3.807695814121228, delta=1e-1) + + # Gibbs energy correction + gibbs_energy_correction_prop = db.NumberProperty(gibbs_energy_correction_props[0]) + gibbs_energy_correction_prop.link(properties) + gibbs_energy_correction = gibbs_energy_correction_prop.get_data() + self.assertAlmostEqual(gibbs_energy_correction, 0.01596957901190521, delta=1e-5) + + @skip_without('database', 'readuct') + def test_valid_minimum(self): + from scine_puffin.jobs.scine_geometry_validation import ScineGeometryValidation + import scine_database as db + + # Setup DB for calculation + water = os.path.join(resource_path(), "water.xyz") + structure = add_structure(self.manager, water, db.Label.MINIMUM_OPTIMIZED) + structure.set_graph( + "masm_cbor_graph", + "pGFhgaVhYQBhYwJhb4GCAAFhcqNhbIKBAIEBYmxygYIAAWFzgYIAAWFzAWFjD2FnomFFgoMAAgCDAQIAYVqDAQEIYXaDAgAA") + structure.set_graph("masm_decision_list", "") + + model = db.Model('dftb3', 'dftb3', '') + model.program = "sparrow" + job = db.Job('scine_geometry_validation') + calculation = add_calculation(self.manager, 
model, job, [structure.id()]) + + # Run calculation/job + config = self.get_configuration() + job = ScineGeometryValidation() + job.prepare(config["daemon"]["job_dir"], calculation.id()) + success = job.run(self.manager, calculation, config) + assert success + + # Check results + assert calculation.get_status() == db.Status.COMPLETE + assert structure.has_property("electronic_energy") + assert structure.has_property("bond_orders") + assert structure.has_property("hessian") + assert structure.has_property("normal_modes") + assert structure.has_property("frequencies") + assert structure.has_property("gibbs_free_energy") + assert structure.has_property("gibbs_energy_correction") + energy_props = structure.get_properties("electronic_energy") + bond_props = structure.get_properties("bond_orders") + hessian_props = structure.get_properties("hessian") + normal_modes_props = structure.get_properties("normal_modes") + frequencies_props = structure.get_properties("frequencies") + gibbs_free_energy_props = structure.get_properties("gibbs_free_energy") + gibbs_energy_correction_props = structure.get_properties("gibbs_energy_correction") + + assert len(energy_props) == 1 + assert len(bond_props) == 1 + assert len(hessian_props) == 1 + assert len(normal_modes_props) == 1 + assert len(frequencies_props) == 1 + assert len(gibbs_free_energy_props) == 1 + assert len(gibbs_energy_correction_props) == 1 + results = calculation.get_results() + assert len(results.property_ids) == 7 + assert energy_props[0] in results.property_ids + assert bond_props[0] in results.property_ids + assert hessian_props[0] in results.property_ids + assert frequencies_props[0] in results.property_ids + assert normal_modes_props[0] in results.property_ids + assert gibbs_free_energy_props[0] in results.property_ids + assert gibbs_energy_correction_props[0] in results.property_ids + + # Check generated properties + # Energy + properties = self.manager.get_collection("properties") + energy = 
db.NumberProperty(energy_props[0]) + energy.link(properties) + self.assertAlmostEqual(energy.get_data(), -4.061143327, delta=1e-1) + # Bond orders + bond_prop = db.SparseMatrixProperty(bond_props[0]) + bond_prop.link(properties) + bond = bond_prop.get_data() + assert bond.shape == (3, 3) + self.assertAlmostEqual(bond[0, 1], +0.881347660, delta=1e-1) + self.assertAlmostEqual(bond[0, 2], +0.881347660, delta=1e-1) + self.assertAlmostEqual(bond[1, 0], +0.881347660, delta=1e-1) + self.assertAlmostEqual(bond[2, 0], +0.881347660, delta=1e-1) + self.assertAlmostEqual(bond[0, 0], +0.000000000) + self.assertAlmostEqual(bond[1, 1], +0.000000000) + self.assertAlmostEqual(bond[2, 2], +0.000000000) + self.assertAlmostEqual(bond[2, 1], +0.003157004, delta=1e-1) + self.assertAlmostEqual(bond[1, 2], +0.003157004, delta=1e-1) + # Hessian + ref_hessian = np.array( + [[4.33647320e-01, -1.92895860e-02, 6.74264637e-09, -3.82458865e-01, -2.36584777e-02, -2.82825003e-07, + -5.11883523e-02, 4.29483194e-02, 2.77110166e-07], + [-1.92895860e-02, 4.33647320e-01, 6.74262344e-09, 4.30982652e-02, -5.13382980e-02, + 2.77110141e-07, -2.38084235e-02, -3.82308920e-01, -2.82824980e-07], + [6.74264637e-09, 6.74262344e-09, 5.50602001e-02, -5.27561104e-09, -1.46703795e-09, - + 2.75300925e-02, -1.46703552e-09, -5.27558506e-09, -2.75300925e-02], + [-3.82458865e-01, 4.30982652e-02, -5.27561104e-09, 3.92889518e-01, -2.32293721e-02, + 2.55186625e-07, -1.04307157e-02, -1.98690895e-02, -2.50496798e-07], + [-2.36584777e-02, -5.13382980e-02, -1.46703795e-09, -2.32293721e-02, + 6.16190275e-02, -1.38730691e-08, 4.68877925e-02, -1.02807711e-02, 1.48980846e-08], + [-2.82825003e-07, 2.77110141e-07, -2.75300925e-02, 2.55186625e-07, -1.38730691e-08, + 1.47316279e-02, 2.76383853e-08, -2.63237086e-07, 1.27984576e-02], + [-5.11883523e-02, -2.38084235e-02, -1.46703552e-09, -1.04307157e-02, 4.68877925e-02, + 2.76383853e-08, 6.16190282e-02, -2.30794283e-02, -2.66133757e-08], + [4.29483194e-02, -3.82308920e-01, 
-5.27558506e-09, -1.98690895e-02, - + 1.02807711e-02, -2.63237086e-07, -2.30794283e-02, 3.92589630e-01, 2.67926909e-07], + [2.77110166e-07, -2.82824980e-07, -2.75300925e-02, -2.50496798e-07, 1.48980846e-08, 1.27984576e-02, + -2.66133757e-08, 2.67926909e-07, 1.47316269e-02]]) + hessian_prop = db.DenseMatrixProperty(hessian_props[0]) + hessian_prop.link(properties) + hessian = hessian_prop.get_data() + assert hessian.shape == (9, 9) + assert np.allclose(ref_hessian, hessian, atol=1e-1) + # Normal modes + ref_normal_modes = np.array( + [[-4.47497118e-02, -3.85208122e-02, -4.31593322e-02], + [-4.47122166e-02, -3.88386563e-02, 4.29126332e-02], + [-8.49613600e-18, 5.24295770e-20, 2.59912639e-19], + [2.95571230e-02, 6.81190413e-01, 6.85647964e-01], + [6.80781838e-01, -6.97265040e-02, -5.54050376e-04], + [1.70377797e-16, 1.60989274e-16, 1.17463267e-16], + [6.80781838e-01, -6.97265040e-02, -5.54050376e-04], + [2.89619394e-02, 6.86235743e-01, -6.80623863e-01], + [-2.83048332e-16, -1.53196255e-16, -2.05063421e-16]] + ) + normal_mode_prop = db.DenseMatrixProperty(normal_modes_props[0]) + normal_mode_prop.link(properties) + normal_modes = normal_mode_prop.get_data() + assert normal_modes.shape == (9, 3) + assert np.allclose(np.abs(ref_normal_modes), np.abs(normal_modes), atol=1e-1) + + # Frequencies + ref_frequencies = [0.00124259, 0.00233505, 0.00246372] + frequencies_prop = db.VectorProperty(frequencies_props[0]) + frequencies_prop.link(properties) + frequencies = frequencies_prop.get_data() + assert len(frequencies) == 3 + self.assertAlmostEqual(ref_frequencies[0], frequencies[0], delta=1e-1) + self.assertAlmostEqual(ref_frequencies[1], frequencies[1], delta=1e-1) + self.assertAlmostEqual(ref_frequencies[2], frequencies[2], delta=1e-1) + + # Gibbs free energy + gibbs_free_energy_prop = db.NumberProperty(gibbs_free_energy_props[0]) + gibbs_free_energy_prop.link(properties) + gibbs_free_energy = gibbs_free_energy_prop.get_data() + self.assertAlmostEqual(gibbs_free_energy, 
-4.060596474205113, delta=1e-1) + + # Gibbs energy correction + gibbs_energy_correction_prop = db.NumberProperty(gibbs_energy_correction_props[0]) + gibbs_energy_correction_prop.link(properties) + gibbs_energy_correction = gibbs_energy_correction_prop.get_data() + self.assertAlmostEqual(gibbs_energy_correction, 0.0005468527260594769, delta=1e-5) + + @skip_without('database', 'readuct') + def test_fail_to_optimize_non_valid_minimum(self): + # fails because of different graph + from scine_puffin.jobs.scine_geometry_validation import ScineGeometryValidation + import scine_database as db + + # Setup DB for calculation + h2o2 = os.path.join(resource_path(), "h2o2_distorted.xyz") + ref_graph = "pGFhgaVhYQBhYwNhb4GDAAECYXKjYWyDgQCBAYECYmxygoIAAYECYXOCgg" + \ + "ABgQJhcwNhYw9hZ6JhRYODAAMAgwEDAIMCAwBhWoQBAQgIYXaDAQIB" + structure = add_structure(self.manager, h2o2, db.Label.MINIMUM_OPTIMIZED) + structure.set_graph("masm_cbor_graph", ref_graph) + structure.set_graph("masm_decision_list", "") + + model = db.Model('dftb3', 'dftb3', '') + model.program = "sparrow" + job = db.Job('scine_geometry_validation') + settings = { + "optimization_attempts": 2 + } + + calculation = add_calculation(self.manager, model, job, [structure.id()], settings) + + # Run calculation/job + config = self.get_configuration() + job = ScineGeometryValidation() + job.prepare(config["daemon"]["job_dir"], calculation.id()) + success = job.run(self.manager, calculation, config) + assert not success + # Check comment of calculation + ref_comment = "Scine Geometry Validation Job: End structure does not match starting structure." + \ + "\nError: Scine Geometry Validation Job failed with message:" + \ + "\nStructure could not be validated to be a minimum. Hessian information is stored anyway." 
+ assert calculation.get_comment() == ref_comment + + # Check results + assert calculation.get_status() == db.Status.FAILED + assert structure.has_property("electronic_energy") + assert structure.has_property("hessian") + assert structure.has_property("normal_modes") + assert structure.has_property("frequencies") + assert structure.has_property("gibbs_free_energy") + assert structure.has_property("gibbs_energy_correction") + energy_props = structure.get_properties("electronic_energy") + hessian_props = structure.get_properties("hessian") + normal_modes_props = structure.get_properties("normal_modes") + frequencies_props = structure.get_properties("frequencies") + gibbs_free_energy_props = structure.get_properties("gibbs_free_energy") + gibbs_energy_correction_props = structure.get_properties("gibbs_energy_correction") + + assert len(energy_props) == 1 + assert len(hessian_props) == 1 + assert len(normal_modes_props) == 1 + assert len(frequencies_props) == 1 + assert len(gibbs_free_energy_props) == 1 + assert len(gibbs_energy_correction_props) == 1 + results = calculation.get_results() + assert len(results.property_ids) == 6 + assert energy_props[0] in results.property_ids + assert hessian_props[0] in results.property_ids + assert frequencies_props[0] in results.property_ids + assert normal_modes_props[0] in results.property_ids + assert gibbs_free_energy_props[0] in results.property_ids + assert gibbs_energy_correction_props[0] in results.property_ids + + # Check generated properties + # Energy + properties = self.manager.get_collection("properties") + energy = db.NumberProperty(energy_props[0]) + energy.link(properties) + self.assertAlmostEqual(energy.get_data(), -7.168684600560611, delta=1e-1) + # Normal modes + ref_normal_modes = np.array([ + [-4.31688799e-02, -6.91523883e-03, -1.82492631e-02, -3.86771134e-02, -3.45645862e-02, 3.81068441e-03, ], + [-1.65111999e-03, 2.83228734e-03, 2.10319980e-02, -4.31722218e-02, 1.36502269e-03, -5.94798269e-02, ], + 
[-2.99343119e-02, 1.78068975e-01, -2.59408246e-04, -1.58605276e-02, -1.23374087e-02, 2.33946275e-03, ], + [4.03868933e-02, -1.78581009e-03, -4.13741993e-03, -6.89669850e-03, -1.88649585e-02, 9.27050089e-04, ], + [-3.48727166e-03, -6.65257470e-03, -4.16577386e-02, 5.81676804e-03, 2.25155821e-03, -2.87158874e-04, ], + [-2.68061138e-02, -1.69761922e-01, 5.77588045e-02, -5.30648270e-03, 1.05427368e-02, -9.30432318e-04, ], + [-2.90044217e-02, 7.59482476e-02, 5.02213061e-02, 1.25262116e-01, 9.65015716e-01, -5.88417133e-02, ], + [7.31645599e-02, 6.21687101e-02, 3.05135914e-01, 5.98158280e-01, -1.16896572e-01, -1.63631824e-02, ], + [9.47125787e-01, -4.54672494e-03, -7.34029615e-02, -1.01582077e-01, 3.33256908e-02, -7.34742518e-03, ], + [7.31645599e-02, 6.21687101e-02, 3.05135914e-01, 5.98158280e-01, -1.16896572e-01, -1.63631824e-02, ], + [8.40020643e-03, -1.52700207e-03, 2.22688135e-02, -5.19280259e-03, 5.94884190e-02, 9.65080322e-01, ], + [-4.64510457e-02, -1.27316095e-01, -8.39319373e-01, 4.37578701e-01, -4.83778945e-03, -1.50189582e-02, ], + ]) + + normal_mode_prop = db.DenseMatrixProperty(normal_modes_props[0]) + normal_mode_prop.link(properties) + normal_modes = normal_mode_prop.get_data() + assert normal_modes.shape == (12, 6) + assert np.allclose(np.abs(ref_normal_modes), np.abs(normal_modes), atol=1e-1) + + # Frequencies + ref_frequencies = [-0.00117588, 0.00064638, 0.00083982, 0.00106649, 0.00156229, 0.00239611] + frequencies_prop = db.VectorProperty(frequencies_props[0]) + frequencies_prop.link(properties) + frequencies = frequencies_prop.get_data() + assert len(frequencies) == 6 + for i, ref in enumerate(ref_frequencies): + self.assertAlmostEqual(ref, frequencies[i], delta=1e-1) + + # Gibbs free energy + gibbs_free_energy_prop = db.NumberProperty(gibbs_free_energy_props[0]) + gibbs_free_energy_prop.link(properties) + gibbs_free_energy = gibbs_free_energy_prop.get_data() + self.assertAlmostEqual(gibbs_free_energy, -7.170441957688447, delta=1e-1) + + # Gibbs 
energy correction + gibbs_energy_correction_prop = db.NumberProperty(gibbs_energy_correction_props[0]) + gibbs_energy_correction_prop.link(properties) + gibbs_energy_correction = gibbs_energy_correction_prop.get_data() + self.assertAlmostEqual(gibbs_energy_correction, -0.001757357127836201, delta=1e-5) + + # Graph + assert structure.get_graph("masm_cbor_graph") == ref_graph + + @skip_without('database', 'readuct') + @pytest.mark.filterwarnings("ignore:.+The structure had a graph already") + def test_optimize_non_valid_minimum(self): + # expected to succeed: the distorted geometry is re-optimized (see the position_shift checks below) + from scine_puffin.jobs.scine_geometry_validation import ScineGeometryValidation + import scine_database as db + import scine_utilities as utils + + # Setup DB for calculation + water = os.path.join(resource_path(), "water_distorted_2.xyz") + ref_graph = "pGFhgaVhYQBhYwJhb4GCAAFhcqNhbIKBAIEBYmxygYIAAWFzgYIAAWFzAWFjD2FnomFFgoMAAgCDAQIAYVqDAQEIYXaDAgAA" + structure = add_structure(self.manager, water, db.Label.MINIMUM_OPTIMIZED) + structure.set_graph("masm_cbor_graph", ref_graph) + structure.set_graph("masm_decision_list", "") + + model = db.Model('dftb3', 'dftb3', '') + model.program = "sparrow" + job = db.Job('scine_geometry_validation') + settings = { + "optimization_attempts": 2 + } + + calculation = add_calculation(self.manager, model, job, [structure.id()], settings) + + # Run calculation/job + config = self.get_configuration() + job = ScineGeometryValidation() + job.prepare(config["daemon"]["job_dir"], calculation.id()) + success = job.run(self.manager, calculation, config) + assert success + + # Check results + assert calculation.get_status() == db.Status.COMPLETE + assert structure.has_property("electronic_energy") + assert structure.has_property("bond_orders") + assert structure.has_property("hessian") + assert structure.has_property("normal_modes") + assert structure.has_property("frequencies") + assert structure.has_property("gibbs_free_energy") + assert
structure.has_property("gibbs_energy_correction") + assert structure.has_property("position_shift") + energy_props = structure.get_properties("electronic_energy") + bond_props = structure.get_properties("bond_orders") + hessian_props = structure.get_properties("hessian") + normal_modes_props = structure.get_properties("normal_modes") + frequencies_props = structure.get_properties("frequencies") + gibbs_free_energy_props = structure.get_properties("gibbs_free_energy") + gibbs_energy_correction_props = structure.get_properties("gibbs_energy_correction") + position_shift_props = structure.get_properties("position_shift") + + assert len(energy_props) == 1 + assert len(bond_props) == 1 + assert len(hessian_props) == 1 + assert len(normal_modes_props) == 1 + assert len(frequencies_props) == 1 + assert len(gibbs_free_energy_props) == 1 + assert len(gibbs_energy_correction_props) == 1 + assert len(position_shift_props) == 1 + results = calculation.get_results() + assert len(results.property_ids) == 8 + assert energy_props[0] in results.property_ids + assert bond_props[0] in results.property_ids + assert hessian_props[0] in results.property_ids + assert frequencies_props[0] in results.property_ids + assert normal_modes_props[0] in results.property_ids + assert gibbs_free_energy_props[0] in results.property_ids + assert gibbs_energy_correction_props[0] in results.property_ids + assert position_shift_props[0] in results.property_ids + + # Check generated properties + # Energy + properties = self.manager.get_collection("properties") + energy = db.NumberProperty(energy_props[0]) + energy.link(properties) + self.assertAlmostEqual(energy.get_data(), -4.071575644461411, delta=1e-1) + # Bond orders + bond_prop = db.SparseMatrixProperty(bond_props[0]) + bond_prop.link(properties) + bond = bond_prop.get_data() + assert bond.shape == (3, 3) + self.assertAlmostEqual(bond[1, 0], +0.8541060592278684, delta=1e-1) + self.assertAlmostEqual(bond[2, 0], +0.8541060835647227, delta=1e-1) + 
self.assertAlmostEqual(bond[0, 1], +0.8541060592278684, delta=1e-1) + self.assertAlmostEqual(bond[0, 2], +0.8541060835647227, delta=1e-1) + self.assertAlmostEqual(bond[2, 1], +0.015945768676323538, delta=1e-1) + self.assertAlmostEqual(bond[1, 2], +0.015945768676323538, delta=1e-1) + self.assertAlmostEqual(bond[0, 0], +0.000000000) + self.assertAlmostEqual(bond[1, 1], +0.000000000) + self.assertAlmostEqual(bond[2, 2], +0.000000000) + # Hessian + ref_hessian = np.array([ + [6.81547839e-01, -1.01826427e-01, 2.01519849e-10, -4.78354633e-01, -1.53645723e-01, -3.88607902e-07, + -2.03194076e-01, 2.55471871e-01, 3.91793089e-07, ], + [-1.01826427e-01, 4.23459677e-01, 6.58626116e-10, -9.28932439e-02, -7.42712067e-02, 1.39593626e-07, + 1.94720190e-01, -3.49188697e-01, -1.31228326e-07, ], + [2.01519849e-10, 6.58626116e-10, 4.44106624e-06, -5.17270348e-10, -1.75271200e-10, -2.02383752e-06, + 3.15750472e-10, -4.83355620e-10, -2.15141967e-06, ], + [-4.78354633e-01, -9.28932439e-02, -5.17270348e-10, 4.77068864e-01, 1.20898325e-01, 2.27650618e-07, + 1.28613515e-03, -2.80051173e-02, -2.30263842e-07, ], + [-1.53645723e-01, -7.42712067e-02, -1.75271200e-10, 1.20898325e-01, 6.68427367e-02, 5.48141018e-08, + 3.27473460e-02, 7.42862870e-03, -5.87412969e-08, ], + [-3.88607902e-07, 1.39593626e-07, -2.02383752e-06, 2.27650618e-07, 5.48141018e-08, 1.86869915e-06, + 1.60957288e-07, -1.94407719e-07, 2.25367506e-08, ], + [-2.03194076e-01, 1.94720190e-01, 3.15750472e-10, 1.28613515e-03, 3.27473460e-02, 1.60957288e-07, + 2.01908446e-01, -2.27467221e-01, -1.61529252e-07, ], + [2.55471871e-01, -3.49188697e-01, -4.83355620e-10, -2.80051173e-02, 7.42862870e-03, -1.94407719e-07, + -2.27467221e-01, 3.41760135e-01, 1.89969615e-07, ], + [3.91793089e-07, -1.31228326e-07, -2.15141967e-06, -2.30263842e-07, -5.87412969e-08, 2.25367506e-08, + -1.61529252e-07, 1.89969615e-07, 1.99567548e-06, ], + ]) + hessian_prop = db.DenseMatrixProperty(hessian_props[0]) + hessian_prop.link(properties) + hessian = 
hessian_prop.get_data() + assert hessian.shape == (9, 9) + assert np.allclose(ref_hessian, hessian, atol=1e-1) + # Normal modes + ref_normal_modes = np.array([ + [-2.89597029e-02, -3.15134055e-02, 6.80792263e-02, ], + [-6.12657343e-02, -3.78163196e-02, -3.58186536e-02, ], + [4.20375520e-18, 2.34897827e-18, 1.00104275e-17, ], + [-1.79731098e-01, 7.85076713e-01, -5.36167737e-01, ], + [5.37311156e-01, 1.05145171e-01, 8.02545212e-03, ], + [-1.04237003e-16, -1.01785409e-16, 7.12699448e-17, ], + [6.39425796e-01, -2.84845564e-01, -5.44494573e-01, ], + [4.35196516e-01, 4.95135906e-01, 5.60545477e-01, ], + [-1.32320360e-16, 7.19185833e-17, -4.43320067e-16, ], + ]) + normal_mode_prop = db.DenseMatrixProperty(normal_modes_props[0]) + normal_mode_prop.link(properties) + normal_modes = normal_mode_prop.get_data() + assert normal_modes.shape == (9, 3) + assert np.allclose(np.abs(ref_normal_modes), np.abs(normal_modes), atol=1e-1) + + # Frequencies + ref_frequencies = [0.00099327, 0.00261741, 0.00276749] + frequencies_prop = db.VectorProperty(frequencies_props[0]) + frequencies_prop.link(properties) + frequencies = frequencies_prop.get_data() + assert len(frequencies) == 3 + self.assertAlmostEqual(ref_frequencies[0], frequencies[0], delta=1e-1) + self.assertAlmostEqual(ref_frequencies[1], frequencies[1], delta=1e-1) + self.assertAlmostEqual(ref_frequencies[2], frequencies[2], delta=1e-1) + + # Gibbs free energy + gibbs_free_energy_prop = db.NumberProperty(gibbs_free_energy_props[0]) + gibbs_free_energy_prop.link(properties) + gibbs_free_energy = gibbs_free_energy_prop.get_data() + self.assertAlmostEqual(gibbs_free_energy, -4.069523427319414, delta=1e-1) + + # Gibbs energy correction + gibbs_energy_correction_prop = db.NumberProperty(gibbs_energy_correction_props[0]) + gibbs_energy_correction_prop.link(properties) + gibbs_energy_correction = gibbs_energy_correction_prop.get_data() + self.assertAlmostEqual(gibbs_energy_correction, 0.002052217141994994, delta=1e-5) + + # Position 
Shift + ref_position_shift = np.array([ + [-2.28040644e-01, -6.57123762e-01, 4.61543866e-17], + [-3.75344241e-01, -1.57403825e-01, 1.12412725e-16], + [-3.41478178e-01, -1.30335476e-01, 1.14223117e-16]]) + position_shift_prop = db.DenseMatrixProperty(position_shift_props[0]) + position_shift_prop.link(properties) + position_shift = position_shift_prop.get_data() + assert position_shift.shape == (3, 3) + assert np.allclose(ref_position_shift, position_shift, atol=1e-1) + new_positions = structure.get_atoms().positions + water_atoms, _ = utils.io.read(water) + assert np.allclose(water_atoms.positions, new_positions - position_shift) diff --git a/scine_puffin/tests/jobs/test_scine_hessian.py b/scine_puffin/tests/jobs/test_scine_hessian.py index 966f5f2..534e60b 100644 --- a/scine_puffin/tests/jobs/test_scine_hessian.py +++ b/scine_puffin/tests/jobs/test_scine_hessian.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/tests/jobs/test_scine_irc_scan_job.py b/scine_puffin/tests/jobs/test_scine_irc_scan_job.py index a25cd06..aede1b4 100644 --- a/scine_puffin/tests/jobs/test_scine_irc_scan_job.py +++ b/scine_puffin/tests/jobs/test_scine_irc_scan_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" diff --git a/scine_puffin/tests/jobs/test_scine_react_complex_afir_job.py b/scine_puffin/tests/jobs/test_scine_react_complex_afir_job.py index 551eb7a..0d92d34 100644 --- a/scine_puffin/tests/jobs/test_scine_react_complex_afir_job.py +++ b/scine_puffin/tests/jobs/test_scine_react_complex_afir_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -148,7 +148,7 @@ def test_energy_and_structure(self): elementary_steps = self.manager.get_collection("elementary_steps") assert calculation.get_status() == db.Status.COMPLETE results = calculation.get_results() - assert len(results.property_ids) == 10 + assert len(results.property_ids) == 11 assert len(results.structure_ids) == 3 + 2 # re-optimized reactants (x2) + complex + TS + product assert len(results.elementary_step_ids) == 2 new_elementary_step_one = db.ElementaryStep(results.elementary_step_ids[0], elementary_steps) diff --git a/scine_puffin/tests/jobs/test_scine_react_complex_nt2_job.py b/scine_puffin/tests/jobs/test_scine_react_complex_nt2_job.py index 8cfe0f1..fb11c34 100644 --- a/scine_puffin/tests/jobs/test_scine_react_complex_nt2_job.py +++ b/scine_puffin/tests/jobs/test_scine_react_complex_nt2_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" @@ -155,7 +155,7 @@ def test_energy_and_structure(self): elementary_steps = self.manager.get_collection("elementary_steps") assert calculation.get_status() == db.Status.COMPLETE results = calculation.get_results() - assert len(results.property_ids) == 10 + assert len(results.property_ids) == 11 assert len(results.structure_ids) == 3 + 2 # re-optimized reactants (x2) + complex + TS + product assert len(results.elementary_step_ids) == 2 assert structures.count("{}") == 3 + 3 + 2 @@ -328,7 +328,7 @@ def test_structure_deduplication(self): elementary_steps = self.manager.get_collection("elementary_steps") assert calculation.get_status() == db.Status.COMPLETE results = calculation.get_results() - assert len(results.property_ids) == 10 + assert len(results.property_ids) == 11 assert len(results.structure_ids) == 3 + 2 # re-optimized reactants (x2) + complex + TS + product assert len(results.elementary_step_ids) == 2 assert structures.count("{}") == 3 + 3 + 2 @@ -494,7 +494,7 @@ def test_mep_storage(self): elementary_steps = self.manager.get_collection("elementary_steps") assert calculation.get_status() == db.Status.COMPLETE results = calculation.get_results() - assert len(results.property_ids) == 10 + assert len(results.property_ids) == 11 # Structure counts: (complex + TS + product) + re-optimized reactants (x2) assert len(results.structure_ids) == 3 + 2 assert len(results.elementary_step_ids) == 2 @@ -659,7 +659,7 @@ def test_full_storage(self): elementary_steps = self.manager.get_collection("elementary_steps") assert calculation.get_status() == db.Status.COMPLETE results = calculation.get_results() - assert len(results.property_ids) == 10 + assert len(results.property_ids) == 11 # Structure counts: (complex + TS + product) + re-optimized reactants (x2) assert len(results.structure_ids) == 3 + 2 assert len(results.elementary_step_ids) == 2 @@ -683,7 +683,7 @@ def test_full_storage(self): selection = {"label": "minimum_guess"} n_opt_structures = 
structures.count(json.dumps(selection))
         assert n_opt_structures >= 520  # Subject to numerics and changes in GeoOpt algorithm
-        assert n_opt_structures <= 560  # Subject to numerics and changes in GeoOpt algorithm
+        assert n_opt_structures <= 1560  # Subject to numerics and changes in GeoOpt algorithm
         selection = {"label": "reactive_complex_scanned"}
         n_nt_structures = structures.count(json.dumps(selection))
         assert n_nt_structures >= 50  # Subject to numerics and changes in NT2 algorithm
@@ -824,5 +824,147 @@ def test_charged_reactants(self):
             print(calculation.get_comment())
             raise e
 
-        assert "The chosen spin multiplicity (1) is not compatible with the molecular charge (0)."\
+        assert "The chosen spin multiplicity (1) is not compatible with the molecular charge (0)." \
             not in calculation.get_comment()
+
+    @skip_without('database', 'readuct', 'molassembler', 'xtb_wrapper')
+    def test_elementary_step_not_from_starting_structures(self):
+        """Run NT2 on H2O2 + HIO3 and expect one elementary step whose results
+        hold three new structures (TS, product, and a new reactant)."""
+        from scine_puffin.jobs.scine_react_complex_nt2 import ScineReactComplexNt2
+        import scine_database as db
+
+        model = db.Model('gfn2', 'gfn2', '')
+        model.program = "xtb"
+        model.solvation = "gbsa"
+        model.solvent = "water"
+
+        reactant_one_path = os.path.join(resource_path(), "h2o2.xyz")
+        reactant_two_path = os.path.join(resource_path(), "hio3.xyz")
+
+        reactant_one_guess = add_structure(self.manager, reactant_one_path, db.Label.MINIMUM_OPTIMIZED, model=model)
+        reactant_two_guess = add_structure(self.manager, reactant_two_path, db.Label.MINIMUM_OPTIMIZED, model=model)
+        # Read the reference graphs through context managers so the file handles are closed again.
+        with open(os.path.join(resource_path(), "h2o2.json"), "r") as graph_file:
+            graph_one = json.load(graph_file)
+        with open(os.path.join(resource_path(), "hio3.json"), "r") as graph_file:
+            graph_two = json.load(graph_file)
+        reactant_one_guess.set_graph("masm_cbor_graph", graph_one["masm_cbor_graph"])
+        reactant_one_guess.set_graph("masm_idx_map", graph_one["masm_idx_map"])
+        reactant_one_guess.set_graph("masm_decision_list", graph_one["masm_decision_list"])
+        reactant_two_guess.set_graph("masm_cbor_graph", graph_two["masm_cbor_graph"])
+        reactant_two_guess.set_graph("masm_idx_map", graph_two["masm_idx_map"])
+        reactant_two_guess.set_graph("masm_decision_list", graph_two["masm_decision_list"])
+
+        job = db.Job('scine_react_complex_nt2')
+        settings = {
+            "self_consistence_criterion": 0.00000001,
+            "max_scf_iterations": 2000,
+            "nt_convergence_max_iterations": 600,
+            "nt_nt_total_force_norm": 0.1,
+            "nt_sd_factor": 1.0,
+            "nt_nt_use_micro_cycles": True,
+            "nt_nt_fixed_number_of_micro_cycles": True,
+            "nt_nt_number_of_micro_cycles": 10,
+            "nt_nt_filter_passes": 10,
+            "tsopt_convergence_max_iterations": 1000,
+            "tsopt_convergence_step_max_coefficient": 0.002,
+            "tsopt_convergence_step_rms": 0.001,
+            "tsopt_convergence_gradient_max_coefficient": 0.0002,
+            "tsopt_convergence_gradient_rms": 0.0001,
+            "tsopt_convergence_requirement": 3,
+            "tsopt_convergence_delta_value": 0.000001,
+            "tsopt_optimizer": "bofill",
+            "tsopt_geoopt_coordinate_system": "cartesianWithoutRotTrans",
+            "tsopt_bofill_trust_radius": 0.2,
+            "tsopt_bofill_follow_mode": 0,
+            "irc_convergence_max_iterations": 60,
+            "irc_sd_factor": 0.6,
+            "irc_sd_use_trust_radius": True,
+            "irc_sd_trust_radius": 0.1,
+            "irc_sd_dynamic_multiplier": 1.4,
+            "irc_irc_initial_step_size": 0.3,
+            "irc_stop_on_error": False,
+            "irc_convergence_step_max_coefficient": 0.002,
+            "irc_convergence_step_rms": 0.001,
+            "irc_convergence_gradient_max_coefficient": 0.0002,
+            "irc_convergence_gradient_rms": 0.0001,
+            "irc_convergence_delta_value": 0.000001,
+            "irc_irc_coordinate_system": "cartesianWithoutRotTrans",
+            "ircopt_convergence_max_iterations": 1000,
+            "ircopt_convergence_step_max_coefficient": 0.002,
+            "ircopt_convergence_step_rms": 0.001,
+            "ircopt_convergence_gradient_max_coefficient": 0.0002,
+            "ircopt_convergence_gradient_rms": 0.0001,
+            "ircopt_convergence_requirement": 3,
+            "ircopt_convergence_delta_value": 0.000001,
+            "ircopt_geoopt_coordinate_system": "cartesianWithoutRotTrans",
+            "ircopt_bfgs_use_trust_radius": True,
+            "ircopt_bfgs_trust_radius": 0.2,
+            "opt_convergence_max_iterations": 1000,
+            "opt_convergence_step_max_coefficient": 0.002,
+            "opt_convergence_step_rms": 0.001,
+            "opt_convergence_gradient_max_coefficient": 0.0002,
+            "opt_convergence_gradient_rms": 0.0001,
+            "opt_convergence_requirement": 3,
+            "opt_convergence_delta_value": 0.000001,
+            "opt_geoopt_coordinate_system": "cartesianWithoutRotTrans",
+            "opt_bfgs_use_trust_radius": True,
+            "opt_bfgs_trust_radius": 0.4,
+            "rc_x_alignment_0": [
+                0.6412339068165366,
+                0.7013288980835932,
+                0.31137895475072497,
+                -0.7013288980835932,
+                0.700309491996055,
+                -0.133057852579546,
+                -0.31137895475072497,
+                -0.133057852579546,
+                0.9409244148204816
+            ],
+            "rc_x_alignment_1": [
+                0.3310448646119655,
+                0.07971826739747179,
+                -0.9402416154676363,
+                -0.07971826739747179,
+                0.9952255537541873,
+                0.05631247639570067,
+                0.9402416154676363,
+                0.05631247639570067,
+                0.33581931085777805
+            ],
+            "rc_x_rotation": 2.0943951023931953,
+            "rc_x_spread": 2.8723837106958987,
+            "rc_displacement": 0.0,
+            "nt_nt_associations": [0, 6],
+            "nt_nt_dissociations": [0, 2],
+            "rc_minimal_spin_multiplicity": False
+        }
+
+        calculation = add_calculation(self.manager, model, job, [reactant_one_guess.id(), reactant_two_guess.id()],
+                                      settings)
+        # Run calculation/job
+        config = self.get_configuration()
+        job = ScineReactComplexNt2()
+        job.prepare(config["daemon"]["job_dir"], calculation.id())
+        self.run_job(job, calculation, config)
+
+        # Check results
+        structures = self.manager.get_collection("structures")
+        properties = self.manager.get_collection("properties")
+        elementary_steps = self.manager.get_collection("elementary_steps")
+        assert calculation.get_status() == db.Status.COMPLETE
+        results = calculation.get_results()
+        assert len(results.property_ids) == 9
+        # Structure counts: TS + product + new reactant
+        assert len(results.structure_ids) == 3
+        assert len(results.elementary_step_ids) == 1
+        new_elementary_step = db.ElementaryStep(results.elementary_step_ids[-1], elementary_steps)
+        product = db.Structure(new_elementary_step.get_reactants(db.Side.RHS)[1][0], structures)
+        assert product.has_property('bond_orders')
+        assert product.has_graph('masm_cbor_graph')
+        new_ts = db.Structure(new_elementary_step.get_transition_state(), structures)
+        assert new_ts.has_property('electronic_energy')
+        energy_props = new_ts.get_properties("electronic_energy")
+        assert energy_props[0] in results.property_ids
+        energy = db.NumberProperty(energy_props[0], properties)
+        self.assertAlmostEqual(energy.get_data(), -25.38936573516693, delta=1e-4)
diff --git a/scine_puffin/tests/jobs/test_scine_react_complex_nt2_propensity_job.py b/scine_puffin/tests/jobs/test_scine_react_complex_nt2_propensity_job.py
new file mode 100644
index 0000000..bc05048
--- /dev/null
+++ b/scine_puffin/tests/jobs/test_scine_react_complex_nt2_propensity_job.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+__copyright__ = """ This code is licensed under the 3-clause BSD license.
+Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group.
+See LICENSE.txt for details.
+"""
+
+import os
+import json
+from unittest import mock
+
+from ..testcases import (
+    JobTestCase,
+    skip_without
+)
+
+from ..db_setup import (
+    add_calculation,
+    add_compound_and_structure,
+)
+
+from ..resources import resource_path
+
+
+class ScineReactComplexNt2JobTest(JobTestCase):
+
+    # The parallel numerical Hessian makes problems on some machines.
+    # Therefore, we enforce the use of the serial Hessian. patch.dict restores
+    # the previous environment (set or unset) even if the test fails.
+    @skip_without('database', 'readuct', 'molassembler')
+    @mock.patch.dict(os.environ, {"OMP_NUM_THREADS": "1"})
+    def test_propensity(self):
+        """Run NT2 with a spin propensity check on a sextet reactant and expect
+        structures with multiplicities 2, 4, and 6 in the database afterwards."""
+        # import Job
+        from scine_puffin.jobs.scine_react_complex_nt2 import ScineReactComplexNt2
+        import scine_database as db
+
+        model = db.Model('pm6', 'pm6', '')
+        model.spin_mode = "unrestricted"
+        model.program = "sparrow"
+
+        structures = self.manager.get_collection('structures')
+        reactant_one_path = os.path.join(resource_path(), "FeO_H2.xyz")
+        compound_one = add_compound_and_structure(self.manager, reactant_one_path, charge=1, multiplicity=6,
+                                                  model=model)
+        reactant_one_guess = db.Structure(compound_one.get_centroid(), structures)
+        with open(os.path.join(resource_path(), "FeO_H2.json"), "r") as graph_file:
+            graph_one = json.load(graph_file)
+        reactant_one_guess.set_graph("masm_cbor_graph", graph_one["masm_cbor_graph"])
+        reactant_one_guess.set_graph("masm_idx_map", graph_one["masm_idx_map"])
+        reactant_one_guess.set_graph("masm_decision_list", graph_one["masm_decision_list"])
+
+        job = db.Job('scine_react_complex_nt2')
+        settings = {
+            "max_scf_iterations": 1000,
+            "self_consistence_criterion": 1e-6,
+            "density_rmsd_criterion": 1e-4,
+            "spin_propensity_check_for_unimolecular_reaction": False,
+            "spin_propensity_energy_range_to_save": 200.0,
+            "spin_propensity_energy_range_to_optimize": 500.0,
+            "spin_propensity_optimize_all": True,
+            "spin_propensity_check": 2,
+            "nt_nt_total_force_norm": 0.05,
+            "tsopt_geoopt_coordinate_system": "cartesianWithoutRotTrans",
+            "tsopt_bofill_trust_radius": 0.2,
+            "tsopt_convergence_delta_value": 1e-6,
+            "irc_stop_on_error": False,
+            "irc_irc_coordinate_system": "cartesianWithoutRotTrans",
+            "irc_convergence_delta_value": 1e-6,
+            "ircopt_geoopt_coordinate_system": "cartesianWithoutRotTrans",
+            "ircopt_convergence_delta_value": 1e-6,
+            "ircopt_convergence_max_iterations": 1000,
+            "opt_convergence_max_iterations": 1000,
+            "opt_geoopt_coordinate_system": "cartesianWithoutRotTrans",
+            "opt_bfgs_trust_radius": 0.2,
+            "opt_convergence_delta_value": 1e-6,
+            "nt_nt_associations": [2, 3],
+            "nt_nt_dissociations": [
+                0,
+                2,
+                1,
+                3
+            ]
+        }
+
+        calculation = add_calculation(self.manager, model, job, [reactant_one_guess.id()], settings)
+
+        # Run calculation/job
+        config = self.get_configuration()
+        job = ScineReactComplexNt2()
+        job.prepare(config["daemon"]["job_dir"], calculation.id())
+        self.run_job(job, calculation, config)
+
+        # Check results
+        elementary_steps = self.manager.get_collection("elementary_steps")
+        assert calculation.get_status() == db.Status.COMPLETE
+        results = calculation.get_results()
+
+        assert len(results.structure_ids) == 5  # TS + product + 1 spin state changed product + 2 spin changed TS
+        assert len(results.elementary_step_ids) == 1
+        assert structures.count("{}") == 6  # reactant + TS + product + 1 spin changed product + 2 spin changed TS
+        new_elementary_step = db.ElementaryStep(results.elementary_step_ids[-1], elementary_steps)
+        assert len(new_elementary_step.get_reactants(db.Side.RHS)[1]) == 1
+        product = db.Structure(new_elementary_step.get_reactants(db.Side.RHS)[1][0], structures)
+        assert product.get_multiplicity() == 6
+        assert product.has_property('bond_orders')
+        assert product.has_graph('masm_cbor_graph')
+        new_ts = db.Structure(new_elementary_step.get_transition_state(), structures)
+        assert new_ts.get_multiplicity() == 6
+        assert new_ts.has_property('electronic_energy')
+        energy_props = new_ts.get_properties("electronic_energy")
+        assert energy_props[0] in results.property_ids
+
+        assert any(structure.get_multiplicity() == 2 for structure in structures.query_structures("{}"))
+        assert any(structure.get_multiplicity() == 4 for structure in structures.query_structures("{}"))
+        assert any(structure.get_multiplicity() == 6 for structure in structures.query_structures("{}"))
+
+    def test_propensity_hit(self):
+        # import Job
+        from scine_puffin.jobs.scine_react_complex_nt2 import ScineReactComplexNt2
+        import scine_database as db
+
+        # The parallel numerical Hessian makes problems on some machines
+        # Therefore, we enforce the use of the serial Hessian
+        omp = os.getenv("OMP_NUM_THREADS")
+        os.environ["OMP_NUM_THREADS"] = "1"
+
+        model = db.Model('pm6', 'pm6', '')
+        model.spin_mode = "unrestricted"
+        model.program = "sparrow"
+
+        structures = self.manager.get_collection('structures')
+        reactant_one_path = os.path.join(resource_path(), "peroxide.xyz")
+        # reactant_one_path = os.path.join(resource_path(), "FeO_H2_lhs.xyz")
+        compound_one = add_compound_and_structure(self.manager, reactant_one_path, charge=0, multiplicity=1,
+                                                  model=model)
+        reactant_one_guess = db.Structure(compound_one.get_centroid(), structures)
+        graph_one = json.load(open(os.path.join(resource_path(), "peroxide.json"), "r"))
+        # graph_one = json.load(open(os.path.join(resource_path(), "FeO_H2_lhs.json"), "r"))
+        reactant_one_guess.set_graph("masm_cbor_graph", graph_one["masm_cbor_graph"])
+        reactant_one_guess.set_graph("masm_idx_map", graph_one["masm_idx_map"])
+        reactant_one_guess.set_graph("masm_decision_list", graph_one["masm_decision_list"])
+
+        job = db.Job('scine_react_complex_nt2')
+        settings = {
+            "max_scf_iterations": 1000,
+            "self_consistence_criterion": 1e-7,
+            "density_rmsd_criterion": 1e-4,
+            "spin_propensity_check_for_unimolecular_reaction": False,
+            "spin_propensity_energy_range_to_save": 250.0,
+            "spin_propensity_energy_range_to_optimize": 500.0,
+            "spin_propensity_optimize_all": True,
"spin_propensity_check": 1, + "nt_nt_total_force_norm": 0.05, + "tsopt_geoopt_coordinate_system": "cartesianWithoutRotTrans", + "tsopt_bofill_trust_radius": 0.2, + "tsopt_convergence_delta_value": 1e-6, + "irc_stop_on_error": False, + "irc_irc_coordinate_system": "cartesianWithoutRotTrans", + "irc_convergence_delta_value": 1e-6, + "ircopt_geoopt_coordinate_system": "cartesianWithoutRotTrans", + "ircopt_convergence_delta_value": 1e-6, + "ircopt_bfgs_trust_radius": 0.2, + "ircopt_convergence_requirement": 1, + "ircopt_convergence_max_iterations": 1000, + "opt_convergence_max_iterations": 1000, + "opt_geoopt_coordinate_system": "cartesianWithoutRotTrans", + "opt_bfgs_trust_radius": 0.2, + "opt_convergence_delta_value": 1e-6, + "nt_nt_associations": [], + "nt_nt_dissociations": [ + 0, 1, + 2, 3, + 4, 5 + ] + } + + calculation = add_calculation(self.manager, model, job, [reactant_one_guess.id()], settings) + + # Run calculation/job + config = self.get_configuration() + job = ScineReactComplexNt2() + job.prepare(config["daemon"]["job_dir"], calculation.id()) + self.run_job(job, calculation, config) + + # Check results + elementary_steps = self.manager.get_collection("elementary_steps") + assert calculation.get_status() == db.Status.COMPLETE + results = calculation.get_results() + + assert len(results.structure_ids) == 8 # TS + 3 product + 3 spin state changed product + 1 spin changed TS + assert len(results.elementary_step_ids) == 1 + # Total structure count: reactant + TS + 3 product + 3 spin state changed product + 1 spin changed TS + assert structures.count("{}") == 9 + new_elementary_step = db.ElementaryStep(results.elementary_step_ids[-1], elementary_steps) + assert len(new_elementary_step.get_reactants(db.Side.RHS)[1]) == 3 + product = db.Structure(new_elementary_step.get_reactants(db.Side.RHS)[1][0], structures) + assert product.get_multiplicity() == 3 + assert product.has_property('bond_orders') + assert product.has_graph('masm_cbor_graph') + product_1 = 
db.Structure(new_elementary_step.get_reactants(db.Side.RHS)[1][1], structures) + assert product_1.get_multiplicity() == 1 + assert product_1.has_property('bond_orders') + assert product_1.has_graph('masm_cbor_graph') + new_ts = db.Structure(new_elementary_step.get_transition_state(), structures) + assert new_ts.get_multiplicity() == 1 + assert new_ts.has_property('electronic_energy') + energy_props = new_ts.get_properties("electronic_energy") + assert energy_props[0] in results.property_ids + + assert any(structure.get_multiplicity() == 1 for structure in structures.query_structures("{}")) + assert any(structure.get_multiplicity() == 3 for structure in structures.query_structures("{}")) + + os.environ["OMP_NUM_THREADS"] = omp diff --git a/scine_puffin/tests/jobs/test_scine_react_complex_nt_job.py b/scine_puffin/tests/jobs/test_scine_react_complex_nt_job.py index 324ffc6..c4c3792 100644 --- a/scine_puffin/tests/jobs/test_scine_react_complex_nt_job.py +++ b/scine_puffin/tests/jobs/test_scine_react_complex_nt_job.py @@ -1,13 +1,15 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" import os import json +import numpy as np + from ..testcases import ( JobTestCase, skip_without @@ -16,6 +18,7 @@ from ..db_setup import ( add_calculation, add_compound_and_structure, + add_structure, ) from ..resources import resource_path @@ -151,7 +154,7 @@ def test_energy_and_structure(self): elementary_steps = self.manager.get_collection("elementary_steps") assert calculation.get_status() == db.Status.COMPLETE results = calculation.get_results() - assert len(results.property_ids) == 10 + assert len(results.property_ids) == 11 assert len(results.structure_ids) == 3 + 2 # re-optimized reactants (x2) + complex + TS + product assert len(results.elementary_step_ids) == 2 new_elementary_step_one = db.ElementaryStep(results.elementary_step_ids[0], elementary_steps) @@ -176,3 +179,343 @@ def test_energy_and_structure(self): assert energy_props[0] in results.property_ids energy = db.NumberProperty(energy_props[0], properties) self.assertAlmostEqual(energy.get_data(), -31.659096385274275, delta=1e-1) + + @skip_without('database', 'readuct', 'molassembler', 'ams_wrapper', 'ams') + def test_surface_reaction(self): + # import Job + from scine_puffin.jobs.scine_react_complex_nt import ScineReactComplexNt + import scine_database as db + + # The parallel numerical Hessian via the PipeInterface of AMS has a high numerical error. 
+ # Therefore, we enforce the use of the serial Hessian + omp = os.getenv("OMP_NUM_THREADS") + os.environ["OMP_NUM_THREADS"] = "1" + + structures = self.manager.get_collection("structures") + properties = self.manager.get_collection("properties") + elementary_steps = self.manager.get_collection("elementary_steps") + model = db.Model('reaxff', 'reaxff', '') + model.program = "ams" + model.spin_mode = "none" + model.electronic_temperature = "none" + + reactant_one_path = os.path.join(resource_path(), "h2.xyz") + reactant_two_path = os.path.join(resource_path(), "au.xyz") + complex_path = os.path.join(resource_path(), "au_complex.xyz") + compound_one = add_compound_and_structure(self.manager, reactant_one_path, multiplicity=3) + compound_two = add_compound_and_structure(self.manager, reactant_two_path, + label=db.Label.USER_SURFACE_OPTIMIZED) + reactant_one_guess = db.Structure(compound_one.get_centroid(), structures) + reactant_one_guess.set_model(model) + reactant_two_guess = db.Structure(compound_two.get_centroid(), structures) + model.periodic_boundaries = "16.721503,16.721503,40.959149,90.000000,90.000000,120.000000,xyz" + reactant_two_guess.set_model(model) + complex_structure = add_structure(self.manager, complex_path, db.Label.SURFACE_ADSORPTION_GUESS, 0, 3) + complex_structure.set_model(model) + + indices = np.array([float(i) for i in range(27)]) + complex_slab_dict = \ + "{'@module': 'pymatgen.core.surface', '@class': 'Slab', 'charge': 0, 'lattice': {'matrix': [[" \ + "8.848638315750037, 0.0, 5.418228295097955e-16], [-4.424319157875018, 7.663145570339881, " \ + "5.418228295097955e-16], [0.0, 0.0, 21.674648219236918]], 'pbc': (True, True, True), " \ + "'a': 8.848638315750037, 'b': 8.848638315750037, 'c': 21.674648219236918, 'alpha': 90.0, 'beta': 90.0, " \ + "'gamma': 119.99999999999999, 'volume': 1469.7232924497353}, 'sites': [{'species': [{'element': 'Au', " \ + "'occu': 1}], 'abc': [0.2778860961723223, 0.27785011439004653, 0.611311989614102], " \ + "'xyz': 
[1.2296159738909629, 2.1292058733065145, 13.249972327087475], 'label': 'Au', 'properties': {}}, " \ + "{'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.27788788562832945, 0.6111879661349863, " \ + "0.6113018003926083], 'xyz': [-0.24516123538008705, 4.6836223553323615, 13.24975147929597], " \ + "'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.2778838122721543, 0.9445145401412581, 0.6113030433366179], 'xyz': [-1.7199404262404057, " \ + "7.237952414405092, 13.249778419670134], 'label': 'Au', 'properties': {}}, {'species': [{'element': " \ + "'Au', " \ + "'occu': 1}], 'abc': [0.611216010475045, 0.27784888416258796, 0.6112780782104336], " \ + "'xyz': [4.1791372682946255, 2.1291964458944146, 13.24923730934234], 'label': 'Au', 'properties': {}}, " \ + "{'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.6112177834682357, 0.6111867201793348, " \ + "0.6112678888794907], 'xyz': [2.7043599829366114, 4.68361280739283, 13.249016459178563], 'label': 'Au', " \ + "'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.6112137096644943, " \ + "0.9445132926045237, 0.6112691379790377], 'xyz': [1.2295807951111568, 7.237942854349492, " \ + "13.249043532971836], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.9445309020867628, 0.27784014704928345, 0.6112634112253129], 'xyz': [7.12855884519792, " \ + "2.1291294921232975, 13.248919407599413], 'label': 'Au', 'properties': {}}, {'species': [{'element': " \ + "'Au', 'occu': 1}], 'abc': [0.9445326829170073, 0.611177990996807, 0.6112532213800518], " \ + "'xyz': [5.653781594098872, 4.68354591439641, 13.24869854628797], 'label': 'Au', 'properties': {}}, " \ + "{'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.9445285635060592, 0.94450454403208, " \ + "0.6112544929457119], 'xyz': [4.179002088498917, 7.237875812765322, 13.24872610702634], 'label': 'Au', " \ + "'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], 
'abc': [0.05547002745654646, " \ + "0.16660427084788956, 0.3887505010887012], 'xyz': [-0.24627625696841252, 1.2767127801477107, " \ + "8.426030356149676], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.05546700620246067, 0.49993273725341525, 0.3887425908135827], 'xyz': [-1.7210545107361441, " \ + "3.831057340951401, 8.425858903719167], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', " \ + "'occu': 1}], 'abc': [0.05547014457126487, 0.8332732966402286, 0.38873720403725776], " \ + "'xyz': [-3.195831763437548, 6.385494572031078, 8.425742147237287], 'label': 'Au', 'properties': {}}, " \ + "{'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.38878491622941225, 0.1665955429292308, " \ + "0.38873584369565045], 'xyz': [2.7031452541348595, 1.2766458968365024, 8.425712662311492], 'label': " \ + "'Au', " \ + "'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.38878189968552024, " \ + "0.49992400188350966, 0.3887279232503738], 'xyz': [1.2283670750126234, 3.8309904005402036, " \ + "8.42554098944638], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.3887850322697857, 0.8332645576033345, 0.388722539452408], 'xyz': [-0.24641021265014373, " \ + "6.385427603519213, 8.425424297519388], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', " \ + "'occu': 1}], 'abc': [0.7221148122596339, 0.16659429520467012, 0.38870193527793867], " \ + "'xyz': [5.6526664642645335, 1.2766363353415624, 8.424977709285917], 'label': 'Au', 'properties': {}}, " \ + "{'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.7221117995823851, 0.4999227580196199, " \ + "0.3886940123612214], 'xyz': [4.177888302275983, 3.8309808686301463, 8.4248059828532], 'label': 'Au', " \ + "'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.7221149470996455, " \ + "0.8332633260862631, 0.3886886262722038], 'xyz': [2.7031110921236228, 6.385418166224623, " \ + 
"8.424689241268467], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.16668888044495933, 0.05556470280564529, 0.5000187643769566], 'xyz': [1.2291336351900959, " \ + "0.42580040617233267, 10.837730820888046], 'label': 'Au', 'properties': {}}, {'species': [{'element': " \ + "'Au', 'occu': 1}], 'abc': [0.1666892016854119, 0.38889995593508075, 0.5000225659234466], " \ + "'xyz': [-0.24564506868500663, 2.980196974629289, 10.837813218070908], 'label': 'Au', 'properties': {}}, " \ + "{'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.1666891714443799, 0.722234185396945, " \ + "0.5000123683272975], 'xyz': [-1.7204223536605938, 5.534585698572632, 10.837592188761692], 'label': " \ + "'Au', " \ + "'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.4999999739438076, " \ + "0.0555540754598818, 0.5000004103713492], 'xyz': [4.178529966958006, 0.4257189672747207, " \ + "10.837333004273093], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.5000003511660567, 0.388889347819449, 0.5000041891925947], 'xyz': [2.7037516733653333, " \ + "2.9801156830949758, 10.837414908894273], 'label': 'Au', 'properties': {}}, {'species': [{'element': " \ + "'Au', 'occu': 1}], 'abc': [0.5000003183460272, 0.7222235749835915, 0.49999399112578524], " \ + "'xyz': [1.2289743757349842, 5.534504389430542, 10.837193869383661], 'label': 'Au', 'properties': {}}, " \ + "{'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.8333111067953511, 0.055543465441786885, " \ + "0.49998204163747373], 'xyz': [7.1279265702805485, 0.42563766116155544, 10.83693486842811], " \ + "'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.8333114961523033, 0.38887873511461624, 0.4999858116310773], 'xyz': [5.653148395950459, " \ + "2.9800343563929474, 10.837016581713256], 'label': 'Au', 'properties': {}}, {'species': [{'element': " \ + "'Au', 'occu': 1}], 'abc': [0.833311468474635, 
0.7222129708839298, 0.4999756172013136], " \ + "'xyz': [4.178371105750982, 5.534423128671192, 10.836795621034332], 'label': 'Au', 'properties': {}}, " \ + "{'species': [{'element': 'H', 'occu': 1}], 'abc': [0.35096447466090563, 0.3462402223346323, " \ + "0.7601942160953102], 'xyz': [1.5736804490495533, 2.653289226057133, 16.47694221216442], 'label': 'H', " \ + "'properties': {}}, {'species': [{'element': 'H', 'occu': 1}], 'abc': [0.3551009194623986, " \ + "0.3492683100920289, 0.7951234817366677], 'xyz': [1.5968851263342523, 2.6764939033418274, " \ + "17.23402175749712], 'label': 'H', 'properties': {}}], 'oriented_unit_cell': {'@module': " \ + "'pymatgen.core.structure', '@class': 'Structure', 'charge': 0, 'lattice': {'matrix': [[2.085644, " \ + "-2.085644, -0.0], [-0.0, 2.085644, -2.085644], [4.171288, 4.171288, 4.1712880000000006]], 'pbc': (True, " \ + "True, True), 'a': 2.9495460310820714, 'b': 2.9495460310820714, 'c': 7.224882749002367, 'alpha': 90.0, " \ + "'beta': 90.0, 'gamma': 120.00000000000001, 'volume': 54.434193348844616}, 'sites': [{'species': [{" \ + "'element': 'Au', 'occu': 1}], 'abc': [2.7563307014654614e-32, 5.8228074529959955e-33, " \ + "1.1753544774588717e-16], 'xyz': [4.902742027570462e-16, 4.902742027570461e-16, 4.902742027570463e-16], " \ + "'label': 'Au', 'properties': {'bulk_wyckoff': 'a', 'bulk_equivalent': 0}}, {'species': [{'element': " \ + "'Au', 'occu': 1}], 'abc': [0.33333333333333337, 0.6666666666666666, 0.3333333333333334], " \ + "'xyz': [2.0856440000000003, 2.0856440000000003, 8.30010874845281e-16], 'label': 'Au', 'properties': {" \ + "'bulk_wyckoff': 'a', 'bulk_equivalent': 0}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.6666666666666664, 0.3333333333333333, 0.6666666666666667], 'xyz': [4.171288, " \ + "2.0856440000000007, 2.0856440000000007], 'label': 'Au', 'properties': {'bulk_wyckoff': 'a', " \ + "'bulk_equivalent': 0}}]}, 'miller_index': (1, 1, 1), 'shift': 0.16666666666666677, 'scale_factor': [[1, " \ + "-1, 
0], [1, 0, -1], [1, 1, 1]], 'reconstruction': None, 'energy': None} " + + reactant_slab_dict = \ + "{'@module': 'pymatgen.core.surface', '@class': 'Slab', 'charge': 0, 'lattice': {'matrix': [[" \ + "8.848638093246214, 0.0, 5.418228158853657e-16], [-4.424319046623108, 7.663145377645916, " \ + "5.418228158853657e-16], [0.0, 0.0, 21.6746482470071]], 'pbc': (True, True, True), " \ + "'a': 8.848638093246214, 'b': 8.848638093246214, 'c': 21.6746482470071, 'alpha': 90.0, 'beta': 90.0, " \ + "'gamma': 120.00000000000001, 'volume': 1469.723220418804}, 'sites': [{'species': [{'element': 'Au', " \ + "'occu': 1}], 'abc': [0.3333333333333331, 0.3333333333333331, 0.6111111111111112], " \ + "'xyz': [1.4747730155410343, 2.554381792548637, 13.245618373171006], 'label': 'Au', 'properties': {}}, " \ + "{'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.33333333333333304, 0.6666666666666664, " \ + "0.6111111111111112], 'xyz': [-1.962495598385766e-15, 5.108763585097275, 13.245618373171006], " \ + "'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.3333333333333331, 0.9999999999999998, 0.6111111111111113], 'xyz': [-1.4747730155410377, " \ + "7.663145377645915, 13.245618373171009], 'label': 'Au', 'properties': {}}, {'species': " \ + "[{'element': 'Au', " \ + "'occu': 1}], 'abc': [0.6666666666666664, 0.3333333333333331, 0.6111111111111112], " \ + "'xyz': [4.424319046623105, 2.554381792548637, 13.245618373171006], 'label': 'Au', 'properties': {}}, " \ + "{'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.6666666666666663, 0.6666666666666664, " \ + "0.6111111111111113], 'xyz': [2.9495460310820687, 5.108763585097275, 13.245618373171009], 'label': 'Au', " \ + "'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.6666666666666664, " \ + "0.9999999999999998, 0.6111111111111112], 'xyz': [1.4747730155410332, 7.663145377645915, " \ + "13.245618373171007], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', 'occu': 
1}], " \ + "'abc': [0.9999999999999996, 0.3333333333333331, 0.6111111111111113], 'xyz': [7.373865077705175, " \ + "2.554381792548637, 13.245618373171009], 'label': 'Au', 'properties': {}}, {'species': [{'element': " \ + "'Au', " \ + "'occu': 1}], 'abc': [0.9999999999999996, 0.6666666666666664, 0.6111111111111112], " \ + "'xyz': [5.899092062164139, 5.108763585097275, 13.245618373171007], 'label': 'Au', 'properties': {}}, " \ + "{'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.9999999999999996, 0.9999999999999998, " \ + "0.6111111111111112], 'xyz': [4.424319046623103, 7.663145377645915, 13.245618373171007], 'label': 'Au', " \ + "'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.11111111111111106, " \ + "0.22222222222222224, 0.38888888888888895], 'xyz': [-6.973483372021525e-16, 1.702921195032426, " \ + "8.429029873836095], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.11111111111111109, 0.5555555555555555, 0.38888888888888895], 'xyz': [-1.474773015541036, " \ + "4.257302987581064, 8.429029873836095], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', " \ + "'occu': 1}], 'abc': [0.11111111111111112, 0.8888888888888887, 0.388888888888889], " \ + "'xyz': [-2.9495460310820714, 6.811684780129703, 8.429029873836097], 'label': 'Au', 'properties': {}}, " \ + "{'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.4444444444444444, 0.22222222222222224, " \ + "0.38888888888888895], 'xyz': [2.9495460310820705, 1.702921195032426, 8.429029873836095], 'label': 'Au', " \ + "'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.44444444444444436, " \ + "0.5555555555555555, 0.388888888888889], 'xyz': [1.4747730155410346, 4.257302987581064, " \ + "8.429029873836097], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.44444444444444436, 0.8888888888888887, 0.3888888888888889], 'xyz': [-9.188187543387658e-16, " \ + "6.811684780129703, 
8.429029873836095], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', " \ + "'occu': 1}], 'abc': [0.7777777777777778, 0.22222222222222224, 0.388888888888889], " \ + "'xyz': [5.899092062164143, 1.702921195032426, 8.429029873836097], 'label': 'Au', 'properties': {}}, " \ + "{'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.7777777777777776, 0.5555555555555555, " \ + "0.3888888888888889], 'xyz': [4.424319046623105, 4.257302987581064, 8.429029873836095], 'label': 'Au', " \ + "'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.7777777777777778, " \ + "0.8888888888888887, 0.3888888888888889], 'xyz': [2.9495460310820714, 6.811684780129703, " \ + "8.429029873836095], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.2222222222222221, 0.11111111111111109, 0.5000000000000001], 'xyz': [1.4747730155410346, " \ + "0.8514605975162128, 10.837324123503551], 'label': 'Au', 'properties': {}}, {'species': [{'element': " \ + "'Au', 'occu': 1}], 'abc': [0.22222222222222213, 0.44444444444444436, 0.5000000000000001], " \ + "'xyz': [-9.034985870194455e-16, 3.4058423900648513, 10.837324123503553], 'label': 'Au', 'properties': {" \ + "}}, {'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.22222222222222207, 0.7777777777777777, " \ + "0.5000000000000001], 'xyz': [-1.4747730155410372, 5.96022418261349, 10.837324123503553], 'label': 'Au', " \ + "'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.5555555555555555, " \ + "0.11111111111111109, 0.5000000000000001], 'xyz': [4.424319046623106, 0.8514605975162128, " \ + "10.837324123503553], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.5555555555555554, 0.44444444444444436, 0.5000000000000001], 'xyz': [2.9495460310820696, " \ + "3.4058423900648513, 10.837324123503553], 'label': 'Au', 'properties': {}}, {'species': [{'element': " \ + "'Au', 'occu': 1}], 'abc': [0.5555555555555554, 
0.7777777777777777, 0.5000000000000001], " \ + "'xyz': [1.474773015541034, 5.96022418261349, 10.837324123503553], 'label': 'Au', 'properties': {}}, " \ + "{'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.8888888888888885, 0.11111111111111109, " \ + "0.5000000000000001], 'xyz': [7.373865077705174, 0.8514605975162128, 10.837324123503553], 'label': 'Au', " \ + "'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], 'abc': [0.8888888888888886, " \ + "0.44444444444444436, 0.5000000000000001], 'xyz': [5.89909206216414, 3.4058423900648513, " \ + "10.837324123503553], 'label': 'Au', 'properties': {}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.8888888888888886, 0.7777777777777777, 0.5000000000000001], 'xyz': [4.424319046623104, " \ + "5.96022418261349, 10.837324123503553], 'label': 'Au', 'properties': {}}], 'oriented_unit_cell': {" \ + "'@module': 'pymatgen.core.structure', '@class': 'Structure', 'charge': 0, 'lattice': {'matrix': [[" \ + "2.085644, -2.085644, -0.0], [-0.0, 2.085644, -2.085644], [4.171288, 4.171288, 4.1712880000000006]], " \ + "'pbc': (True, True, True), 'a': 2.9495460310820714, 'b': 2.9495460310820714, 'c': 7.224882749002367, " \ + "'alpha': 90.0, 'beta': 90.0, 'gamma': 120.00000000000001, 'volume': 54.434193348844616}, 'sites': [{" \ + "'species': [{'element': 'Au', 'occu': 1}], 'abc': [2.7563307014654614e-32, 5.8228074529959955e-33, " \ + "1.1753544774588717e-16], 'xyz': [4.902742027570462e-16, 4.902742027570461e-16, 4.902742027570463e-16], " \ + "'label': 'Au', 'properties': {'bulk_wyckoff': 'a', 'bulk_equivalent': 0}}, {'species': [{'element': " \ + "'Au', 'occu': 1}], 'abc': [0.33333333333333337, 0.6666666666666666, 0.3333333333333334], " \ + "'xyz': [2.0856440000000003, 2.0856440000000003, 8.30010874845281e-16], 'label': 'Au', 'properties': {" \ + "'bulk_wyckoff': 'a', 'bulk_equivalent': 0}}, {'species': [{'element': 'Au', 'occu': 1}], " \ + "'abc': [0.6666666666666664, 0.3333333333333333, 0.6666666666666667], 'xyz': 
[4.171288, " \ + "2.0856440000000007, 2.0856440000000007], 'label': 'Au', 'properties': {'bulk_wyckoff': 'a', " \ + "'bulk_equivalent': 0}}]}, 'miller_index': (1, 1, 1), 'shift': 0.16666666666666677, 'scale_factor': [[1, " \ + "-1, 0], [1, 0, -1], [1, 1, 1]], 'reconstruction': None, 'energy': None} " + + # properties + # adsorption type + adsorption_property = db.BoolProperty.make("true_adsorption", model, True, properties) + complex_structure.set_property("true_adsorption", adsorption_property.id()) + # surface indices + surface_indices_property = db.VectorProperty.make("surface_indices", model, indices, properties) + reactant_two_guess.set_property("surface_atom_indices", surface_indices_property.id()) + complex_structure.set_property("surface_atom_indices", surface_indices_property.id()) + # slab dicts + reactant_slab_property = db.StringProperty.make("slab_dict", model, reactant_slab_dict, properties) + complex_slab_property = db.StringProperty.make("slab_dict", model, complex_slab_dict, properties) + reactant_two_guess.set_property("slab_dict", reactant_slab_property.id()) + complex_structure.set_property("slab_dict", complex_slab_property.id()) + + graph_one = json.load(open(os.path.join(resource_path(), "h2.json"), "r")) + graph_two = json.load(open(os.path.join(resource_path(), "au.json"), "r")) + reactant_one_guess.set_graph("masm_cbor_graph", graph_one["masm_cbor_graph"]) + reactant_one_guess.set_graph("masm_idx_map", graph_one["masm_idx_map"]) + reactant_one_guess.set_graph("masm_decision_list", graph_one["masm_decision_list"]) + reactant_two_guess.set_graph("masm_cbor_graph", graph_two["masm_cbor_graph"]) + reactant_two_guess.set_graph("masm_idx_map", graph_two["masm_idx_map"]) + reactant_two_guess.set_graph("masm_decision_list", graph_two["masm_decision_list"]) + + db_job = db.Job('scine_react_complex_nt') + + settings = { + "external_program_nprocs": 1, + "spin_propensity_check": 0, + "nt_convergence_max_iterations": 600, + "nt_nt_total_force_norm": 
0.1, + "nt_sd_factor": 1.0, + "nt_nt_use_micro_cycles": True, + "nt_nt_fixed_number_of_micro_cycles": True, + "nt_nt_number_of_micro_cycles": 10, + "nt_nt_filter_passes": 10, + "tsopt_convergence_max_iterations": 2000, + "tsopt_convergence_step_max_coefficient": 0.002, + "tsopt_convergence_step_rms": 0.001, + "tsopt_convergence_gradient_max_coefficient": 0.0002, + "tsopt_convergence_gradient_rms": 0.0001, + "tsopt_convergence_requirement": 3, + "tsopt_convergence_delta_value": 0.000001, + "tsopt_optimizer": "dimer", + "tsopt_dimer_calculate_hessian_once": True, + "tsopt_geoopt_coordinate_system": "cartesianWithoutRotTrans", + "tsopt_dimer_trust_radius": 0.2, + "irc_convergence_max_iterations": 100, + "irc_sd_factor": 2.0, + "irc_irc_initial_step_size": 0.3, + "irc_stop_on_error": False, + "irc_convergence_step_max_coefficient": 0.002, + "irc_convergence_step_rms": 0.001, + "irc_convergence_gradient_max_coefficient": 0.0002, + "irc_convergence_gradient_rms": 0.0001, + "irc_convergence_delta_value": 0.000001, + "irc_irc_coordinate_system": "cartesianWithoutRotTrans", + "ircopt_convergence_max_iterations": 2000, + "ircopt_convergence_step_max_coefficient": 0.002, + "ircopt_convergence_step_rms": 0.001, + "ircopt_convergence_gradient_max_coefficient": 0.0002, + "ircopt_convergence_gradient_rms": 0.0001, + "ircopt_convergence_requirement": 3, + "ircopt_convergence_delta_value": 0.000001, + "ircopt_geoopt_coordinate_system": "cartesianWithoutRotTrans", + "ircopt_bfgs_use_trust_radius": True, + "ircopt_bfgs_trust_radius": 0.2, + "opt_convergence_max_iterations": 2000, + "opt_convergence_step_max_coefficient": 0.002, + "opt_convergence_step_rms": 0.001, + "opt_convergence_gradient_max_coefficient": 0.0002, + "opt_convergence_gradient_rms": 0.0001, + "opt_convergence_requirement": 3, + "opt_convergence_delta_value": 0.000001, + "opt_geoopt_coordinate_system": "cartesianWithoutRotTrans", + "opt_bfgs_use_trust_radius": True, + "opt_bfgs_trust_radius": 0.4, + 
"rcopt_convergence_max_iterations": 2000, + "rcopt_convergence_step_max_coefficient": 0.002, + "rcopt_convergence_step_rms": 0.001, + "rcopt_convergence_gradient_max_coefficient": 0.0002, + "rcopt_convergence_gradient_rms": 0.0001, + "rcopt_convergence_requirement": 3, + "rcopt_convergence_delta_value": 0.000001, + "rcopt_geoopt_coordinate_system": "cartesianWithoutRotTrans", + "rcopt_bfgs_use_trust_radius": True, + "rcopt_bfgs_trust_radius": 0.4, + "method_parameters": "AuCSOH.ff", + "only_distance_connectivity": True, + "nt_nt_rhs_list": [27], + "nt_nt_lhs_list": [0], + "nt_nt_movable_side": "rhs", + "nt_nt_attractive": True, + "rc_minimal_spin_multiplicity": False + } + + calculation = add_calculation(self.manager, model, db_job, + [reactant_two_guess.id(), reactant_one_guess.id(), complex_structure.id()], + settings) + + # Run calculation/job + config = self.get_configuration() + job = ScineReactComplexNt() + job.prepare(config["daemon"]["job_dir"], calculation.id()) + self.run_job(job, calculation, config) + + # Check results + assert calculation.get_status() == db.Status.COMPLETE + results = calculation.get_results() + print(calculation.get_comment()) + assert len(results.property_ids) > 5 + assert len(results.structure_ids) == 2 # TS + product + assert len(results.elementary_step_ids) == 1 + new_elementary_step = db.ElementaryStep(results.elementary_step_ids[0], elementary_steps) + assert new_elementary_step.get_type() == db.ElementaryStepType.REGULAR + assert len(new_elementary_step.get_reactants(db.Side.LHS)[0]) == 2 + assert len(new_elementary_step.get_reactants(db.Side.RHS)[1]) == 1 + product = db.Structure(new_elementary_step.get_reactants(db.Side.RHS)[1][0], structures) + assert product.get_label() == db.Label.SURFACE_OPTIMIZED + assert product.has_property('bond_orders') + assert product.has_graph('masm_cbor_graph') + assert all(product.has_properties(p) for p in ['bond_orders', 'surface_atom_indices', 'slab_dict']) + indices_prop = 
db.VectorProperty(product.get_property('surface_atom_indices'), properties) + assert np.allclose(indices_prop.get_data(), indices) + + new_ts = db.Structure(new_elementary_step.get_transition_state(), structures) + assert new_ts.has_property('electronic_energy') + energy_props = new_ts.get_properties("electronic_energy") + assert energy_props[0] in results.property_ids + energy = db.NumberProperty(energy_props[0], properties) + self.assertAlmostEqual(energy.get_data(), -3.587814458488312, delta=1e-1) + os.environ["OMP_NUM_THREADS"] = omp diff --git a/scine_puffin/tests/jobs/test_scine_react_job.py b/scine_puffin/tests/jobs/test_scine_react_job.py index cd10479..dceed54 100644 --- a/scine_puffin/tests/jobs/test_scine_react_job.py +++ b/scine_puffin/tests/jobs/test_scine_react_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ import numpy as np diff --git a/scine_puffin/tests/jobs/test_scine_single_point_job.py b/scine_puffin/tests/jobs/test_scine_single_point_job.py index 4db161f..4ed9003 100644 --- a/scine_puffin/tests/jobs/test_scine_single_point_job.py +++ b/scine_puffin/tests/jobs/test_scine_single_point_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" diff --git a/scine_puffin/tests/jobs/test_scine_step_refinement_job.py b/scine_puffin/tests/jobs/test_scine_step_refinement_job.py index b97cbef..712c672 100644 --- a/scine_puffin/tests/jobs/test_scine_step_refinement_job.py +++ b/scine_puffin/tests/jobs/test_scine_step_refinement_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -163,10 +163,10 @@ def test_energy_starting_from_separated_reactants(self): complex_structure.link(structures) selection = {"label": "ts_optimized"} assert structures.count(json.dumps(selection)) == 2 - assert properties.count(json.dumps({})) == 14 + assert properties.count(json.dumps({})) == 15 assert elementary_steps.count(json.dumps({})) == 2 results = calculation.get_results() - assert len(results.property_ids) == 10 + assert len(results.property_ids) == 11 assert len(results.structure_ids) == 5 assert len(results.elementary_step_ids) == 2 # The regular elementary step should be the last one in the list. @@ -332,10 +332,10 @@ def test_energy_starting_from_complex(self): complex_structure.link(structures) selection = {"label": "ts_optimized"} assert structures.count(json.dumps(selection)) == 2 - assert properties.count(json.dumps({})) == 14 + assert properties.count(json.dumps({})) == 15 assert elementary_steps.count(json.dumps({})) == 2 results = calculation.get_results() - assert len(results.property_ids) == 10 + assert len(results.property_ids) == 11 assert len(results.structure_ids) == 5 assert len(results.elementary_step_ids) == 2 # The regular elementary step should be the last one in the list. 
diff --git a/scine_puffin/tests/jobs/test_scine_ts_optimization_job.py b/scine_puffin/tests/jobs/test_scine_ts_optimization_job.py index 2775c23..80e40f7 100644 --- a/scine_puffin/tests/jobs/test_scine_ts_optimization_job.py +++ b/scine_puffin/tests/jobs/test_scine_ts_optimization_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/tests/jobs/test_sleep_job.py b/scine_puffin/tests/jobs/test_sleep_job.py index 59c7a12..66528a9 100644 --- a/scine_puffin/tests/jobs/test_sleep_job.py +++ b/scine_puffin/tests/jobs/test_sleep_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/tests/jobs/test_turbomole_bond_orders.py b/scine_puffin/tests/jobs/test_turbomole_bond_orders.py index b99136e..ff33c5e 100644 --- a/scine_puffin/tests/jobs/test_turbomole_bond_orders.py +++ b/scine_puffin/tests/jobs/test_turbomole_bond_orders.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" @@ -32,6 +32,7 @@ def test_energy(self): water = os.path.join(resource_path(), "water.xyz") structure = add_structure(self.manager, water, db.Label.USER_GUESS) model = db.Model('dft', 'pbe', 'def2-SVP') + model.program = "turbomole" job = db.Job('turbomole_bond_orders') calculation = add_calculation(self.manager, model, job, [structure.id()]) diff --git a/scine_puffin/tests/jobs/test_turbomole_geometry_optimization_job.py b/scine_puffin/tests/jobs/test_turbomole_geometry_optimization_job.py index 17906e7..9368bf6 100644 --- a/scine_puffin/tests/jobs/test_turbomole_geometry_optimization_job.py +++ b/scine_puffin/tests/jobs/test_turbomole_geometry_optimization_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -32,6 +32,7 @@ def test_energy(self): water = os.path.join(resource_path(), "water.xyz") structure = add_structure(self.manager, water, db.Label.USER_GUESS) model = db.Model('dft', 'pbe', 'def2-SVP') + model.program = "turbomole" job = db.Job('turbomole_geometry_optimization') calculation = add_calculation(self.manager, model, job, [structure.id()]) diff --git a/scine_puffin/tests/jobs/test_turbomole_hessian.py b/scine_puffin/tests/jobs/test_turbomole_hessian.py index 011f493..3c5b216 100644 --- a/scine_puffin/tests/jobs/test_turbomole_hessian.py +++ b/scine_puffin/tests/jobs/test_turbomole_hessian.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" @@ -32,6 +32,7 @@ def test_hessian(self): water = os.path.join(resource_path(), "water.xyz") structure = add_structure(self.manager, water, db.Label.USER_GUESS) model = db.Model('dft', 'pbe', 'def2-SVP') + model.program = "turbomole" job = db.Job('turbomole_hessian') calculation = add_calculation(self.manager, model, job, [structure.id()]) diff --git a/scine_puffin/tests/jobs/test_turbomole_single_point_job.py b/scine_puffin/tests/jobs/test_turbomole_single_point_job.py index e322925..49f89e6 100644 --- a/scine_puffin/tests/jobs/test_turbomole_single_point_job.py +++ b/scine_puffin/tests/jobs/test_turbomole_single_point_job.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -32,6 +32,7 @@ def test_energy(self): water = os.path.join(resource_path(), "water.xyz") structure = add_structure(self.manager, water, db.Label.USER_GUESS) model = db.Model('dft', 'pbe', 'def2-SVP') + model.program = "turbomole" job = db.Job('turbomole_single_point') calculation = add_calculation(self.manager, model, job, [structure.id()]) diff --git a/scine_puffin/tests/masm_info_test.py b/scine_puffin/tests/masm_info_test.py index 239d608..58862f4 100644 --- a/scine_puffin/tests/masm_info_test.py +++ b/scine_puffin/tests/masm_info_test.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" @@ -14,7 +14,7 @@ import scine_database as db import scine_molassembler as masm import scine_utilities as utils - from scine_puffin.utilities.masm_helper import add_masm_info, _modify_based_on_distances + from scine_puffin.utilities.masm_helper import add_masm_info, get_molecules_result, _modify_based_on_distances except ImportError: pass else: @@ -22,8 +22,8 @@ class MasmHelperTests(unittest.TestCase): def __init__(self, *args, **kwargs): super(MasmHelperTests, self).__init__(*args, **kwargs) self.settings = { - "sub_based_on_distance_connectivity": False, - "add_based_on_distance_connectivity": False, + "sub_based_on_distance_connectivity": True, + "add_based_on_distance_connectivity": True, } class MockStructure(object): @@ -162,3 +162,105 @@ def test_surface_structure(self): assert "masm_decision_list" in structure.graph_dict assert "masm_cbor_graph" in structure.graph_dict assert "masm_idx_map" in structure.graph_dict + + def test_split_surface_structure(self): + xyz_file_content = """81 + +Cu 1.0719532500 1.0719532500 35.0171395000 +Cu 1.0719532500 5.3597662500 35.0171395000 +Cu 5.3597662500 1.0719532500 35.0171395000 +Cu 5.3597662500 5.3597662500 35.0171395000 +Cu 3.2158597500 1.0719532500 32.8732330000 +Cu 3.2158597500 5.3597662500 32.8732330000 +Cu 7.5036727500 1.0719532500 32.8732330000 +Cu 7.5036727500 5.3597662500 32.8732330000 +Cu 1.0719532500 3.2158597500 32.8732330000 +Cu 1.0719532500 7.5036727500 32.8732330000 +Cu 5.3597662500 3.2158597500 32.8732330000 +Cu 5.3597662500 7.5036727500 32.8732330000 +Cu 3.2158597500 3.2158597500 35.0171395000 +Cu 3.2158597500 7.5036727500 35.0171395000 +Cu 7.5036727500 3.2158597500 35.0171395000 +Cu 7.5036727500 7.5036727500 35.0171395000 +O 2.1439065000 2.1439065000 33.9451862500 +O 2.1439065000 6.4317195000 33.9451862500 +O 6.4317195000 2.1439065000 33.9451862500 +O 6.4317195000 6.4317195000 33.9451862500 +O 0.0000000000 0.0000000000 36.0890927500 +O 0.0000000000 4.2878130000 36.0890927500 +O 4.2878130000 
0.0000000000 36.0890927500 +O 4.2878130000 4.2878130000 36.0890927500 +Cu 1.0719532500 1.0719532500 39.3049525000 +Cu 1.0719532500 5.3597662500 39.3049525000 +Cu 5.3597662500 1.0719532500 39.3049525000 +Cu 5.3597662500 5.3597662500 39.3049525000 +Cu 3.2158597500 1.0719532500 37.1610460000 +Cu 3.2158597500 5.3597662500 37.1610460000 +Cu 7.5036727500 1.0719532500 37.1610460000 +Cu 7.5036727500 5.3597662500 37.1610460000 +Cu 1.0719532500 3.2158597500 37.1610460000 +Cu 1.0719532500 7.5036727500 37.1610460000 +Cu 5.3597662500 3.2158597500 37.1610460000 +Cu 5.3597662500 7.5036727500 37.1610460000 +Cu 3.2158597500 3.2158597500 39.3049525000 +Cu 3.2158597500 7.5036727500 39.3049525000 +Cu 7.5036727500 3.2158597500 39.3049525000 +Cu 7.5036727500 7.5036727500 39.3049525000 +O 2.1439065000 2.1439065000 38.2329992500 +O 2.1439065000 6.4317195000 38.2329992500 +O 6.4317195000 2.1439065000 38.2329992500 +O 6.4317195000 6.4317195000 38.2329992500 +O 0.0000000000 0.0000000000 40.3769057500 +O 0.0000000000 4.2878130000 40.3769057500 +O 4.2878130000 0.0000000000 40.3769057500 +O 4.2878130000 4.2878130000 40.3769057500 +Cu 1.0719532500 1.0719532500 43.5927655000 +Cu 1.0719532500 5.3597662500 43.5927655000 +Cu 5.3597662500 1.0719532500 43.5927655000 +Cu 5.3597662500 5.3597662500 43.5927655000 +Cu 3.2158597500 1.0719532500 41.4488590000 +Cu 3.2158597500 5.3597662500 41.4488590000 +Cu 7.5036727500 1.0719532500 41.4488590000 +Cu 7.5036727500 5.3597662500 41.4488590000 +Cu 1.0719532500 3.2158597500 41.4488590000 +Cu 1.0719532500 7.5036727500 41.4488590000 +Cu 5.3597662500 3.2158597500 41.4488590000 +Cu 5.3597662500 7.5036727500 41.4488590000 +Cu 3.2158597500 3.2158597500 43.5927655000 +Cu 3.2158597500 7.5036727500 43.5927655000 +Cu 7.5036727500 3.2158597500 43.5927655000 +Cu 7.5036727500 7.5036727500 43.5927655000 +O 2.1439065000 2.1439065000 42.5208122500 +O 2.1439065000 6.4317195000 42.5208122500 +O 6.4317195000 2.1439065000 42.5208122500 +O 6.4317195000 6.4317195000 42.5208122500 +O 
0.0000000000 0.0000000000 44.6647187500 +O 0.0000000000 4.2878130000 44.6647187500 +O 4.2878130000 0.0000000000 44.6647187500 +O 4.2878130000 4.2878130000 44.6647187500 +C 7.8374544210 6.0029380451 47.2231590235 +C 6.3856957891 6.3093735268 47.1847260085 +C 5.8568798291 7.5383134573 46.9965352261 +H 8.1063386427 5.4638831305 48.1468080782 +H 8.1035125720 5.3095234924 46.3964106616 +H 8.4554510178 6.9063900636 47.1308283651 +H 5.7020734826 5.4626932681 47.3117261503 +H 4.7827542159 7.7098942445 46.9898841604 +H 6.5007646914 8.4087043392 46.8529728506 + """ + pbc = utils.PeriodicBoundaries("16.205584494355413,16.205584494355413,145.85026044919871,90.0,90.0,90.0") + ac, bo = self.read_fake_files(xyz_file_content, "test_surface_split.xyz") + surface_indices = set(range(72)) + bo = utils.SolidStateBondDetector.detect_bonds(ac, pbc, surface_indices) + structure = self.MockStructure(ac) + model = db.Model("FAKE", "", "") + model.periodic_boundaries = str(pbc) + structure.set_model(model) + mol_results = get_molecules_result(ac, bo, self.settings, str(pbc), surface_indices) + assert len(mol_results.molecules) == 2 + add_masm_info(structure, bo, self.settings, surface_indices) + assert "masm_decision_list" in structure.graph_dict + assert "masm_cbor_graph" in structure.graph_dict + assert "masm_idx_map" in structure.graph_dict + cbor = structure.graph_dict['masm_cbor_graph'] + assert cbor.count(';') == 1 diff --git a/scine_puffin/tests/mrcc/__init__.py b/scine_puffin/tests/mrcc/__init__.py new file mode 100644 index 0000000..cc9acd3 --- /dev/null +++ b/scine_puffin/tests/mrcc/__init__.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. 
+""" diff --git a/scine_puffin/tests/mrcc/mrcc_test.py b/scine_puffin/tests/mrcc/mrcc_test.py new file mode 100644 index 0000000..207de6c --- /dev/null +++ b/scine_puffin/tests/mrcc/mrcc_test.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. +""" + +import os + +from ..testcases import ( + JobTestCase, + skip_without +) + +from ..db_setup import ( + add_calculation, + add_structure +) + +from ..resources import resource_path + + +class MrccTests(JobTestCase): + + @skip_without('mrcc', 'database', 'readuct') + def test_mrcc_single_point(self): + from scine_puffin.jobs.scine_single_point import ScineSinglePoint + import scine_database as db + + # Setup DB for calculation + water = os.path.join(resource_path(), "water.xyz") + structure = add_structure(self.manager, water, db.Label.USER_GUESS) + model = db.Model('dft', 'pbe-d3bj', 'def2-svp') + model.temperature = "" + model.pressure = "" + model.electronic_temperature = "" + model.program = "mrcc" + settings = { + "require_charges": False + } + job = db.Job('scine_single_point') + calculation = add_calculation(self.manager, model, job, [structure.id()], settings) + + # Run calculation/job + config = self.get_configuration() + job = ScineSinglePoint() + job.prepare(config["daemon"]["job_dir"], calculation.id()) + self.run_job(job, calculation, config) + + # Check results + assert calculation.get_status() == db.Status.COMPLETE + assert structure.has_property("electronic_energy") + energy_props = structure.get_properties("electronic_energy") + assert len(energy_props) == 1 + results = calculation.get_results() + assert energy_props[0] in results.property_ids diff --git a/scine_puffin/tests/resources/FeO_H2.json b/scine_puffin/tests/resources/FeO_H2.json new file mode 100644 index 0000000..e6a14e2 --- /dev/null +++ 
b/scine_puffin/tests/resources/FeO_H2.json @@ -0,0 +1,5 @@ +{ + "masm_decision_list" : "", + "masm_cbor_graph" : "pGFhgqVhYQBhYwJhb4GCAAFhcqNhbIKBAYEDYmxygoEAgQFhc4KBAYEDYXMApWFhAGFjA2FvgYIAAWFyo2FsgoEAgQJibHKCgQCBAWFzgoEAgQJhcwFhYw9hZ6JhRYODAAMAgwECAIMCAwBhWoQBAQgYGmF2gwECAQ==", + "masm_idx_map" : "[(0, 3), (0, 2), (0, 0), (0, 1)]" +} diff --git a/scine_puffin/tests/resources/FeO_H2.xyz b/scine_puffin/tests/resources/FeO_H2.xyz new file mode 100644 index 0000000..59f5715 --- /dev/null +++ b/scine_puffin/tests/resources/FeO_H2.xyz @@ -0,0 +1,6 @@ +4 + +Fe 0.6399150479 0.7133206784 0.7012062755 +O 0.1061174002 -0.3397098090 -0.5710346763 +H -0.5567387334 0.5413806809 1.1339136517 +H -0.1892937148 -0.9149915503 -1.2640852509 diff --git a/scine_puffin/tests/resources/FeO_H2_lhs.json b/scine_puffin/tests/resources/FeO_H2_lhs.json new file mode 100644 index 0000000..ee1f9c9 --- /dev/null +++ b/scine_puffin/tests/resources/FeO_H2_lhs.json @@ -0,0 +1,5 @@ +{ + "masm_decision_list" : "", + "masm_cbor_graph" : "pGFhgaVhYQBhYwNhb4GCAAFhcqNhbIKBAIIBAmJscoKBAIEBYXOCggECgQBhcwBhYw9hZ6JhRYSDAAMAgwECAIMBAwaDAgMGYVqECAEBGBphdoMBAgE=", + "masm_idx_map" : "[(0, 3), (0, 0), (0, 1), (0, 2)]" +} diff --git a/scine_puffin/tests/resources/FeO_H2_lhs.xyz b/scine_puffin/tests/resources/FeO_H2_lhs.xyz new file mode 100644 index 0000000..6a8b3ae --- /dev/null +++ b/scine_puffin/tests/resources/FeO_H2_lhs.xyz @@ -0,0 +1,6 @@ +4 + +Fe 0.3329697985 -0.0620608456 -0.3959421243 +O 1.3622755765 -0.2536676437 -1.6201404547 +H -1.0599662461 -0.1485323268 0.8774437201 +H -0.6352791289 0.4642608161 1.1386388589 diff --git a/scine_puffin/tests/resources/__init__.py b/scine_puffin/tests/resources/__init__.py index b7872e3..f1d8829 100644 --- a/scine_puffin/tests/resources/__init__.py +++ b/scine_puffin/tests/resources/__init__.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. 
-Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/tests/resources/acetal_methanol_complex.xyz b/scine_puffin/tests/resources/acetal_methanol_complex.xyz new file mode 100644 index 0000000..ebf1306 --- /dev/null +++ b/scine_puffin/tests/resources/acetal_methanol_complex.xyz @@ -0,0 +1,32 @@ +30 + +O -1.818713591267 1.134530806303 1.944224098816 +H -1.230313831718 0.993116170077 1.182912854588 +C -1.601230319842 2.432652761433 2.444563678689 +H -1.810367820785 3.196058637216 1.688110254359 +H -0.573920930449 2.556787975109 2.803436106303 +H -2.287879344328 2.566878819458 3.279880245127 +C 2.152686054191 -0.402606974185 -0.426750213813 +C 0.633936307744 -0.471868639783 -0.662123127034 +H 2.603564638026 0.200022493161 -1.211592412825 +H 2.574595414409 -1.402047346449 -0.453975797511 +O -0.005962678610 0.724146902481 -0.232146146785 +H 2.355972834169 0.056663859908 0.536141316057 +N 0.047617939878 -1.595436749184 0.029783742816 +C 0.373089180199 -1.723438967649 1.440821988202 +C 0.393280946926 1.925939085901 -0.872128052344 +C -1.390793999880 -1.721031953392 -0.178194050492 +H -1.665741103037 -2.763826458193 -0.017783445749 +H 1.448885100829 -1.785978265972 1.576080430955 +H -0.071702762687 -2.653414689026 1.792207816109 +H -0.026749649488 -0.898428693627 2.042045080013 +H -1.627764852440 -1.440226764728 -1.201070480851 +H -1.965906128598 -1.088514725882 0.506210524805 +O 0.346729286921 -0.579102054948 -2.044175685445 +C 0.855967094510 -1.728248161242 -2.695171193672 +H 0.428895407406 1.791935102416 -1.953661741067 +H 1.367004017203 2.261672498947 -0.503348924391 +H -0.366074145312 2.664411314189 -0.621520435397 +H 0.668469122429 -2.625194289091 -2.102013618351 +H 1.928012697398 -1.629830983192 -2.889491163425 +H 0.320535981638 -1.789038216173 -3.641057516110 diff --git 
a/scine_puffin/tests/resources/au.json b/scine_puffin/tests/resources/au.json new file mode 100644 index 0000000..b86417b --- /dev/null +++ b/scine_puffin/tests/resources/au.json @@ -0,0 +1,5 @@ +{ + "masm_decision_list" : "", + "masm_cbor_graph" : "o2FjD2FnomFFmL2DAAEAgwACAIMAAwCDAAQAgwAFAIMABgCDABIAgwATAIMAFACDABUAgwAWAIMAFwCDAQIAgwEEAIMBBQCDAQ0AgwEOAIMBEwCDARQAgwEVAIMBFwCDARgYAIMBGBkAgwIDAIMCBgCDAg0AgwIOAIMCEwCDAhQAgwIVAIMCFgCDAhgYAIMCGBoAgwMEAIMDBQCDAw0AgwMOAIMDEgCDAxQAgwMVAIMDFgCDAxgZAIMDGBoAgwQGAIMEDQCDBA4AgwQSAIMEEwCDBBUAgwQXAIMEGBkAgwQYGgCDBQYAgwUNAIMFDgCDBRIAgwUUAIMFFgCDBRcAgwUYGACDBRgZAIMGDQCDBg4AgwYSAIMGEwCDBhYAgwYXAIMGGBgAgwYYGgCDBwgAgwcJAIMHCwCDBwwAgwcPAIMHEACDBxMAgwcUAIMHFQCDBxYAgwcXAIMHGBgAgwgJAIMICgCDCAwAgwgPAIMIEQCDCBIAgwgUAIMIFQCDCBYAgwgXAIMIGBkAgwkKAIMJCwCDCRAAgwkRAIMJEgCDCRMAgwkVAIMJFgCDCRcAgwkYGgCDCgsAgwoMAIMKDwCDChAAgwoSAIMKEwCDChQAgwoVAIMKGBkAgwoYGgCDCwwAgwsPAIMLEQCDCxIAgwsTAIMLFACDCxYAgwsYGACDCxgaAIMMEACDDBEAgwwSAIMMEwCDDBQAgwwXAIMMGBgAgwwYGQCDDRUAgw0WAIMNFwCDDRgYAIMNGBkAgw0YGgCDDhIAgw4TAIMOFACDDhgYAIMOGBkAgw4YGgCDDxAAgw8RAIMPFACDDxUAgw8WAIMPGBgAgw8YGQCDDxgaAIMQEQCDEBMAgxAVAIMQFwCDEBgYAIMQGBkAgxAYGgCDERIAgxEWAIMRFwCDERgYAIMRGBkAgxEYGgCDEhMAgxIUAIMSFgCDEhcAgxIYGQCDEhgaAIMTFACDExUAgxMXAIMTGBgAgxMYGgCDFBUAgxQWAIMUGBgAgxQYGQCDFRYAgxUXAIMVGBkAgxUYGgCDFhcAgxYYGACDFhgaAIMXGBgAgxcYGQCDGBgYGQCDGBgYGgCDGBkYGgBhWpgbGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPGWLPYXaDAQIB", + "masm_idx_map" : "[(0, 0), (0, 2), (0, 3), (0, 5), (0, 1), (0, 13), (0, 6), (0, 14), (0, 4), (0, 12), (0, 16), (0, 17), (0, 11), (0, 10), (0, 9), (0, 7), (0, 15), (0, 8), (0, 18), (0, 19), (0, 26), (0, 22), (0, 20), (0, 21), (0, 23), (0, 24), (0, 25)]" + } diff --git a/scine_puffin/tests/resources/au.xyz b/scine_puffin/tests/resources/au.xyz new file mode 100644 index 0000000..e6e9615 --- /dev/null +++ b/scine_puffin/tests/resources/au.xyz @@ -0,0 +1,29 @@ +27 +0 1 +Au -0.9825436050 -1.7023669119 2.4126482175 +Au 
-2.4573208143 0.8520495702 2.4124273697 +Au -3.9321000052 3.4063796292 2.4124543101 +Au 1.9669776894 -1.7023763393 2.4119131997 +Au 0.4922004040 0.8520400222 2.4116923496 +Au -0.9825787838 3.4063700692 2.4117194234 +Au 4.9163992663 -1.7024432930 2.4115952980 +Au 3.4416220152 0.8519731292 2.4113744367 +Au 1.9668425096 3.4063030276 2.4114019974 +Au -2.4584358359 -2.5548600050 -2.4112937535 +Au -3.9332140897 -0.0005154442 -2.4114652059 +Au -5.4079913424 2.5539217869 -2.4115819624 +Au 0.4909856752 -2.5549268883 -2.4116114473 +Au -0.9837925039 -0.0005823846 -2.4117831202 +Au -2.4585697916 2.5538548183 -2.4118998121 +Au 3.4405068853 -2.5549364498 -2.4123464003 +Au 1.9657287233 -0.0005919165 -2.4125181268 +Au 0.4909515132 2.5538453811 -2.4126348683 +Au -0.9830259437 -3.4057723790 0.0004067113 +Au -2.4578046476 -0.8513758105 0.0004891085 +Au -3.9325819326 1.7030129134 0.0002680791 +Au 1.9663703880 -3.4058538179 0.0000088947 +Au 0.4915920944 -0.8514571021 0.0000907993 +Au -0.9831852032 1.7029316043 -0.0001302402 +Au 4.9157669913 -3.4059351240 -0.0003892412 +Au 3.4409888170 -0.8515384288 -0.0003075279 +Au 1.9662115268 1.7028503435 -0.0005284886 diff --git a/scine_puffin/tests/resources/au_complex.xyz b/scine_puffin/tests/resources/au_complex.xyz new file mode 100644 index 0000000..46b0b77 --- /dev/null +++ b/scine_puffin/tests/resources/au_complex.xyz @@ -0,0 +1,31 @@ +29 + +Au 1.2296159739 2.1292058733 13.2499723271 +Au -0.2451612354 4.6836223553 13.2497514793 +Au -1.7199404262 7.2379524144 13.2497784197 +Au 4.1791372683 2.1291964459 13.2492373093 +Au 2.7043599829 4.6836128074 13.2490164592 +Au 1.2295807951 7.2379428543 13.2490435330 +Au 7.1285588452 2.1291294921 13.2489194076 +Au 5.6537815941 4.6835459144 13.2486985463 +Au 4.1790020885 7.2378758128 13.2487261070 +Au -0.2462762570 1.2767127801 8.4260303561 +Au -1.7210545107 3.8310573410 8.4258589037 +Au -3.1958317634 6.3854945720 8.4257421472 +Au 2.7031452541 1.2766458968 8.4257126623 +Au 1.2283670750 3.8309904005 
8.4255409894 +Au -0.2464102127 6.3854276035 8.4254242975 +Au 5.6526664643 1.2766363353 8.4249777093 +Au 4.1778883023 3.8309808686 8.4248059829 +Au 2.7031110921 6.3854181662 8.4246892413 +Au 1.2291336352 0.4258004062 10.8377308209 +Au -0.2456450687 2.9801969746 10.8378132181 +Au -1.7204223537 5.5345856986 10.8375921888 +Au 4.1785299670 0.4257189673 10.8373330043 +Au 2.7037516734 2.9801156831 10.8374149089 +Au 1.2289743757 5.5345043894 10.8371938694 +Au 7.1279265703 0.4256376612 10.8369348684 +Au 5.6531483960 2.9800343564 10.8370165817 +Au 4.1783711058 5.5344231287 10.8367956210 +H 1.5736804490 2.6532892261 16.4769422122 +H 1.5968851263 2.6764939033 17.2340217575 diff --git a/scine_puffin/tests/resources/h2.json b/scine_puffin/tests/resources/h2.json new file mode 100644 index 0000000..5ae008e --- /dev/null +++ b/scine_puffin/tests/resources/h2.json @@ -0,0 +1,5 @@ +{ + "masm_decision_list" : "", + "masm_cbor_graph" : "o2FjD2FnomFFgYMAAQBhWoIBAWF2gwECAQ==", + "masm_idx_map" : "[(0, 0), (0, 1)]" +} diff --git a/scine_puffin/tests/resources/h2.xyz b/scine_puffin/tests/resources/h2.xyz new file mode 100644 index 0000000..83795ab --- /dev/null +++ b/scine_puffin/tests/resources/h2.xyz @@ -0,0 +1,4 @@ +2 +0 1 +H -0.0000000000 0.0000000000 -0.3788952204 +H 0.0000000000 -0.0000000000 0.3788952204 diff --git a/scine_puffin/tests/resources/h2o2.json b/scine_puffin/tests/resources/h2o2.json new file mode 100644 index 0000000..23f7084 --- /dev/null +++ b/scine_puffin/tests/resources/h2o2.json @@ -0,0 +1,5 @@ +{ + "masm_decision_list" : "(175, -179, -174, 1)", + "masm_cbor_graph" : "pGFhgqVhYQBhYwJhb4GCAAFhcqNhbIKBAYEDYmxygoEAgQFhc4KBAYEDYXMBpWFhAGFjA2FvgYIAAWFyo2FsgoEAgQJibHKCgQCBAWFzgoEAgQJhcwFhYw9hZ6JhRYODAAMAgwECAIMCAwBhWoQBAQgIYXaDAQIB", + "masm_idx_map" : "[(0, 2), (0, 1), (0, 3), (0, 0)]" +} diff --git a/scine_puffin/tests/resources/h2o2.xyz b/scine_puffin/tests/resources/h2o2.xyz new file mode 100644 index 0000000..0a2adb3 --- /dev/null +++ 
b/scine_puffin/tests/resources/h2o2.xyz @@ -0,0 +1,6 @@ +4 +0 1 +O -0.0501872324 -0.4574961186 -0.5423611208 +H -0.9293736701 -0.2599269352 -0.9006162977 +O 0.0501380410 0.4572277977 0.5424409929 +H 0.9294228616 0.2601952561 0.9005364255 diff --git a/scine_puffin/tests/resources/h2o2_distorted.xyz b/scine_puffin/tests/resources/h2o2_distorted.xyz new file mode 100644 index 0000000..dc10135 --- /dev/null +++ b/scine_puffin/tests/resources/h2o2_distorted.xyz @@ -0,0 +1,6 @@ +4 +0 1 +O 0.0 0.0 -0.75 +O 0.0 0.0 0.75 +H 1.0 0.0 -0.25 +H 0.0 1.0 -0.75 diff --git a/scine_puffin/tests/resources/hio3.json b/scine_puffin/tests/resources/hio3.json new file mode 100644 index 0000000..e3ff365 --- /dev/null +++ b/scine_puffin/tests/resources/hio3.json @@ -0,0 +1,5 @@ +{ + "masm_decision_list" : "(150, 156, 161, 1)", + "masm_cbor_graph" : "pGFhgqVhYQBhYwNhb4GCAAFhcqNhbIKBAIEEYmxygoEAgQFhc4KBAIEEYXMBpWFhAGFjBGFvgYMAAQJhcqNhbIOBAYECgQNibHKCgQKCAAFhc4KBA4IBAmFzA2FjD2FnomFFhIMAAwCDAQQAgwIEAIMDBABhWoUBCAgIGT+1YXaDAQIB", + "masm_idx_map" : "[(0, 3), (0, 0), (0, 1), (0, 4), (0, 2)]" +} diff --git a/scine_puffin/tests/resources/hio3.xyz b/scine_puffin/tests/resources/hio3.xyz new file mode 100644 index 0000000..eb68adb --- /dev/null +++ b/scine_puffin/tests/resources/hio3.xyz @@ -0,0 +1,7 @@ +5 +0 1 +O 1.2039183970 -0.2141875471 -0.0296973503 +H 1.7484203023 0.0646907419 0.7233718756 +O -0.9945616081 0.4638335512 -1.6214092333 +I -0.5494980064 0.5014572548 0.0721937479 +O -1.4082790848 -0.8157940008 0.8555409601 diff --git a/scine_puffin/tests/resources/methanol_enamine_complex.xyz b/scine_puffin/tests/resources/methanol_enamine_complex.xyz new file mode 100644 index 0000000..d99a27a --- /dev/null +++ b/scine_puffin/tests/resources/methanol_enamine_complex.xyz @@ -0,0 +1,32 @@ +30 + +O -1.051297586091 1.136639770894 1.277696965527 +H -0.459062349844 1.462993004481 0.505416411917 +C -1.932184900980 2.168305647080 1.617848968721 +H -2.488548351664 2.530637314817 0.744801868237 +H 
-1.403106779474 3.020005870054 2.067304749340 +H -2.641429723591 1.773548950994 2.349476015642 +C 1.651361129150 -0.529803996241 -0.362034285772 +C 0.496118786136 -1.379211251098 -0.546098870638 +H 2.371591867576 -0.620402165017 -1.167945370228 +H 2.131181259700 -0.620499249374 0.600601730710 +O 0.462549592971 1.915495593148 -0.656453530504 +H 1.246908635640 0.551546916436 -0.445096775995 +N -0.121520473688 -1.991376948889 0.442106568575 +C 0.263512969397 -1.788575355775 1.822444536389 +C 1.108140499055 3.086726972486 -0.384159018002 +C -1.365744769097 -2.704826496043 0.247400952177 +H -1.287219402709 -3.697467734259 0.691929121745 +H 0.039178010036 -0.760588576389 2.115523899454 +H 1.323054939274 -1.989749799538 1.969758378649 +H -0.306796194005 -2.470587159062 2.446770050896 +H -1.587017008703 -2.802783895008 -0.811062831735 +H -2.174837519804 -2.156472189144 0.732959231694 +O -0.037936062201 -1.556770246884 -1.746939740474 +C 0.416260883444 -0.756620715346 -2.829578451855 +H 0.416385275797 3.947777556555 -0.446712687341 +H 1.928448005708 3.275586121347 -1.102523026010 +H 1.552753833444 3.100041523535 0.629901768589 +H 1.415647962048 -1.055842025517 -3.150757577973 +H 0.410555268334 0.299200535847 -2.550073836638 +H -0.291386340507 -0.931870056174 -3.636489716575 diff --git a/scine_puffin/tests/resources/peroxide.json b/scine_puffin/tests/resources/peroxide.json new file mode 100644 index 0000000..2cfe01d --- /dev/null +++ b/scine_puffin/tests/resources/peroxide.json @@ -0,0 +1,5 @@ +{ + "masm_decision_list": "", + "masm_cbor_graph": 
"pGFhhqVhYQBhYwRhb4GCAAFhcqRhbIKBB4EJY2xua4GiYXCCAAFjc2VxhgQHCAYFCWJscoKBAYEAYXOCgQmBB2FzAaVhYQBhYwVhb4GCAAFhcqRhbIKBBoEJY2xua4GiYXCCAAFjc2VxhgUGCAcECWJscoKBAYEAYXOCgQmBBmFzAaVhYQBhYwZhb4GCAAFhcqRhbIKBBYEIY2xua4GiYXCCAAFjc2VxhgYFCQQHCGJscoKBAYEAYXOCgQiBBWFzAaVhYQBhYwdhb4GCAAFhcqRhbIKBBIEIY2xua4GiYXCCAAFjc2VxhgcECQUGCGJscoKBAYEAYXOCgQiBBGFzAaVhYQBhYwhhb4GEAAECA2FypGFshIECgQOBBoEHY2xua4GiYXCCAgNjc2VxhggGBQkEB2JscoKCAAGCAgNhc4KCAgOCBgdhcwWlYWEAYWMJYW+BhAABAgNhcqRhbISBAIEBgQSBBWNsbmuBomFwggIDY3NlcYYJBAcIBgVibHKCggABggIDYXOCggABggQFYXMFYWMPYWeiYUWKgwAJAIMBCQCDAggAgwMIAIMEBwCDBAkAgwUGAIMFCQCDBggAgwcIAGFaigEBAQEICAgIBgZhdoMCAAA=", + "masm_idx_map": "[(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (0, 9)]" +} \ No newline at end of file diff --git a/scine_puffin/tests/resources/peroxide.xyz b/scine_puffin/tests/resources/peroxide.xyz new file mode 100644 index 0000000..69b8681 --- /dev/null +++ b/scine_puffin/tests/resources/peroxide.xyz @@ -0,0 +1,12 @@ +10 + +C 0.6564298697 1.1596540408 -0.2620802273 +O -0.6801687991 1.1622761797 0.2886486810 +O -1.3436355647 0.0340145939 -0.2798102791 +C -0.6830223834 -1.1326872360 0.2607484053 +O 0.6535771712 -1.1353098584 -0.2899782528 +O 1.3170436088 -0.0070462737 0.2784776686 +H 1.1625132841 2.0200216111 0.2009400898 +H 0.6358666957 1.1340816815 -1.3594381839 +H -1.1891077689 -1.9930554787 -0.2022647061 +H -0.6624561134 -1.1070992602 1.3581068044 diff --git a/scine_puffin/tests/resources/water_distorted.xyz b/scine_puffin/tests/resources/water_distorted.xyz new file mode 100644 index 0000000..6758de4 --- /dev/null +++ b/scine_puffin/tests/resources/water_distorted.xyz @@ -0,0 +1,5 @@ +3 + +O 0.0 0.0 0.0 +H 1.0 0.0 0.0 +H 0.5 0.5 0.0 \ No newline at end of file diff --git a/scine_puffin/tests/resources/water_distorted_2.xyz b/scine_puffin/tests/resources/water_distorted_2.xyz new file mode 100644 index 0000000..8136e5f --- /dev/null +++ b/scine_puffin/tests/resources/water_distorted_2.xyz @@ -0,0 +1,5 @@ +3 + 
+O 0.0 0.0 0.0 +H 1.0 0.0 0.0 +H -0.5 0.5 0.0 \ No newline at end of file diff --git a/scine_puffin/tests/testcases.py b/scine_puffin/tests/testcases.py index 6bf8d00..a64dcc1 100644 --- a/scine_puffin/tests/testcases.py +++ b/scine_puffin/tests/testcases.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -24,6 +24,11 @@ def module_exists(module_name: str) -> bool: return os.getenv("ORCA_BINARY_PATH") is not None elif module_name.lower() == "turbomole": return os.getenv("TURBODIR") is not None + elif module_name.lower() == "ams": + possibles = ['AMSHOME', 'AMSBIN', 'AMS_BINARY_PATH'] + return any(os.getenv(p) is not None for p in possibles) + elif module_name.lower() == "mrcc": + return os.getenv("MRCC_BINARY_PATH") is not None else: return module_name in (name for loader, name, ispkg in iter_modules()) @@ -38,7 +43,8 @@ def _skip(func: Callable, error: str): def dependency_addition(dependencies: List[str]) -> List[str]: # allow to give scine packages without 'scine_' prefix - short_terms = ['readuct', 'swoose', 'sparrow', 'molassembler', 'database', 'utilities', 'kinetx', 'xtb_wrapper'] + short_terms = ['readuct', 'swoose', 'sparrow', 'molassembler', 'database', 'utilities', 'kinetx', 'xtb_wrapper', + 'ams_wrapper', 'serenity_wrapper', 'dftbplus_wrapper'] dependencies = ['scine_' + d if d in short_terms else d for d in dependencies] # dependencies of key as value list, only utilities must not be included dependency_data = { @@ -61,6 +67,7 @@ def wrap(f: Callable): if all(module_exists(d) for d in dependency_list): @wraps(f) def wrapped_f(*args, **kwargs): + calculator_import_resolve(dependency_list) f(*args, **kwargs) return wrapped_f else: @@ -69,6 +76,23 @@ def wrapped_f(*args, 
def calculator_import_resolve(dependency_list: List[str]) -> None:
    """
    Import the calculator modules that are named in the given dependency list.

    Only the module names listed in ``calculator_modules`` below are imported; every other entry of
    ``dependency_list`` is ignored. The imports are done purely for their side effect of loading the module
    (presumably this registers the calculators so that they can be loaded later on -- the original comment only
    states "ensure that calculators can be loaded").

    Parameters
    ----------
    dependency_list :: List[str]
        The (already prefixed, e.g. 'scine_sparrow') module names a test depends on.

    Raises
    ------
    ImportError
        If one of the known calculator modules is requested but cannot be imported.
    """
    # local import, the surrounding file does not import importlib at module level
    from importlib import import_module

    # one table instead of one copy-pasted if/elif branch per calculator module
    calculator_modules = {
        "scine_sparrow",
        "scine_ams_wrapper",
        "scine_dftbplus_wrapper",
        "scine_serenity_wrapper",
        "scine_swoose",
        "scine_xtb_wrapper",
    }
    for dependency in dependency_list:
        if dependency in calculator_modules:
            import_module(dependency)
job.run(self.manager, calculation, config) assert success + job.clear() except BaseException as e: print(calculation.get_comment()) + job.clear() raise e diff --git a/scine_puffin/utilities/__init__.py b/scine_puffin/utilities/__init__.py index 4d46073..d85c6e5 100644 --- a/scine_puffin/utilities/__init__.py +++ b/scine_puffin/utilities/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/utilities/compound_and_flask_helpers.py b/scine_puffin/utilities/compound_and_flask_helpers.py index 5cd9bdf..e17e942 100644 --- a/scine_puffin/utilities/compound_and_flask_helpers.py +++ b/scine_puffin/utilities/compound_and_flask_helpers.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scine_puffin/utilities/kinetic_modeling_sensitivity_analysis.py b/scine_puffin/utilities/kinetic_modeling_sensitivity_analysis.py new file mode 100644 index 0000000..83d4517 --- /dev/null +++ b/scine_puffin/utilities/kinetic_modeling_sensitivity_analysis.py @@ -0,0 +1,576 @@ +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. 
class RMSKineticModelingSensitivityAnalysis:
    """
    Provides a wrapper around SALib for sensitivity analysis of the RMS kinetic modeling output.

    SALib samples the parameters (activation energies and enthalpies) within the uncertainty around the input value.
    By default, the parameters are sampled from a uniform distribution between their error bounds.

    To ensure that we can use multiprocessing, we cannot work with any Julia objects in the main thread. Julia objects
    may only be constructed after starting the parallel loop.

    Parameters
    ----------
    rms_kinetic_model :: RMSKineticModel
        The microkinetic model.
    n_cores :: int
        Number of cores to run the sensitivity analysis on. Note that if n > 1, RMS must not have been instantiated
        before in the python main process because Julia runs into trouble otherwise.
    sample_size :: int
        Number of samples for Morris (5 - 25) or Sobol ( > 500) analysis.
    distribution_shape :: str (default 'unif')
        Shape of the parameter distribution to be assumed. Options are uniform distribution between error bounds
        ('unif') and truncated normal distributions ('truncnorm'). For the truncated normal distribution, the mean
        is the baseline parameter of the microkinetic model and the standard deviation is the mean of the lower and
        upper error bound (at least 0.5). The distribution is truncated at ten times the error bounds and such that
        reaction barriers remain non-negative. Note that a non-uniform distribution may lead to rather strange
        parameter sampling if Morris sampling is used because it constructs discrete parameter levels within the
        distribution.

    Raises
    ------
    RuntimeError
        If the distribution shape is neither 'unif' nor 'truncnorm'.

    TODO: The number of levels for Morris sampling should be an input argument.
    """

    def __init__(self, rms_kinetic_model: "RMSKineticModel", n_cores: int, sample_size: int,
                 distribution_shape: str = 'unif'):
        self.rms_model = rms_kinetic_model
        self.n_cores = n_cores
        self.sample_size = sample_size
        self._problem: Optional[Any] = None
        self._morris_max_mu: Optional[np.ndarray] = None
        self._morris_max_mu_star: Optional[np.ndarray] = None
        self._morris_max_sigma: Optional[np.ndarray] = None
        self._sobol_max_total: Optional[np.ndarray] = None
        self._sobol_max_s1: Optional[np.ndarray] = None
        self._full_to_reduced_parameter_mapping: Optional[List[Tuple[int, int]]] = None
        self._reduced_to_full_parameter_mapping: Optional[List[Tuple[int, int]]] = None
        self.sensitivity_times: Optional[List[float]] = None
        distribution_options = ['unif', 'truncnorm']
        if distribution_shape not in distribution_options:
            raise RuntimeError(f"The distribution shape for the kinetic modeling parameter must be in"
                               f" {distribution_options}.")
        self._distribution_shape = distribution_shape
        self.include_fluxes: bool = True

    def _define_sampling_problem(self):
        """
        Create problem specification object for SALib.
        """
        # pylint: disable=import-error
        from SALib import ProblemSpec
        # pylint: enable=import-error
        n_aggregates = self.rms_model.get_n_aggregates(with_solvent=False)
        n_rxn = self.rms_model.get_n_reactions()
        # Bounds (lower, upper) for uniform distributions.
        # Bounds (lower, upper, mean, std-dev) for truncated normal distributions.
        if self._distribution_shape == 'truncnorm':
            bounds = self.get_parameter_mean_and_std_dev_bounds()
        else:
            bounds = self.get_parameter_bounds()
        distributions = [self._distribution_shape for _ in bounds]
        n_outputs = self.get_n_total_output()
        outputs = ['c_max_' + str(i) for i in range(n_aggregates)] + ['c_' + str(i) for i in range(n_aggregates)]
        if self.include_fluxes:
            outputs += ['c_flux_' + str(i) for i in range(n_aggregates)]
        if len(outputs) < n_outputs:
            # the remaining outputs are the concentrations at the additional time points (one set per time point)
            n_sets = int((n_outputs - len(outputs)) / n_aggregates)
            for i_set in range(n_sets):
                outputs += ['ct' + str(i_set) + "_" + str(i) for i in range(n_aggregates)]
        assert len(outputs) == n_outputs
        f2r_mapping = self.get_reduced_parameter_mapping()
        full_names = ['h' + str(i) for i in range(n_aggregates)] + ['ea' + str(i) for i in range(n_rxn)]
        problem = ProblemSpec({
            'names': [full_names[full_index] for full_index, _ in f2r_mapping],
            'bounds': bounds,
            'outputs': outputs,
            'dists': distributions
        })
        return problem

    def analyse_runs(self) -> List[Tuple[np.ndarray, np.ndarray]]:
        """
        Calculate mean and variance of the outputs of the sensitivity analysis runs.

        Return
        ------
        Returns a list of tuples (tuple[0] -> mean ; tuple[1] -> variance) of the different sensitivity analysis
        outputs (these can be aggregate-wise maximum concentrations, fluxes or just concentrations at specific time
        points).

        Raises
        ------
        RuntimeError
            If no sensitivity analysis was run before.
        """
        if self.get_analysis().results is None:
            raise RuntimeError("Run the sensitivity analysis first, please.")
        all_results = self.get_analysis().results  # rows -> runs | cols -> outputs
        n_aggregates = self.rms_model.get_n_aggregates(with_solvent=False)
        n_outputs = all_results.shape[1]
        n_sets = int(n_outputs / n_aggregates)
        assert n_sets * n_aggregates == n_outputs
        separated_output = [all_results[:, int(i * n_aggregates): int((i + 1) * n_aggregates)] for i in range(n_sets)]
        return [(np.mean(out, axis=0), np.var(out, axis=0)) for out in separated_output]

    def morris_sensitivities(self):
        """
        Run Morris sensitivity analysis. The number of model evaluations is M = N(p + 1), where N is the number of
        samples for each parameter and p is the number of parameters (number of reactions + number of aggregates).
        The number of samples (N) is typically between 5 and 25.

        Return
        ------
        Returns the tuple (max_mu, max_mu_star, max_sigma, outputs). The first three entries are dictionaries with
        the keys 'c_max' and 'c_final' holding the sensitivity measure of every parameter for the maximum and final
        concentrations. The last entry is the raw SALib analysis output.

        The measures are:
        max_mu: Maximum value of the Morris sensitivity measure mu for the parameter and maximum/final concentrations.
        max_mu_star: Maximum value of the Morris sensitivity measure mu* for the parameter and maximum/final
        concentrations.
        max_sigma: Maximum value of the Morris sensitivity measure sigma for the parameter and maximum/final
        concentrations.
        """
        problem = self._define_sampling_problem()
        # pylint: disable=no-member
        if self._distribution_shape == "unif" and self.get_n_parameters() < 1e+3:
            # We use the trajectory selection by Ruano et al. https://doi.org/10.1016/j.envsoft.2012.03.008
            problem.sample_morris(min(500, self.sample_size * 10), optimal_trajectories=max(2, self.sample_size),
                                  local_optimization=True)
        else:
            problem.sample_morris(self.sample_size)
        # pylint: enable=no-member
        print("Morris' method, number of model evaluations:", len(problem.samples))
        # Multiprocessing is somewhat difficult with Julia. Since the julia-code is compiled specifically for the
        # process we have to make sure that Julia is only ever imported in a sub-process and never in the main process.
        self.evaluate_salib_parallel(problem, self.salib_wrapped_kinetic_modeling, nprocs=self.n_cores)
        print("Model evaluations done! Analyzing output.")
        # pylint: disable=no-member
        outputs = problem.analyze_morris().to_df()
        self._problem = problem
        # pylint: enable=no-member
        self._morris_max_mu = self._result_wise_abs_max(outputs, 'mu')
        self._morris_max_mu_star = self._result_wise_abs_max(outputs, 'mu_star')
        self._morris_max_sigma = self._result_wise_abs_max(outputs, 'sigma')
        return self._morris_max_mu, self._morris_max_mu_star, self._morris_max_sigma, outputs

    def sobol_sensitivities(self):
        """
        Run Sobol sensitivity analysis (with Saltelli sampling). The number of model evaluations is M = N(p + 2), where
        N is the number of samples for each parameter and p is the number of parameters (number of reactions + number
        of aggregates). Sample size (N) should be 500 or larger (depending on the number of model parameters).

        Return
        ------
        Returns the tuple (max_total, max_s1, outputs). The first two entries are dictionaries with the keys 'c_max'
        and 'c_final' holding the maximum total ('ST') and first order ('S1') index of every parameter. The last
        entry is the raw SALib analysis output.
        """
        self._problem = self._define_sampling_problem()
        # pylint: disable=no-member
        self._problem.sample_saltelli(self.sample_size, calc_second_order=False)
        # pylint: enable=no-member
        self._problem.evaluate(self.salib_wrapped_kinetic_modeling, nprocs=self.n_cores)
        print("Model evaluations done! Analyzing output.")
        # pylint: disable=no-member
        outputs = self._problem.analyze_sobol(calc_second_order=False).to_df()
        # pylint: enable=no-member
        self._sobol_max_total = self._result_wise_abs_max(outputs, 'ST')
        self._sobol_max_s1 = self._result_wise_abs_max(outputs, 'S1')
        return self._sobol_max_total, self._sobol_max_s1, outputs

    @staticmethod
    def evaluate_salib_parallel(problem, func, nprocs=None):
        """Evaluate model locally in parallel.
        All detected processors will be used if `nprocs` is None.

        This is a reduced version of SALib's evaluate_parallel function. We need this reimplementation to
        better handle the parallelization itself. The size of the class and all parameters given to the
        individual processes must not become too large. Otherwise, we may be unable to pickle it.

        Parameters
        ----------
        problem : ProblemSpec
            The SALib problem spec. Its results are set by this function.
        func : function,
            The evaluation function.
        nprocs : int,
            The number of processes.

        Raises
        ------
        RuntimeError
            If the problem does not contain any samples yet.
        """
        from multiprocessing import Pool, cpu_count
        if problem._samples is None:
            raise RuntimeError("Sampling not yet conducted")

        max_procs = cpu_count()
        if nprocs is None:
            nprocs = max_procs
        else:
            nprocs = min(max_procs, nprocs)

        # Split into chunks. The chunk sizes should not become too large to avoid requiring too much memory for pickle
        # to handle.
        if problem._samples.shape[0] > nprocs * 1e+2:
            n_chunks = int(problem._samples.shape[0] / 100)
        else:
            n_chunks = nprocs
        chunks = np.array_split(problem._samples, n_chunks, axis=0)

        with Pool(nprocs) as pool:
            # imap keeps the order of the chunks, the results must be collected before the pool is closed
            res = list(pool.imap(func, chunks))

        problem.results = problem._collect_results(res)

    def get_n_parameters(self) -> int:
        """
        Getter for the number of microkinetic model parameters.
        """
        return self.rms_model.get_n_parameters()

    def get_reduced_parameter_mapping(self):
        """
        Getter for the mapping between full parameter list and prescreened parameter list.
        The mapping is a list of (full parameter index, reduced parameter index) tuples. If no prescreening
        condition was set, every parameter is mapped onto itself.
        """
        if self._full_to_reduced_parameter_mapping is None:
            return [(i, i) for i in range(self.get_n_parameters())]
        return self._full_to_reduced_parameter_mapping

    def set_prescreening_condition(self, vertex_flux: np.ndarray, edge_flux: np.ndarray, vertex_t: float,
                                   edge_t: float):
        """
        Set a prescreening condition to reduce the parameters sampled during sensitivity analysis. Only the
        enthalpies of aggregates with an absolute vertex flux larger than vertex_t and the activation energies of
        reactions with an absolute edge flux larger than edge_t are kept.
        Note: At the moment this will only affect the one-at-a-time analysis.

        Parameters
        ----------
        vertex_flux :: np.ndarray
            The vertex (aggregate) fluxes.
        edge_flux :: np.ndarray
            The edge (reaction) fluxes.
        vertex_t :: float
            The vertex flux threshold.
        edge_t :: float
            The edge flux threshold.
        """
        reduced_parameter_indices: List[Tuple[int, int]] = []
        reduced_index = 0
        n_enthalpies = len(self.rms_model.uq_h_lower)
        n_ea = len(self.rms_model.uq_ea_lower)
        for i in range(n_enthalpies):
            if abs(vertex_flux[i]) > vertex_t:
                reduced_parameter_indices.append((i, reduced_index))
                reduced_index += 1
        for i in range(n_ea):
            if abs(edge_flux[i]) > edge_t:
                # the activation energies follow the enthalpies in the full parameter list
                reduced_parameter_indices.append((i + n_enthalpies, reduced_index))
                reduced_index += 1
        self._full_to_reduced_parameter_mapping = reduced_parameter_indices

    def get_parameter_bounds(self) -> List[List[float]]:
        """
        Create the parameter bound list to represent a uniform parameter distribution around the baseline.
        Each entry is [lower bound, upper bound]. The enthalpies come first, followed by the activation energies
        (restricted to the prescreened parameters).
        """
        ea_bounds = [[max(0.0, ea - lower), ea + max(upper, 1.0)] for lower, upper, ea in
                     zip(self.rms_model.uq_ea_lower, self.rms_model.uq_ea_upper, self.rms_model.ea)]
        h_bounds = [[h - max(lower, 1.0), h + max(upper, 1.0)] for lower, upper, h in
                    zip(self.rms_model.uq_h_lower, self.rms_model.uq_h_upper, self.rms_model.h)]
        f2r_mapping = self.get_reduced_parameter_mapping()
        full_p_bounds = h_bounds + ea_bounds
        return [full_p_bounds[full_i] for full_i, _ in f2r_mapping]

    def get_parameter_mean_and_std_dev_bounds(self) -> List[List[float]]:
        """
        Convert the error bound list to a list of parameter range, mean, and standard deviation to represent a
        truncated normal distribution around the baseline.
        Each entry is [lower bound, upper bound, mean, standard deviation]. The enthalpies come first, followed by
        the activation energies (restricted to the prescreened parameters).
        """
        # The standard deviation is the mean of the lower and upper error bound (at least 0.5).
        # Note: the lower AND the upper error bound of the activation energies must enter here.
        ea_mean_std = [[ea, max(abs(lower + upper) / 2, 0.5)] for lower, upper, ea in
                       zip(self.rms_model.uq_ea_lower, self.rms_model.uq_ea_upper, self.rms_model.ea)]
        h_mean_std = [[h, max(abs(lower + upper) / 2, 0.5)] for lower, upper, h in
                      zip(self.rms_model.uq_h_lower, self.rms_model.uq_h_upper, self.rms_model.h)]
        # The distributions are truncated at 10 times the error bounds. Barriers must not become negative.
        ea_lower_shifts = [ea - max(0.0, ea - 10 * lower) for lower, ea in
                           zip(self.rms_model.uq_ea_lower, self.rms_model.ea)]
        ea_bounds = [[max(0.0, ea - shift), ea + max(shift, 1.0)] for shift, ea in
                     zip(ea_lower_shifts, self.rms_model.ea)]
        h_bounds = [[h - max(10 * lower, 1.0), h + max(10 * upper, 1.0)] for lower, upper, h in
                    zip(self.rms_model.uq_h_lower, self.rms_model.uq_h_upper, self.rms_model.h)]
        full_bounds = [a + b for a, b in zip(h_bounds + ea_bounds, h_mean_std + ea_mean_std)]

        # map to reduced parameter set.
        f2r_mapping = self.get_reduced_parameter_mapping()
        return [full_bounds[full_i] for full_i, _ in f2r_mapping]

    def get_local_sensitivity_samples(self) -> Tuple[np.ndarray, List[int]]:
        """
        Getter for the local sensitivity samples (parameter combinations). Parameters are distorted by their error
        bounds one-at-a-time from the baseline.

        Return
        ------
        Returns the samples (rows -> samples | cols -> reduced parameters) and for each sample the index of the
        distorted parameter in the full parameter list.
        """
        from copy import deepcopy
        parameter_bounds = self.get_parameter_bounds()
        f2r_mapping = self.get_reduced_parameter_mapping()
        full_parameters = self.rms_model.get_all_parameters()
        reduced_parameters = self._full_to_reduced_parameters(full_parameters, f2r_mapping, len(full_parameters))
        samples = []
        parameter_indices = []
        n_agg = self.rms_model.get_n_aggregates(with_solvent=False)
        assert len(f2r_mapping) == len(parameter_bounds)
        for (full_i, reduced_i), (lower, upper) in zip(f2r_mapping, parameter_bounds):
            p = deepcopy(reduced_parameters)
            p[reduced_i] = upper
            samples.append(p)
            parameter_indices.append(full_i)
            if full_i >= n_agg and self.rms_model.ea[full_i - n_agg] < 1.0:  # no point in lowering 0.0 barriers.
                continue
            p = deepcopy(reduced_parameters)
            p[reduced_i] = lower
            samples.append(p)
            parameter_indices.append(full_i)
        return np.array(samples), parameter_indices

    def one_at_a_time_differences(self, vertex_fluxes: np.ndarray, edge_fluxes: np.ndarray, vertex_threshold: float,
                                  edge_threshold: float, flux_replace: float, ref_max: np.ndarray,
                                  ref_final: np.ndarray):
        """
        Run one-at-a-time local sensitivity analysis. The parameters are distorted from the base line one at a time by
        the error bounds provided for each parameter. The maximum change in max concentrations, final concentrations and
        concentration flux is provided as a result.

        Parameters
        ----------
        vertex_fluxes :: np.ndarray
            The vertex fluxes of the baseline model (the model with all parameters as their default).
        edge_fluxes :: np.ndarray
            The edge fluxes of the baseline model.
        vertex_threshold :: float
            Only the enthalpies of aggregates with an absolute vertex flux over this value are distorted
            (prescreening condition).
        edge_threshold :: float
            Only the activation energies of reactions with an absolute edge flux over this value are distorted
            (prescreening condition).
        flux_replace :: float
            The flux replacement value. Fluxes over this value are reduced to it. This should remove absolutely
            large but unimportant changes of the fluxes.
        ref_max :: np.ndarray
            The maximum concentrations of the baseline model.
        ref_final :: np.ndarray
            The final concentrations of the baseline model.

        Return
        ------
        Returns the tuple (sens_c_max, sens_c_final, sens_c_flux, var_max, var_final, var_flux). The first three
        arrays contain the maximum absolute change for each parameter (full parameter list), the last three arrays
        contain the variance for each aggregate over all samples and the baseline.
        """
        from multiprocessing import Pool
        from copy import deepcopy
        self.set_prescreening_condition(vertex_fluxes, edge_fluxes, vertex_threshold, edge_threshold)
        samples, parameter_indices = self.get_local_sensitivity_samples()
        n_samples = samples.shape[0]
        n_agg = self.rms_model.get_n_aggregates(with_solvent=False)
        n_outputs = 3 * n_agg
        n_params = self.get_n_parameters()
        self.set_analysis_times([])  # No additional time points
        self.include_fluxes = True
        if self.n_cores > 1:
            # equally sized chunks, the first chunk takes the remainder
            chunksizes = [int(n_samples / self.n_cores) for _ in range(self.n_cores)]
            left_over = n_samples - sum(chunksizes)
            chunksizes[0] += left_over
            assert n_samples - sum(chunksizes) == 0
            chunks = []
            i_sample = 0
            for size in chunksizes:
                chunks.append(samples[i_sample: i_sample + size, :])
                i_sample += size
            with Pool(self.n_cores) as pool:
                # imap is lazy, the results must be collected before the pool is closed
                process_results = pool.imap(self.salib_wrapped_kinetic_modeling, chunks)
                results = np.empty((n_samples, n_outputs))
                ind = 0
                for p_result in process_results:
                    n_sam = len(p_result)
                    results[ind: ind + n_sam, :] = p_result
                    ind += n_sam
        else:
            results = self.salib_wrapped_kinetic_modeling(samples)

        ref_flux = deepcopy(vertex_fluxes)
        ref_flux[ref_flux > flux_replace] = flux_replace
        sens_c_max = np.zeros(n_params)
        sens_c_final = np.zeros(n_params)
        sens_c_flux = np.zeros(n_params)
        all_c_max = np.empty((n_samples + 1, n_agg))
        all_c_final = np.empty((n_samples + 1, n_agg))
        all_c_flux = np.empty((n_samples + 1, n_agg))
        # Note that the reference concentrations may still include the solvent species. Therefore, we only copy the
        # first n_agg concentrations into the final array.
        all_c_max[n_samples, :] = ref_max[:n_agg]
        all_c_final[n_samples, :] = ref_final[:n_agg]
        all_c_flux[n_samples, :] = vertex_fluxes[:n_agg]
        all_c_max[:n_samples, :] = results[:, 0:n_agg]
        all_c_final[:n_samples, :] = results[:, n_agg: 2 * n_agg]
        all_c_flux[:n_samples, :] = results[:, 2 * n_agg:]

        # zip stops after the n_samples sample rows, i.e., the baseline in the last row is not visited.
        for p_index, c_max, c_final, c_flux in zip(parameter_indices, all_c_max, all_c_final, all_c_flux):
            # c_flux is a view on a row of all_c_flux. The replacement also affects the flux variance below.
            c_flux[c_flux > flux_replace] = flux_replace
            sens_c_max[p_index] = max(sens_c_max[p_index], np.max(np.abs(c_max - ref_max[:n_agg])))
            sens_c_final[p_index] = max(sens_c_final[p_index], np.max(np.abs(c_final - ref_final[:n_agg])))
            sens_c_flux[p_index] = max(sens_c_flux[p_index], np.max(np.abs(c_flux - ref_flux[:n_agg])))

        var_final = np.var(all_c_final, axis=0)
        var_max = np.var(all_c_max, axis=0)
        var_flux = np.var(all_c_flux, axis=0)
        return sens_c_max, sens_c_final, sens_c_flux, var_max, var_final, var_flux

    def get_analysis(self):
        """
        Getter for the SALib problem object that contains all raw outputs and results if the analysis was done.

        Raises
        ------
        RuntimeError
            If no sensitivity analysis was executed before.
        """
        if self._problem is None:
            raise RuntimeError("The sensitivity analysis must be executed before any results are available.")
        return self._problem

    def _collect_salib_output(self, outputs, metric_key: str) -> List[np.ndarray]:
        """
        Collect the SALib output and separate them into sets containing one value per aggregate.
        """
        n_aggregates = self.rms_model.get_n_aggregates(with_solvent=False)
        # out_array: outputs x params --> take max along the rows for the specific row subblock.
        if metric_key == 'ST':
            out_array = np.array([out[0][metric_key] for out in outputs])
        elif metric_key == 'S1':
            out_array = np.array([out[1][metric_key] for out in outputs])
        else:
            out_array = np.array([out[metric_key] for out in outputs])
        n_sets = int(len(outputs) / n_aggregates)
        assert n_sets * n_aggregates == len(outputs)
        separated_output = [out_array[int(i * n_aggregates): int((i + 1) * n_aggregates), :] for i in range(n_sets)]
        return separated_output

    def _result_wise_abs_max(self, outputs, metric_key: str):
        """
        Extract the sensitivity indices from the output and calculate the absolute maximum value of the index for each
        parameter. Maximum taken over all outputs.

        Return
        ------
        Returns a dictionary with the keys 'c_max' and 'c_final' that contains the maximum index of each parameter
        (full parameter list) for the maximum and final concentrations.
        """
        separated_output: List[np.ndarray] = self._collect_salib_output(outputs, metric_key)
        # the first output set are the maximum concentrations, the second set are the final concentrations
        metric_c_max = separated_output[0]
        metric_c_final = separated_output[1]
        mapping = self.get_reduced_parameter_mapping()
        n_params = self.get_n_parameters()
        return {'c_max': self._reduced_to_full_parameters(np.amax(np.abs(metric_c_max), axis=0), mapping, n_params),
                'c_final': self._reduced_to_full_parameters(np.amax(np.abs(metric_c_final), axis=0), mapping, n_params)}

    @staticmethod
    def _reduced_to_full_parameters(metric: np.ndarray, mapping: List[Tuple[int, int]],
                                    n_total_params: int) -> np.ndarray:
        """
        Expand a metric from the reduced parameter set to the full parameter set. Parameters that are not part of
        the reduced set are assigned zero.

        Parameters
        ----------
        metric :: np.ndarray
            The metric in the reduced parameter set.
        mapping :: List[Tuple[int, int]]
            Full parameter index - reduced parameter index tuples.
        n_total_params :: int
            Total number of parameters.
        Returns
        -------
        The metric in the full parameter dimensions.
        """
        assert metric.shape == (len(mapping),)
        full_metric = np.zeros(n_total_params)
        for full_index, reduced_index in mapping:
            full_metric[full_index] = metric[reduced_index]
        return full_metric

    @staticmethod
    def _update_by_reduced_parameters(update: np.ndarray, full_set: np.ndarray,
                                      mapping: List[Tuple[int, int]]) -> np.ndarray:
        """
        Update some of the parameters in the full parameter list. The given full set is not modified.

        Parameters
        ----------
        update :: np.ndarray
            The new parameter values.
        full_set :: np.ndarray
            The full set of parameters.
        mapping :: List[Tuple[int, int]]
            The indices of the update values in the full parameter set.
        Returns
        -------
        A copy of the full parameter set with the updated values.
        """
        from copy import deepcopy
        assert len(update) == len(mapping)
        params = deepcopy(full_set)
        for full_index, reduced_index in mapping:
            params[full_index] = update[reduced_index]
        return params

    @staticmethod
    def _full_to_reduced_parameters(metric: np.ndarray, mapping: List[Tuple[int, int]],
                                    n_total_params: int) -> np.ndarray:
        """
        Inverse operation to _reduced_to_full_parameters(...)
        """
        assert metric.shape == (n_total_params, )
        result = np.asarray([metric[full_index] for full_index, _ in mapping])
        return result

    def set_analysis_times(self, sensitivity_times: List[float]):
        """
        Provide a set of time points which should be considered for the sensitivity analysis. Time points that are
        not between 1e-9 s and the maximum time of the model are ignored (a message is printed).
        """
        valid_times = []
        for t in sensitivity_times:
            if 1e-9 < t < self.rms_model.max_time:
                valid_times.append(t)
            else:
                print(f"Additional time points for the sensitivity analysis must be between 1e-9 s and the maximum\n"
                      f"time specified in the input {self.rms_model.max_time} s. Ignoring the time point {t} s.")
        self.sensitivity_times = valid_times

    def get_n_total_output(self):
        """
        Get the total number of outputs from the sensitivity analysis.
        """
        # maximum and final concentrations are always part of the output
        n_times_agg = 2
        if self.include_fluxes:
            n_times_agg += 1
        if self.sensitivity_times is not None:
            n_times_agg += len(self.sensitivity_times)
        return self.rms_model.get_n_aggregates(with_solvent=False) * n_times_agg

    def salib_wrapped_kinetic_modeling(self, params_set: np.ndarray):
        """
        SALib run function for the model evaluation.

        Parameters
        ----------
        params_set :: np.ndarray
            An array containing a set of parameters for the run (rows -> runs | cols -> reduced parameters).

        Return
        ------
        Returns the outputs of all runs (rows -> runs | cols -> outputs). The row of a run without a valid model
        solution contains only zeros.

        Raises
        ------
        RuntimeError
            If the RMS input file does not exist after a run.
        """
        import os
        n_aggregates = self.rms_model.get_n_aggregates(with_solvent=False)
        n_ea = len(self.rms_model.ea)
        n_param_sets = len(params_set)
        n_outputs = self.get_n_total_output()
        all_c = np.zeros((n_param_sets, n_outputs))
        # the process id in the file name keeps the input files of parallel processes apart
        filename = str(os.getpid()) + ".sample.rms"
        mapping = self.get_reduced_parameter_mapping()
        full_parameters = self.rms_model.get_all_parameters()
        for i, reduced_params in enumerate(params_set):
            # We use prescreening for the sensitivity analysis. Therefore, SALib only knows of the non-screened
            # parameters and we need to map back to the full parameter set to run the actual calculations.
            params = self._update_by_reduced_parameters(reduced_params, full_parameters, mapping)
            h = params[:n_aggregates]
            ea = params[n_aggregates:]
            assert ea.shape[0] == n_ea
            assert h.shape[0] == n_aggregates
            ea = self.rms_model.ensure_non_negative_barriers(ea, h, self.rms_model.s)
            simulation, _, volume, _, sol = self.rms_model.run_kinetic_modeling(filename, h=h.tolist(), ea=ea)

            if simulation is None:
                print("Invalid model solution. This is only a reason to worry if you are not screening large spaces\n"
                      "of parameters. Ignoring this result and continuing the calculation.")
                continue
            if self.include_fluxes:
                c_max, c_final, c_flux, _, additional_c = self.rms_model.integrate_results(simulation, volume,
                                                                                           self.sensitivity_times,
                                                                                           sol)
            else:
                c_max, c_final, additional_c = self.rms_model.concentrations(simulation, volume, self.sensitivity_times)
                c_flux = None
            if os.path.exists(filename):
                os.remove(filename)
            else:
                raise RuntimeError("RMS input file was not created correctly or was removed unexpectedly.")
            # Calling get_n_aggregates again ensures that we ignore the concentration of disconnected solvent compounds.
            all_c[i, :n_aggregates] = c_max[:n_aggregates]
            all_c[i, n_aggregates: 2 * n_aggregates] = c_final[:n_aggregates]
            start_col = 2 * n_aggregates
            if c_flux is not None:
                all_c[i, start_col: start_col + n_aggregates] = c_flux[:n_aggregates]
                start_col += n_aggregates
            if additional_c is not None and self.sensitivity_times:
                n_times = len(self.sensitivity_times)
                all_c[i, start_col:] = np.reshape(additional_c[:, :n_aggregates], (1, n_times * n_aggregates))
        return all_c
""" +from copy import deepcopy from typing import Any, Dict, List, Optional, Set, Tuple, Union import math +import sys import scine_database as db import scine_molassembler as masm @@ -18,7 +20,7 @@ def get_molecules_result( bond_orders: utils.BondOrderCollection, connectivity_settings: Dict[str, Union[bool, int]], pbc_string: str = "", - unimportant_atoms: Union[List[int], Set[int], None] = None, + unimportant_atoms: Optional[Union[List[int], Set[int]]] = None, modifications: Optional[List[Tuple[int, int, float]]] = None, ) -> masm.interpret.MoleculesResult: """ @@ -33,11 +35,11 @@ def get_molecules_result( bond_orders :: utils.BondOrderCollection The bond order collection to be interpreted. connectivity_settings :: Dict[str, Union[bool, int]] - Settings describing whether to use the connectivity as predicted based on inter- - atomic distances by the utils.BondDetector. + Settings describing whether to use the connectivity as predicted based on inter-atomic distances + by the utils.BondDetector. pbc_string :: str The string specifying periodic boundaries, empty string represents no periodic boundaries. - unimportant_atoms :: Union[List[int], None] + unimportant_atoms :: Optional[Union[List[int], Set[int]]] The indices of atoms for which no stereopermutators shall be determined. modifications :: Optional[List[Tuple[int, int, float]]] Manual bond modifications. They are specified as a list with each element containing the @@ -127,17 +129,14 @@ def get_cbor_graph_from_molecule(molecule: masm.Molecule): The cbor graph string. 
""" - from copy import copy - canonical = copy(molecule) + canonical = deepcopy(molecule) canonical.canonicalize(masm.AtomEnvironmentComponents.All) serialization = masm.JsonSerialization(canonical) try: serialization.to_molecule() - except Exception as e: - exception_str = "Irreversible serialization/deserialization pair: " + str(e) - print("Non-canonical molecule serialization:") - print(masm.JsonSerialization(molecule)) - raise RuntimeError(exception_str) from e + except BaseException: + sys.stderr.write("Non-canonical molecule serialization. Saving non-canonical\n") + serialization = masm.JsonSerialization(molecule) binary = serialization.to_binary(masm.JsonSerialization.BinaryFormat.CBOR) cbor_string = masm.JsonSerialization.base_64_encode(binary) @@ -150,7 +149,7 @@ def get_cbor_graph( bond_orders: utils.BondOrderCollection, connectivity_settings: Dict[str, Union[bool, int]], pbc_string: str = "", - unimportant_atoms: Union[List[int], None] = None, + unimportant_atoms: Optional[Union[List[int], Set[int]]] = None, ) -> str: """ Generates the CBOR graph of an atom collection and bond order collection. @@ -163,11 +162,11 @@ def get_cbor_graph( bond_orders :: utils.BondOrderCollection The bond order collection to be interpreted. connectivity_settings :: Dict[str, Union[bool, int]] - Settings describing whether to use the connectivity as predicted based on inter- - atomic distances by the utils.BondDetector. + Settings describing whether to use the connectivity as predicted based on inter-atomic distances + by the utils.BondDetector. pbc_string :: str The string specifying periodic boundaries, empty string represents no periodic boundaries. - unimportant_atoms :: Union[List[int], None] + unimportant_atoms :: Optional[Union[List[int], Set[int]]] The indices of atoms for which no stereopermutators shall be determined. 
Returns @@ -234,8 +233,8 @@ def get_decision_lists( bond_orders: utils.BondOrderCollection, connectivity_settings: Dict[str, Union[bool, int]], pbc_string: str = "", - unimportant_atoms: Union[List[int], None] = None, -): + unimportant_atoms: Optional[Union[Set[int], List[int]]] = None, +) -> List[str]: """ Generates the dihedral decision lists for rotatable bonds in a given system. @@ -246,8 +245,8 @@ def get_decision_lists( bond_orders :: utils.BondOrderCollection The bond order collection to be interpreted. connectivity_settings :: Dict[str, Union[bool, int]] - Settings describing whether to use the connectivity as predicted based on inter- - atomic distances by the utils.BondDetector. + Settings describing whether to use the connectivity as predicted based on inter-atomic distances + by the utils.BondDetector. pbc_string :: str The string specifying periodic boundaries, empty string represents no periodic boundaries. unimportant_atoms :: Union[List[int], None] @@ -272,7 +271,7 @@ def add_masm_info( structure: db.Structure, bo_collection: utils.BondOrderCollection, connectivity_settings: Dict[str, Union[bool, int]], - unimportant_atoms: Union[List[int], None] = None, + unimportant_atoms: Union[List[int], Set[int], None] = None, ): """ Generates a structure's CBOR graph and decision lists and adds them to the @@ -285,19 +284,26 @@ def add_masm_info( bo_collection :: utils.BondOrderCollection The bond order collection to be interpreted. connectivity_settings :: Dict[str, Union[bool, int]] - Settings describing whether to use the connectivity as predicted based on inter- - atomic distances by the utils.BondDetector. + Settings describing whether to use the connectivity as predicted based on inter-atomic distances + by the utils.BondDetector. unimportant_atoms :: Union[List[int], None] The indices of atoms for which no stereopermutators shall be determined. 
""" pbc_string = structure.get_model().periodic_boundaries atoms = structure.get_atoms() - masm_results = get_molecules_result(atoms, bo_collection, connectivity_settings, pbc_string, unimportant_atoms) + try: + masm_results = get_molecules_result(atoms, bo_collection, connectivity_settings, pbc_string, unimportant_atoms) + except BaseException as e: + if structure.get_label() == db.Label.TS_OPTIMIZED: + print("Molassembler could not generate a graph for TS as it is designed for Minima") + return + raise e # Split the atom collection into separate collections for each molecule positions = masm_results.component_map.apply(atoms) properties: List[Dict[str, Any]] = [{"component": i} for i in range(len(masm_results.molecules))] - atom_map = [tuple(masm_results.component_map.apply(i)) for i in range(len(atoms))] + atom_map: List[masm.interpret.ComponentMap.ComponentIndexPair] = [masm_results.component_map.apply(i) + for i in range(len(atoms))] for i, m in enumerate(masm_results.molecules): ordering = m.canonicalize(masm.AtomEnvironmentComponents.All) diff --git a/scine_puffin/utilities/program_helper.py b/scine_puffin/utilities/program_helper.py index 437dfb7..9483122 100644 --- a/scine_puffin/utilities/program_helper.py +++ b/scine_puffin/utilities/program_helper.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """program_helper.py: Collection of common procedures to be carried out depending on underlying calculators""" __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
class ReactionTransferHelper(TransferHelper):
    """
    Transfer properties from the reactants to the products of a found reaction.

    Properties that are not listed in ``_special_properties`` are copied with
    ``simple_transfer`` / ``simple_transfer_all`` (not defined in this class,
    presumably inherited from ``TransferHelper``). The surface atom indices and
    the slab dictionary are handled separately, because they have to be
    re-indexed per product with the help of a component map.
    """

    surface_indices_name = "surface_atom_indices"
    slab_dict_name = "slab_dict"

    def __init__(self, react_job: ReactJob, properties: db.Collection,
                 alternative_component_map: Optional[List[int]] = None) -> None:
        """
        Constructor based on a puffin ReactJob

        Parameters
        ----------
        react_job : ReactJob
            The job currently executing that found an elementary step
        properties : db.Collection
            The properties collection of the database
        alternative_component_map : Optional[List[int]]
            An alternative component map to use instead of the product component map in the react job
        """
        self.react_job = react_job
        self.properties = properties
        self.alternative_component_map = alternative_component_map
        self._special_properties = [
            self.surface_indices_name,
            self.slab_dict_name
        ]

    def transfer_properties(self, old_structure: db.Structure, new_structure: db.Structure,
                            properties_to_transfer: List[str]) \
            -> None:
        """
        Transfer the given properties from one structure to another.

        A warning is written to stderr for every special property held by the
        old structure, because those can only be re-indexed correctly by
        `transfer_properties_between_multiple`. All requested properties are
        nevertheless handed to ``simple_transfer_all``.

        Parameters
        ----------
        old_structure : db.Structure
            Structure holding property
        new_structure : db.Structure
            Structure getting property
        properties_to_transfer : List[str]
            A list of names of the properties to transfer
        """
        for prop in properties_to_transfer:
            if prop in self._special_properties and old_structure.has_property(prop):
                sys.stderr.write(f"{prop} can only be transferred properly by transferring them to all reaction "
                                 f"products at once. This was not done.\n")
        self.simple_transfer_all(old_structure, new_structure, properties_to_transfer)

    def transfer_properties_between_multiple(self,
                                             old_structures: List[db.Structure],
                                             new_structures: List[db.Structure],
                                             properties_to_transfer: List[str]) -> None:
        """
        Transfer properties between multiple structures to multiple other structures.
        This exists because some property transfers may require the knowledge of all reactants
        and all products.

        Parameters
        ----------
        old_structures : List[db.Structure]
            The structures holding the properties
        new_structures : List[db.Structure]
            The structures receiving the properties
        properties_to_transfer : List[str]
            The names of the properties to transfer

        Raises
        ------
        RuntimeError
            Neither the react job nor this helper holds a component map to map reactants to products.
        NotImplementedError
            Some unknown properties are requested for transferring
        """
        # The alternative map takes precedence in _determine_new_indices, so the
        # job's own map is only mandatory if no alternative was supplied.
        if self.alternative_component_map is None and self.react_job.products_component_map is None:
            raise RuntimeError("Could not transfer the properties to the products without the job holding a "
                               "component map")
        for prop in properties_to_transfer:
            if prop not in self._special_properties:
                for old_structure in old_structures:
                    for new_structure in new_structures:
                        self.simple_transfer(old_structure, new_structure, prop)
            elif prop == self.surface_indices_name:
                self._surface_indices_impl(old_structures, new_structures)
            elif prop == self.slab_dict_name:
                self._slab_dict_impl(old_structures, new_structures)
            else:
                # Only reachable if _special_properties gains an entry without a handler here.
                raise NotImplementedError(f"Have not implemented a method to transfer "
                                          f"{prop} with {self.__class__.__name__}")

    def _surface_indices_impl(self, old_structures: List[db.Structure], new_structures: List[db.Structure]) -> None:
        """
        The method implementing the transfer of surface indices to track which nuclei
        belong to the solid state surface structure.

        Parameters
        ----------
        old_structures : List[db.Structure]
            The structures holding the properties
        new_structures : List[db.Structure]
            The structures receiving the properties
        """
        new_surface_indices = self._determine_new_indices(old_structures, new_structures)
        calculation = self.react_job.get_calculation()
        thresh = self.react_job.settings[self.react_job.job_key]["n_surface_atom_threshold"]
        for new_indices, new_structure in zip(new_surface_indices, new_structures):
            # Too few surface atoms are taken to mean that the product no longer contains a surface.
            if len(new_indices) <= thresh:
                continue
            self._sanity_checks(new_structure, self.surface_indices_name)
            # The indices are stored as floats because they are written as a VectorProperty.
            new_property = db.VectorProperty.make(self.surface_indices_name, calculation.get_model(),
                                                  np.array([float(i) for i in new_indices]), new_structure.id(),
                                                  calculation.id(), self.properties)
            new_structure.set_property(self.surface_indices_name, new_property.id())

    def _slab_dict_impl(self, old_structures: List[db.Structure], new_structures: List[db.Structure]) -> None:
        """
        The implementation to transfer the slab dictionary information.

        The slab dictionary of the first old structure that has one is copied to
        every new structure with more surface atoms than the job's threshold and
        is then refreshed with ``update_slab_dict``.

        Parameters
        ----------
        old_structures : List[db.Structure]
            The structures holding the properties
        new_structures : List[db.Structure]
            The structures receiving the properties
        """
        old_structure_with_prop = next(
            (structure for structure in old_structures if structure.has_property(self.slab_dict_name)), None)
        if old_structure_with_prop is None:
            # no slab dict in all old_structures
            return
        new_surface_indices = self._determine_new_indices(old_structures, new_structures)
        thresh = self.react_job.settings[self.react_job.job_key]["n_surface_atom_threshold"]
        for new_indices, new_structure in zip(new_surface_indices, new_structures):
            if len(new_indices) > thresh:
                self._sanity_checks(new_structure, self.slab_dict_name)
                self.simple_transfer(old_structure_with_prop, new_structure, self.slab_dict_name)
                update_slab_dict(new_structure, self.properties, replace_property=True)

    def _determine_new_indices(self, old_structures: List[db.Structure], new_structures: List[db.Structure]) \
            -> List[List[int]]:
        """
        Maps the surface indices from the old structures for the new structures as list for each
        new structure specifying its surface indices.

        Parameters
        ----------
        old_structures : List[db.Structure]
            The structures holding the properties
        new_structures : List[db.Structure]
            The structures receiving the properties

        Returns
        -------
        List[List[int]]
            A list for each new structure giving its surface indices starting with 0

        Raises
        ------
        RuntimeError
            The old and new structure do not fit together or no component map is available
        """
        old_indices = self.react_job.surface_indices_all_structures()
        if not old_indices:
            # One independent list per product; `[[]] * n` would hand out the same list n times.
            return [[] for _ in new_structures]
        # sanity checks
        n_atoms_old = sum(len(s.get_atoms()) for s in old_structures)
        n_atoms_new = sum(len(s.get_atoms()) for s in new_structures)
        if n_atoms_old != n_atoms_new:
            raise RuntimeError(f"{self.__class__.__name__} could not transfer {self.surface_indices_name}, because "
                               f"not all old structures and new structures were given")
        if max(old_indices) >= n_atoms_old:
            raise RuntimeError(f"{self.__class__.__name__} could not transfer {self.surface_indices_name}, because "
                               f"the {self.surface_indices_name} do not fit to the size of the given structures")
        component_map = self.alternative_component_map if self.alternative_component_map is not None \
            else self.react_job.products_component_map
        if component_map is None:
            # Explicit raise instead of `assert`, which is removed when Python runs with -O.
            raise RuntimeError("Could not transfer the properties to the products without the job holding a "
                               "component map")
        return self.map_total_indices_to_split_structure_indices(old_indices, component_map)

    @staticmethod
    def map_total_indices_to_split_structure_indices(total_indices: Union[Set[int], List[int]],
                                                     component_map: List[int]) -> List[List[int]]:
        """
        Maps the total indices to the split structure indices.
        This relies on the fact that component_map.apply keeps the order of indices within the new structures
        So we can infer the index in the new structure based on filling up

        Parameters
        ----------
        total_indices : Union[Set[int], List[int]]
            The total indices
        component_map : List[int]
            The component map specifying which total index belongs to which split structure

        Returns
        -------
        List[List[int]]
            The split structure indices; empty if the component map is empty
        """
        if not component_map:
            return []
        n_new_structures = max(component_map) + 1
        # prepare output object: new surface indices for each new structure
        new_surface_indices: List[List[int]] = [[] for _ in range(n_new_structures)]
        if not total_indices:
            return new_surface_indices
        wanted = set(total_indices)  # constant-time membership test inside the loop
        # fill up
        current_indices = [0] * n_new_structures  # containing info of current index in each new structure
        for i, new_structure_index in enumerate(component_map):
            if i in wanted:
                new_surface_indices[new_structure_index].append(current_indices[new_structure_index])
            current_indices[new_structure_index] += 1
        return new_surface_indices

    @staticmethod
    def _sanity_checks(new_structure: db.Structure, prop_name: str) -> None:
        """
        Write warnings to stderr if a structure receiving a surface property looks inconsistent.

        Nothing is raised; the transfer continues after the warnings.

        Parameters
        ----------
        new_structure : db.Structure
            The structure that is about to receive the property
        prop_name : str
            The name of the property that is about to be set
        """
        if "surface" not in str(new_structure.get_label()).lower():
            sys.stderr.write(f"Something went wrong with the index transfer or the labeling of "
                             f"{str(new_structure.id())}, this structure gets {prop_name}, but does not "
                             f"have a surface label\n")
        if new_structure.has_property(prop_name):
            sys.stderr.write(f"New structure {str(new_structure.id())} already had {prop_name}."
                             f" The property was somehow transferred more than once\n")
def create_single_species_entry(name: str, h: float, s: float):
    """
    Get an entry for a species in the RMS format.

    We could extend this function to use the heat capacity of the species or directly insert the NASA polynomials.
    This would be interesting for non-constant temperature simulations.

    Parameters
    ----------
    name :: str
        The species name (unique identifier).
    h :: float
        The species enthalpy in J/mol.
    s :: float
        The species entropy in J/mol/K.

    Return
    ------
    The species entry.
    """
    # NASA Polynomials: https://reactionmechanismgenerator.github.io/RMG-Py/reference/thermo/nasa.html
    # C_p   = a_0 + a_1 T + a_2 T^2 + a_3 T^3 + a_4 T^4
    # H/(RT) = a_0 + 1/2 a_1 T + 1/3 a_2 T^2 + 1/4 a_3 T^3 + 1/5 a_4 T^4 + a_5/T
    # S/R   = a_0 ln(T) + a_1 T + 1/2 a_2 T^2 + 1/3 a_3 T^3 + 1/4 a_4 T^4 + a_6
    # At the moment we only parse S and H for a given temperature. Therefore, we only set the
    # coefficients a_5 = H/R and a_6 = S/R. This will only work for T=const in the reactor because in that case
    # C_p does not matter and H and S are valid.
    # float(...) keeps numpy scalars out of the dictionary that is later passed to yaml.dump.
    a_5 = float(h / utils.MOLAR_GAS_CONSTANT)
    a_6 = float(s / utils.MOLAR_GAS_CONSTANT)
    return {
        "name": name,
        "radicalelectrons": 0,
        "thermo": {
            "polys": [{
                "Tmax": 5000.0,
                "Tmin": 1.0,
                "coefs": [0.0, 0.0, 0.0, 0.0, 0.0, a_5, a_6],
                "type": "NASApolynomial"
            }],
            "type": "NASA"
        },
        "type": "Species"
    }


def create_rms_phase_entry(aggregate_str_ids: List[str], enthalpies: List[float], entropies: List[float],
                           solvent_name: Optional[str] = None) -> List[Dict]:
    """
    Create all entries for the species in the RMS input dictionary (this is the 'Phase' dictionary in RMS).

    Parameters
    ----------
    aggregate_str_ids :: List[str]
        The aggregate IDs as strings.
    enthalpies :: List[float]
        The aggregate enthalpies (same ordering as for the aggregate_str_ids) in J/mol.
    entropies :: List[float]
        The aggregate entropies (note the ordering) in J/mol/K.
    solvent_name :: Optional[str] (default None)
        The name of an additional solvent species that is added to the species if provided.

    Return
    ------
    The species as a list of one dictionary.
    """
    # NOTE(review): zip stops at the shortest input, so the three lists are expected to have equal length.
    species_list = [create_single_species_entry(str_id, h, s)
                    for str_id, h, s in zip(aggregate_str_ids, enthalpies, entropies)]
    if solvent_name is not None:
        # The extra solvent species is entered with zero enthalpy and zero entropy.
        species_list.append(create_single_species_entry(solvent_name, 0.0, 0.0))
    return [{"Species": species_list, "name": "phase"}]


def create_arrhenius_reaction_entry(reactant_names: List[str], product_names: List[str], e_a: float, n: float, a: float,
                                    type_str: str = "ElementaryReaction") -> Dict:
    """
    Create a reaction entry in the RMS format assuming that the rate constant is given by the modified
    Arrhenius equation. With a molar activation energy and n being a temperature exponent this reads
    k = a * T^n * exp(-e_a / (R T))
    (NOTE(review): this is the form documented for RMS' 'Arrhenius' kinetics; confirm against the RMS version in use).

    Parameters
    ----------
    reactant_names :: List[str]
        Species names of the reactions LHS (the names must correspond to an entry in the RMS phase dictionary).
    product_names :: List[str]
        Species names of the reactions RHS (the names must correspond to an entry in the RMS phase dictionary).
    e_a :: float
        Activation energy in J/mol.
    n :: float
        Temperature exponent.
    a :: float
        Arrhenius prefactor.
    type_str :: str (default 'ElementaryReaction')
        Type of the reaction entry (see the RMS documentation for other options).

    Return
    ------
    The reaction entry.
    """
    return {
        "kinetics": {
            # Builtin floats only: numpy scalars are not written as plain YAML numbers by yaml.dump.
            "A": float(a),
            "Ea": float(e_a),
            "n": float(n),
            "type": "Arrhenius"
        },
        # Fresh list objects avoid anchors/aliases (id counters) in the final yaml file.
        "products": list(product_names),
        "reactants": list(reactant_names),
        "type": type_str
    }


def create_rms_reaction_entry(prefactors: List[float], temperature_exponents: List[float],
                              activation_energies: Union[List[float], np.ndarray],
                              reactant_list: List[Tuple[List[str], List[str]]]) -> List[Dict]:
    """
    Create the reaction entries for the RMS input dictionary assuming Arrhenius kinetics and that all reactions are
    Elementary Reactions (according to the RMS definition), see `create_arrhenius_reaction_entry` for the rate
    expression.

    The parameters are given as lists. The ordering in all lists must be the same.

    Parameters
    ----------
    prefactors :: List[float]
        Arrhenius prefactors.
    temperature_exponents :: List[float]
        Temperature exponents.
    activation_energies :: Union[List[float], np.ndarray]
        Activation energies.
    reactant_list :: List[Tuple[List[str], List[str]]]
        LHS (tuple[0]) and RHS (tuple[1]) of all reactions.

    Return
    ------
    All reaction entries as a list of dictionaries.
    """
    reaction_type_str = "ElementaryReaction"
    return [
        create_arrhenius_reaction_entry(sides[0], sides[1], e_a, n, a, reaction_type_str)
        for a, n, e_a, sides in zip(prefactors, temperature_exponents, activation_energies, reactant_list)
    ]


def create_rms_units_entry(units: Optional[Dict] = None) -> Dict:
    """
    Create the 'units' entry in the RMS input dictionary. See the RMS documentation for supported units.

    Parameters
    ----------
    units :: Optional[Dict] (default None)
        The units as a dictionary.

    Return
    ------
    The given dictionary. If no units were provided, an empty dictionary is returned.
    """
    return {} if units is None else units


def create_solvent_entry(solvent: str, solvent_viscosity: Optional[float], solvent_id_str: Optional[str]) -> Dict:
    """
    Create the entry for the solvent.

    Parameters
    ----------
    solvent :: str
        The solvent name (e.g., water) to extract tabulated viscosity values.
    solvent_viscosity :: Optional[float]
        The solvent's viscosity in Pa s. If None, the tabulated value for the solvent name is used.
    solvent_id_str :: Optional[str]
        The string id of the solvent compound. Only required if the solvent is a reacting species.

    Return
    ------
    The solvent entry as a dictionary.
    """
    if solvent_viscosity is None:
        solvent_viscosity = get_default_viscosity(solvent)
    return {
        "mu": {"mu": float(solvent_viscosity), "type": "ConstantViscosity"},
        # A reacting solvent is referred to by its aggregate id instead of its plain name.
        "name": solvent if solvent_id_str is None else solvent_id_str,
        "type": "Solvent"
    }


def create_rms_yml_file(aggregate_str_ids: List[str],
                        enthalpies: List[float],
                        entropies: List[float],
                        prefactors: List[float],
                        temperature_exponents: List[float],
                        activation_energies: Union[List[float], np.ndarray],
                        reactants: List[Tuple[List[str], List[str]]],
                        file_name: str, solvent_name: Optional[str] = None, solvent_viscosity: Optional[float] = None,
                        solvent_aggregate_index: Optional[int] = None):
    """
    Write the yml file input for RMS.

    Parameters
    ----------
    aggregate_str_ids :: List[str]
        The list of aggregate string ids to be added as RMS species.
    enthalpies :: List[float]
        The list of enthalpies for the aggregates (in J/mol).
    entropies :: List[float]
        The list of the aggregates entropies (in J/mol/K).
    prefactors :: List[float]
        The list of the Arrhenius prefactors.
    temperature_exponents :: List[float]
        The list of the temperature exponents.
    activation_energies :: Union[List[float], np.ndarray]
        The activation energies in J/mol.
    reactants :: List[Tuple[List[str], List[str]]]
        LHS (tuple[0]) and RHS (tuple[1]) of all reactions.
    file_name :: str
        The filename for the yml file.
    solvent_name :: Optional[str] (default None)
        The solvent name.
    solvent_viscosity :: Optional[float] (default None)
        The solvent's viscosity in Pa s.
    solvent_aggregate_index :: Optional[int] (default None)
        The index of the solvent in the aggregate id list. This is only required if the solvent is a reacting species.
    """
    solvent_aggregate_id_str = None
    extra_solvent_species = solvent_name
    if solvent_aggregate_index is not None:
        # The solvent already is one of the aggregates, so no additional species entry is written for it.
        solvent_aggregate_id_str = aggregate_str_ids[solvent_aggregate_index]
        extra_solvent_species = None
    input_dictionary = {
        "Phases": create_rms_phase_entry(aggregate_str_ids, enthalpies, entropies, extra_solvent_species),
        "Reactions": create_rms_reaction_entry(prefactors, temperature_exponents, activation_energies, reactants),
        "Units": create_rms_units_entry()
    }
    if solvent_name is not None:
        input_dictionary["Solvents"] = [create_solvent_entry(solvent_name, solvent_viscosity, solvent_aggregate_id_str)]
    with open(file_name, "w", encoding="utf-8") as outfile:
        yaml.dump(input_dictionary, outfile, default_flow_style=False)


def resolve_rms_solver(solver_name: str, reactor: Any):
    """
    Resolve the solver for the ODE system by string.

    Parameters
    ----------
    solver_name :: str
        The solver name. Options are 'CVODE_BDF', 'Rosenbrock23', 'QNDF', 'TRBDF2', and 'Recommended'.
    reactor :: rms.Reactor
        The julia RMS reactor object. Only its 'recommendedsolver' attribute is read, and only if the
        'Recommended' solver is requested.

    Return
    ------
    Returns the selected ODE solver as a julia Differential Equation object.

    Raises
    ------
    LookupError
        The solver name is unknown.
    """
    # pylint: disable=import-error
    from diffeqpy import de
    # pylint: enable=import-error
    # Factories instead of instances: only the requested solver object is created.
    solver_factories = {
        "CVODE_BDF": lambda: de.CVODE_BDF(max_convergence_failures=60),
        "Rosenbrock23": lambda: de.Rosenbrock23(),
        "QNDF": lambda: de.QNDF(),
        "TRBDF2": lambda: de.TRBDF2(),
        "Recommended": lambda: reactor.recommendedsolver
    }
    if solver_name not in solver_factories:
        raise LookupError("Unknown differential equation solver.")
    return solver_factories[solver_name]()


def resolve_rms_phase(phase_name: str, rms_species: Any, rms_reactions: Any, rms_solvent: Any, diffusion_limited: bool,
                      site_density: Optional[float]) -> Any:
    """
    Resolve the RMS phase model by string.

    Parameters
    ----------
    phase_name :: str
        The name of the phase. Options are 'ideal_gas', 'ideal_dilute_solution', and 'ideal_surface'.
    rms_species :: RMS species object
        The RMS species.
    rms_reactions :: RMS Reaction list
        The RMS reactions.
    rms_solvent :: RMS solvent object
        The RMS solvent object.
    diffusion_limited :: bool
        If true, diffusion limitations are enforced.
    site_density :: Optional[float]
        The site density for surface reactions.

    Return
    ------
    Returns the RMS phase object.

    Raises
    ------
    LookupError
        The phase name is unknown.
    """
    # pylint: disable=import-error
    from julia import ReactionMechanismSimulator as rms
    # pylint: enable=import-error

    if phase_name == "ideal_gas":
        return rms.IdealGas(rms_species, rms_reactions, name="ideal_gas")
    if phase_name == "ideal_dilute_solution":
        return get_ideal_dilute_solution(rms_species, rms_reactions, rms_solvent, diffusion_limited)
    if phase_name == "ideal_surface":
        return get_ideal_surface(rms_species, rms_reactions, diffusion_limited, site_density)
    raise LookupError("Unknown phase name for the kinetic modeling. Options are: "
                      "ideal_gas, ideal_dilute_solution, and ideal_surface.\n"
                      "You chose: " + phase_name)


def get_ideal_dilute_solution(rms_species: Any, rms_reactions: Any, rms_solvent: Any, diffusion_limited: bool) -> Any:
    """
    Getter for the ideal dilute solution phase object of RMS.

    Parameters
    ----------
    rms_species :: RMS species object
        The RMS species.
    rms_reactions :: RMS Reaction list
        The RMS reactions.
    rms_solvent :: RMS solvent object
        The RMS solvent object.
    diffusion_limited :: bool
        If true, diffusion limitations are enforced.

    Return
    ------
    The rms.IdealDiluteSolution object.
    """
    # pylint: disable=import-error
    from julia import ReactionMechanismSimulator as rms
    # pylint: enable=import-error
    # NOTE(review): this guard disappears when Python runs with -O.
    assert rms_solvent
    return rms.IdealDiluteSolution(rms_species, rms_reactions, rms_solvent, name="ideal_solution",
                                   diffusionlimited=diffusion_limited)


def get_ideal_surface(rms_species, rms_reactions, diffusion_limited: bool, site_density: Optional[float]):
    """
    Getter for the ideal surface RMS phase object.

    Parameters
    ----------
    rms_species :: RMS species object
        The RMS species.
    rms_reactions :: RMS Reaction list object
        The RMS reactions.
    diffusion_limited :: bool
        If true, diffusion limitations are enforced.
    site_density :: Optional[float]
        The site density for surface reactions.

    Return
    ------
    The rms.IdealSurface object.

    Raises
    ------
    NotImplementedError
        No site density was given.
    """
    # pylint: disable=import-error
    from julia import ReactionMechanismSimulator as rms
    # pylint: enable=import-error
    if site_density is None:
        raise NotImplementedError("No site density was provided through the settings. Currently there is no sensible"
                                  " default available.\nPlease add an entry 'site_density: some_number' to your"
                                  " settings.")
    return rms.IdealSurface(rms_species, rms_reactions, site_density, name="ideal_surface",
                            diffusionlimited=diffusion_limited)


# Tabulated viscosities; get_default_viscosity multiplies them by 1e-3 to obtain Pa s.
_TABULATED_VISCOSITIES = {
    "water": 0.890,
    "h2o": 0.890,
    "acetone": 0.306,
    "benzene": 0.604,
    "dmso": 1.987,
    "ethanol": 1.074,
    "methanol": 0.544,
    "hexane": 0.240,
    "toluene": 0.560,
    "chloroform": 0.537,
    "nitrobenzene": 1.863,
    "aceticacid": 1.056,
    "acetonitrile": 0.369,
    "aniline": 3.85,
    "benzylalcohol": 5.47,
    "bromoform": 1.857,
    "butanol": 2.54,
    "tertbutanol": 4.31,
    "carbontetrachloride": 0.908,
    "cyclohexane": 0.894,
    "cyclohexanone": 2.02,
    "dichlorobenzene": 1.324,  # (ortho) | 1.044(meta)
    "diethylether": 0.224,
    "dioxane": 1.177,
    "dmfa": 0.794,
    "ethylacetate": 0.423,
    "dichloroethane": 0.779,  # (1, 2) | 0.464(1, 1)
    "ethyleneglycol": 16.06,
    "formicacid": 1.607,
    "isopropanol": 2.04,
    "thf": 0.456,
    "ch2cl2": 0.413
}


def get_default_viscosity(solvent_name: str):
    """
    Getter for tabulated solvent viscosity (in Pa s). Tabulated values are at 25 celsius.

    Source: https://hbcp.chemnetbase.com/faces/contents/InteractiveTable.xhtml
    Accessed on 23.01.2023, 14:00

    Parameter
    ---------
    solvent_name :: str
        The solvent's name. Surrounding whitespace and capitalization are ignored.

    Return
    ------
    The solvent's viscosity (in Pa s).

    Raises
    ------
    LookupError
        No viscosity is tabulated for the solvent.
    """
    key = solvent_name.strip().lower()  # all table keys are lower case
    if key not in _TABULATED_VISCOSITIES:
        raise LookupError("There is no viscosity tabulated for the given solvent: " + solvent_name
                          + ". Please provide it manually by setting the corresponding settings value.")
    return _TABULATED_VISCOSITIES[key] * 1e-3
def __init__(self, settings: Dict, manager: db.Manager, model: db.Model, rms_path: str, rms_file_name: str):
    """
    Parameters:
    -----------
    settings : Dict[str, Any]
        The settings of the kinetic modeling calculation. This must contain:
            * The activation energies 'ea'.
            * The enthalpies 'enthalpies'.
            * The entropies 'entropies'.
            * The arrhenius prefactors 'arrhenius_prefactors'.
            * The arrhenius temperature exponents 'arrhenius_temperature_exponents'.
            * General settings for the kinetic modeling: 'diffusion_limited', 'phase_type', 'aggregate_ids',
              'aggregate_types', 'reaction_ids', 'solver', 'start_concentrations', 'max_time',
              'absolute_tolerance', 'relative_tolerance', 'reactor_temperature', 'reactor_pressure',
              'reactor_solvent', 'solvent_aggregate_str_id', 'sensitivity_analysis'.
        Optional entries are the lower and upper uncertainty bounds for the activation energies and enthalpies
        ('ea_lower_uncertainty', 'ea_upper_uncertainty', 'enthalpy_lower_uncertainty',
        'enthalpy_upper_uncertainty'; default 0.0 each), 'site_density', 'adjoined_sensitivities',
        and 'enforce_mass_balance' (default True).
    manager : db.Manager
        The database manager.
    model : db.Model
        The main electronic structure model (used for default temperature, pressure, and solvent).
    rms_path : str
        The path to the RMS shared library.
    rms_file_name : str
        The base file name for the RMS input file.
    """
    self.settings = settings
    self.ea: List[float] = [float(s) for s in self.settings["ea"]]
    self.h: List[float] = [float(s) for s in self.settings["enthalpies"]]
    self.s: List[float] = [float(s) for s in self.settings["entropies"]]
    self.a = [float(s) for s in self.settings["arrhenius_prefactors"]]
    self.n = [float(s) for s in self.settings["arrhenius_temperature_exponents"]]

    # Uncertainty bounds default to zero and are overwritten if the settings provide them.
    self.uq_ea_lower: List[float] = [0.0] * len(self.ea)
    self.uq_ea_upper: List[float] = [0.0] * len(self.ea)
    self.uq_h_lower: List[float] = [0.0] * len(self.h)
    self.uq_h_upper: List[float] = [0.0] * len(self.h)
    if "ea_lower_uncertainty" in self.settings:
        self.uq_ea_lower = [float(u) for u in self.settings["ea_lower_uncertainty"]]
    if "ea_upper_uncertainty" in self.settings:
        self.uq_ea_upper = [float(u) for u in self.settings["ea_upper_uncertainty"]]
    if "enthalpy_lower_uncertainty" in self.settings:
        self.uq_h_lower = [float(u) for u in self.settings["enthalpy_lower_uncertainty"]]
    if "enthalpy_upper_uncertainty" in self.settings:
        self.uq_h_upper = [float(u) for u in self.settings["enthalpy_upper_uncertainty"]]

    self.reactants: List[Tuple[List[str], List[str]]] = []
    self.viscosity: Optional[float] = None
    self.solvent_index: Optional[int] = None
    self.solvent_aggregate_str_id: Optional[str] = None
    self.diffusion_limited: bool = self.settings["diffusion_limited"]
    self.phase_type: str = self.settings["phase_type"]
    # NOTE(review): 'ideal_surface' is not listed here although a site density is read below;
    # confirm whether the code using _phase_options should accept it.
    self._phase_options = ["ideal_dilute_solution", "ideal_gas"]
    self.a_str_ids: List[str] = self.settings["aggregate_ids"]
    self.r_str_ids: List[str] = self.settings["reaction_ids"]
    self.aggregate_types = [db.CompoundOrFlask(a_type) for a_type in self.settings["aggregate_types"]]
    self.solver: str = self.settings["solver"]
    self.start_concentrations = [float(s) for s in self.settings["start_concentrations"]]

    # The string "none" means 'not set'; the value of the electronic structure model is used instead.
    if self.settings["reactor_temperature"] == "none":
        self.temperature = float(model.temperature)
    else:
        self.temperature = float(self.settings["reactor_temperature"])
    if self.settings["reactor_pressure"] == "none":
        self.pressure = float(model.pressure)
    else:
        self.pressure = float(self.settings["reactor_pressure"])
    self.site_density = None
    if "site_density" in self.settings and self.settings["site_density"] != "none":
        self.site_density = float(self.settings["site_density"])

    self.solvent_species_added: bool = False

    # None if neither the settings nor the model specify a solvent.
    self.solvent: Optional[str] = self.settings["reactor_solvent"]
    if self.solvent == "none":
        self.solvent = model.solvent if model.solvent != "none" else None
    if self.settings["solvent_aggregate_str_id"] != "none":
        self.solvent_index = self.settings["aggregate_ids"].index(self.settings["solvent_aggregate_str_id"])
        self.solvent_aggregate_str_id = self.settings["solvent_aggregate_str_id"]

    # Left-hand side and right-hand side aggregate ids (as strings) of every reaction.
    reaction_collection = manager.get_collection("reactions")
    for r_id_str in self.settings["reaction_ids"]:
        reactants = db.Reaction(db.ID(r_id_str), reaction_collection).get_reactants(db.Side.BOTH)
        self.reactants.append(([a_id.string() for a_id in reactants[0]], [a_id.string() for a_id in reactants[1]]))
    self.max_time = float(self.settings["max_time"])
    self.abs_tol = float(self.settings["absolute_tolerance"])
    self.rel_tol = float(self.settings["relative_tolerance"])

    self._aggregates_to_reactions: Optional[Dict[str, List[str]]] = None
    self._rms_path = rms_path
    self._rms_file_name: str = rms_file_name

    self.calculate_adjoined = self.settings["sensitivity_analysis"] == "adjoined_sensitivities"
    # An explicit 'adjoined_sensitivities' entry overrides the value derived from 'sensitivity_analysis'.
    # (The key was previously tested with a misspelled name, so the override never took effect.)
    if "adjoined_sensitivities" in self.settings:
        self.calculate_adjoined = self.settings["adjoined_sensitivities"]

    enforce_mass_balance = True
    if "enforce_mass_balance" in settings:
        enforce_mass_balance = bool(settings["enforce_mass_balance"])
    self.__sanity_checks(manager, enforce_mass_balance)
= None, + s: Optional[List[float]] = None, a: Optional[List[float]] = None, + n: Optional[List[float]] = None, ea: Optional[Union[List[float], np.ndarray]] = None): + """ + Create a RMS input file with the given enthalpies (h), entropies (s), prefactors (a), temperature exponents (n), + and activation energies (ea). Arguments not provided upon function call are set to the default values + saved upon class construction. + + All values must be provided in SI units (e.g., J/mol for the enthalpies). + """ + if file_name is None: + file_name = self._rms_file_name + if h is None: + h = self.h + if s is None: + s = self.s + if a is None: + a = self.a + if n is None: + n = self.n + if ea is None: + ea = self.ea + create_rms_yml_file(self.a_str_ids, h, s, a, n, ea, self.reactants, file_name, self.solvent, self.viscosity, + self.solvent_index) + + def _get_phase(self, file_name: Optional[str] = None, h: Optional[List[float]] = None, + s: Optional[List[float]] = None, a: Optional[List[float]] = None, n: Optional[List[float]] = None, + ea: Optional[Union[List[float], np.ndarray]] = None): + """ + Getter for the RMS phase object. Values other than the original settings values may be provided for + enthalpies (h), entropies (s), etc. + + All values must be provided in SI units (e.g., J/mol for the enthalpies). 
+ """ + # pylint: disable=import-error + JuliaPrecompiler().set_root(self._rms_path) + JuliaPrecompiler().ensure_is_compiled() + from julia import ReactionMechanismSimulator as rms + # pylint: enable=import-error + if file_name is None: + file_name = "chem.rms" + + self.create_yml_file(file_name, h, s, a, n, ea) + phase_dict = rms.readinput(file_name) + rms_species = phase_dict["phase"]["Species"] + rms_reactions = phase_dict["phase"]["Reactions"] + rms_solvent = None + if "Solvents" in phase_dict: + rms_solvent = phase_dict["Solvents"][0] + + n_rms_species = len(rms_species) + self.solvent_species_added = True if n_rms_species > len(self.a_str_ids) else False + + return resolve_rms_phase(self.phase_type, rms_species, rms_reactions, + rms_solvent, self.diffusion_limited, + self.site_density) + + def get_initial_conditions(self) -> Dict: + """ + Getter for the initial conditions dictionary. + """ + initial_conditions = {"T": self.temperature} + for a_str_id, c in zip(self.a_str_ids, self.start_concentrations): + if c > 1e-16: + initial_conditions[a_str_id] = c + return initial_conditions + + def _get_domain(self, phase: Any, initial_conditions: Dict): + """ + Getter for the RMS domain object. 
+ """ + # pylint: disable=import-error + JuliaPrecompiler().set_root(self._rms_path) + JuliaPrecompiler().ensure_is_compiled() + from julia import ReactionMechanismSimulator as rms + # pylint: enable=import-error + if self.phase_type == "ideal_gas": + initial_conditions["P"] = self.pressure + domain, y0, p = rms.ConstantTPDomain(phase=phase, initialconds=initial_conditions) + volume = y0[-1] # The reactor volume is calculated as V = nRT/P and provided as the last element of y0 + return domain, y0, p, volume, initial_conditions + if self.phase_type == "ideal_dilute_solution": + volume = 1e-3 # 1 L + initial_conditions["V"] = volume + if self.solvent_index is None: + assert self.solvent + initial_conditions[self.solvent] = self.settings["solvent_concentration"] + domain, y0, p = rms.ConstantTVDomain(phase=phase, initialconds=initial_conditions) + return domain, y0, p, volume, initial_conditions + raise RuntimeError("Error: Unknown phase type.") + + def __sanity_checks(self, manager: db.Manager, enforce_mass_balance: bool): + """ + Perform sanity checks for the attributes. + """ + if self.settings["max_time"] < 0.0: + raise AssertionError("The maximum time must be larger 0.0 for kinetic modeling.") + if self.phase_type not in self._phase_options: + raise LookupError("Unknown phase type. Options are: " + str(self._phase_options)) + if self.phase_type == "ideal_dilute_solution" and self.solvent is None: + raise AssertionError("An ideal solution was requested but no solvent was specified. 
Please add an" + " appropriate\nentry to the model definition or the job settings.") + if len(self.reactants) != len(self.ea) or len(self.reactants) != len(self.a): + raise AssertionError("The number of activation energies/prefactors differs from the number of" + " reactions.") + if len(self.a_str_ids) != len(self.h) or len(self.a_str_ids) != len(self.s): + raise AssertionError("The number of aggregate entropies/enthalpies differs from the number of" + " aggregates.") + compounds = manager.get_collection("compounds") + flasks = manager.get_collection("flasks") + structures = manager.get_collection("structures") + if enforce_mass_balance: + for r_str_id, reactants in zip(self.r_str_ids, self.reactants): + if not self._balanced_reaction(reactants, compounds, flasks, structures): + raise RuntimeError("Mass unbalance for reaction", r_str_id) + + @staticmethod + def _balanced_reaction(reactants: Tuple[List[str], List[str]], compounds: db.Collection, flasks: db.Collection, + structures: db.Collection): + """ + Check if the reaction conserves mass balance. This is used in sanity checks. 
+ """ + lhs_w = sum(RMSKineticModel._calculate_weight(a_id, compounds, flasks, structures) for a_id in reactants[0]) + rhs_w = sum(RMSKineticModel._calculate_weight(a_id, compounds, flasks, structures) for a_id in reactants[1]) + return abs(lhs_w - rhs_w) < 1e-6 + + @staticmethod + def _calculate_weight(a_str_id: str, compounds: db.Collection, flasks: db.Collection, + structures: db.Collection) -> float: + """ + Calculate the molecular weight of the given aggregate + """ + import scine_utilities as utils + a_id = db.ID(a_str_id) + aggregate = db.Compound(a_id, compounds) + if not aggregate.exists(): + aggregate = db.Flask(a_id, flasks) + structure = db.Structure(aggregate.get_centroid(), structures) + weight = sum(utils.ElementInfo.mass(e) for e in structure.get_atoms().elements) + return weight + + def run_kinetic_modeling(self, rms_file_name: Optional[str] = None, h: Optional[List[float]] = None, + s: Optional[List[float]] = None, a: Optional[List[float]] = None, + n: Optional[List[float]] = None, ea: Optional[Union[List[float], np.ndarray]] = None): + """ + Run the kinetic modeling. If no values for the enthalpies (h), entropies (s), activation energies (ea), + Arrhenius prefactors (a), temperature exponents (n) etc. are provided, the values stored in + this class are used. 
+ """ + # pylint: disable=import-error + JuliaPrecompiler().set_root(self._rms_path) + JuliaPrecompiler().ensure_is_compiled() + from julia import ReactionMechanismSimulator as rms + from diffeqpy import de + # pylint: enable=import-error + start = datetime.now() + phase = self._get_phase(rms_file_name, h, s, a, n, ea) + initial_conditions = self.get_initial_conditions() + + # We could add more domains here: ConstantVDomain, ConstantPDomain, ParametrizedTPDomain, + # ParametrizedVDomain, ParametrizedPDomain, ConstantTVDomain, ParametrizedTConstantVDomain, + # ConstantTAPhiDomain + # Variables created here: + # volume: reactor volume + # domain: RMS reactor domain object + # y0: initial conditions + # p: ODE parameters: (gibbs of each point, forward rate constants of each reaction) + domain, y0, p, volume, initial_conditions = self._get_domain(phase, initial_conditions) + reactor = rms.Reactor(domain, y0, (0.0, self.max_time), p=p) + solution = de.solve(reactor.ode, resolve_rms_solver(self.solver, reactor), + abstol=self.abs_tol, + reltol=self.rel_tol) + if not self.valid_model_solution(solution): + return None, None, None, None, None + # RMS just hangs here if multiprocessing is used. + simulation = rms.Simulation(solution, domain) + end = datetime.now() + print("RMS Input file + Solving", end - start, rms_file_name) + return simulation, reactor, volume, p, solution + + def calculate_adjoined_sensitivities(self, simulation, reactor, absolute_vertex_flux: np.ndarray, + n_params: int) -> np.ndarray: + """ + Run an adjoined sensitivity analysis for the microkinetic model. 
+ """ + import math + # pylint: disable=import-error + from julia import ReactionMechanismSimulator as rms + # pylint: enable=import-error + solver = resolve_rms_solver(self.solver, reactor) + target_species = absolute_vertex_flux > float(self.settings["adjoined_sensitivity_threshold"]) + n_target = np.count_nonzero(target_species) + all_sensitivities: np.ndarray = np.zeros((n_target, n_params)) + high_flux_aggregates = [] + abs_tolerance = float(self.settings["absolute_tolerance_sensitivity"]) + rel_tolerance = float(self.settings["relative_tolerance_sensitivity"]) + counter = 0 + for i, (a_id, a_type) in enumerate(zip(self.a_str_ids, self.aggregate_types)): + if absolute_vertex_flux[i] > self.settings["adjoined_sensitivity_threshold"]\ + and a_type == db.CompoundOrFlask.COMPOUND: + # Calculate the adjoined sensitivities of the aggregate's concentration with respect to all + # ODE parameters (parameter sorting: Gibb's free energies, forward reaction rates) + # More information: + # https://docs.sciml.ai/SciMLSensitivity/stable/manual/direct_adjoint_sensitivities/#SciMLSensitivity.adjoint_sensitivities + # https://epubs.siam.org/doi/epdf/10.1137/S1064827501380630 + all_sensitivities[counter, :] = rms.getadjointsensitivities(simulation, a_id, solver, + abstol=abs_tolerance, reltol=rel_tolerance) + high_flux_aggregates.append(a_id) + counter += 1 + + abs_max_sensitivities = np.amax(np.abs(all_sensitivities), axis=0) + if math.isnan(np.sum(abs_max_sensitivities)): + raise RuntimeError("Error: NaN detected after sensitivity analysis. The ODE solver probably ran into" + " problems") + return abs_max_sensitivities + + def calculate_fluxes_and_concentrations(self, rms_file_name: Optional[str] = None, + time_points: Optional[List[float]] = None)\ + -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray], np.ndarray]: + """ + Run the microkinetic modeling simulation and analyse the resulting concentration trajectories. 
A set of time + points may be provided at which the concentrations for each aggregate is extracted. Default values for + enthalpies, entropies, activation energies etc. are assumed (see class members). + """ + if rms_file_name is None: + rms_file_name = self._rms_file_name + simulation, reactor, volume, p, solution = self.run_kinetic_modeling(rms_file_name) + if simulation is None: + raise RuntimeError("Numerical integration of the ODE failed. The system of ODEs may be ill conditioned.") + c_max, c_final, absolute_vertex_flux, absolute_edge_flux, additional_c = self.integrate_results( + simulation, volume, time_points, solution) + abs_max_sens = None + if self.calculate_adjoined: + abs_max_sens = self.calculate_adjoined_sensitivities(simulation, reactor, absolute_vertex_flux, len(p)) + return c_max, c_final, absolute_vertex_flux, absolute_edge_flux, abs_max_sens, additional_c + + def valid_model_solution(self, solution: Any) -> bool: + """ + Returns true if the solution (julia object) is valid for the kinetic model, i.e., the numerical integration + of the ODE system was successful. + """ + if all(solution.t < 0.99 * self.max_time): + return False + return True + + @staticmethod + def concentrations(simulation, volume, time_points: Optional[List[float]] = None)\ + -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]: + """ + Getter for the concentrations, max concentrations, and final concentrations from a finished simulation run. + Note that this functions assumes that Julia was imported previously. + """ + # pylint: disable=import-error + from julia import ReactionMechanismSimulator as rms + # pylint: enable=import-error + concentrations: np.ndarray = rms.concentrations(simulation) + if concentrations.shape[1] == 1: + raise RuntimeError("The numerical integration failed! 
The system of ODEs is probably ill conditioned.") + concentrations *= volume + maximum_concentrations = np.amax(concentrations, axis=1) + final_concentrations = concentrations[:, -1] + + additional_c: Optional[np.ndarray] = None + if time_points is not None and time_points: + additional_c = np.array([rms.concentrations(simulation, t) for t in time_points]) + additional_c *= volume + return maximum_concentrations, final_concentrations, additional_c + + def integrate_results(self, simulation, volume, times: Optional[List[float]] = None, + solution=None): + """ + Integrate the absolute reaction rates to get vertex and edge fluxes and calculate final and maximum + concentrations. + """ + start = datetime.now() + maximum_concentrations, final_concentrations, additional_c = self.concentrations(simulation, volume, times) + absolute_edge_flux = self._calculate_absolute_edge_flux(simulation, self.max_time, solution) + # The initial conditions parsed to RMS are understood as particle numbers N. The concentrations are + # therefore c = N/V. We want to retrieve the particle number again and must multiply with V. + # Note however, that V may not be constant during the kinetic modeling if not explicitly enforced. + # For practical reasons we use the initial reactor volume here. + absolute_vertex_flux = self._calculate_absolute_vertex_flux(absolute_edge_flux) + end = datetime.now() + print("Integration timing:", end - start) + return maximum_concentrations, final_concentrations, absolute_vertex_flux, absolute_edge_flux, additional_c + + def _calculate_absolute_vertex_flux(self, absolute_edge_flux: np.ndarray): + """ + Calculate the absolute vertex fluxes. 
+ """ + n_aggregates = len(self.a_str_ids) + vertex_flux = np.zeros(n_aggregates) + for i, reactants in enumerate(self.reactants): + all_reactant_str_ids = [a_id for a_id in reactants[0] + reactants[1]] + edge_flux = absolute_edge_flux[i] + for a_str_id in all_reactant_str_ids: + a_index = self.a_str_ids.index(a_str_id) + vertex_flux[a_index] += edge_flux + return vertex_flux + + def _calculate_absolute_edge_flux(self, simulation, t_max: float, solution=None): + """ + Calculate the absolute edge fluxes. + """ + from scipy import integrate + # pylint: disable=import-error + from julia import ReactionMechanismSimulator as rms + # pylint: enable=import-error + + n_reactions = len(self.reactants) + edge_flux = np.zeros(n_reactions, dtype=float) + # The concentrations (usually) become smooth after the first few seconds. Therefore, we use a logarithmic + # spacing of the integration points. + if solution is None: + n_steps = int(np.log10(t_max) * 5e+2) + times: Union[np.ndarray, List[float]] = np.logspace(np.log10(1e-12), np.log10(t_max), num=n_steps, + dtype=float) + rates = np.zeros((n_reactions, n_steps)) + for i, t in enumerate(times): + rates[:, i] = rms.rates(simulation, min(t, t_max)) + else: + raw_times = solution.t + raw_rates = rms.rates(simulation) + # Sometimes the ODE solver is "stuck" for a few steps on a tiny dt. This would lead to "nan" in the + # flux integration. Therefore, we eliminate all dt =< 1e-12 before integrating. + time_list = [i for i in range(len(raw_times)) if i == 0 or abs(raw_times[i] - raw_times[i - 1]) > 1e-12] + times = [raw_times[i] for i in time_list] + rates = np.transpose(np.array([raw_rates[:, i] for i in time_list])) + for i in range(n_reactions): + abs_rates_i = np.abs(rates[i, :]) + edge_flux[i] = integrate.simps(abs_rates_i, times) + return edge_flux + + def get_aggregate_to_reaction_map(self): + """ + Getter for the aggregate string id to reaction string id map. 
+ """ + if self._aggregates_to_reactions is None: + self._aggregates_to_reactions = {a_id: [] for a_id in self.a_str_ids} + for reactants, r_str_id in zip(self.reactants, self.r_str_ids): + for a_id in reactants[0] + reactants[1]: + self._aggregates_to_reactions[a_id].append(r_str_id) + return self._aggregates_to_reactions + + def translate_minimum_change_to_barriers(self, ea: np.ndarray, h: Union[List[float], np.ndarray], + s: Union[List[float], np.ndarray], a_str_id: str) -> np.ndarray: + """ + Ensure that changing the enthalpy of the aggregate with string id a_str_id did not lead to a negative reverse + reaction barrier. If this is the case, the reaction barrier is increased to make the reverse reaction barrier + zero. + + Parameters + ---------- + ea :: np.ndarray + The activation energies (in J/mol). + h :: Union[List[float], np.ndarray] + The enthalpies. + s :: Union[List[float], np.ndarray] + The entropies. + a_str_id :: str + The aggregate for which the enthalpy is changed. + + Return + ------ + Returns the updated activation energies. + """ + for r_str_id in self.get_aggregate_to_reaction_map()[a_str_id]: + r_index = self.r_str_ids.index(r_str_id) + reactants = self.reactants[r_index] + lhs_indices = [self.a_str_ids.index(a_id) for a_id in reactants[0]] + rhs_indices = [self.a_str_ids.index(a_id) for a_id in reactants[1]] + g_lhs = sum([h[i] - self.temperature * s[i] for i in lhs_indices]) + g_rhs = sum([h[i] - self.temperature * s[i] for i in rhs_indices]) + g_ts = g_lhs + ea[r_index] + g_ts = max(g_ts, g_lhs, g_rhs) + ea[r_index] = g_ts - g_lhs + return ea + + def ensure_non_negative_barriers(self, ea: np.ndarray, h: Union[List[float], np.ndarray], + s: Union[List[float], np.ndarray]) -> np.ndarray: + """ + Ensure that a change in the enthalpies or activation energies did not lead to negative reverse reaction barriers + for any reaction. + + Parameters + ---------- + ea :: np.ndarray + The activation energies (in J/mol). 
+ h :: Union[List[float], np.ndarray] + The enthalpies. + s :: Union[List[float], np.ndarray] + The entropies. + + Returns the updated activation energies. + """ + for a_str_id in self.a_str_ids: + ea = self.translate_minimum_change_to_barriers(ea, h, s, a_str_id) + return ea + + def get_n_aggregates(self, with_solvent: bool = True): + """ + Get the number of aggregates in the RMS kinetic modeling. Note that this number may be larger than the + number of input aggregates if a solvent species was added to the kinetic modeling. This additional species + can be excluded by with_solvent=False + """ + n_aggregates = len(self.h) + if self.solvent_species_added and with_solvent: + n_aggregates += 1 + return n_aggregates + + def get_n_reactions(self): + """ + Getter for the number of reactions. + """ + return len(self.ea) + + def get_n_parameters(self) -> int: + """ + Getter for the total number of microkinetic model parameters. + """ + return self.get_n_aggregates(with_solvent=False) + self.get_n_reactions() + + def get_all_parameters(self): + """ + Getter for the full list of parameters. + """ + full_parameters = np.empty(self.get_n_parameters()) + full_parameters[:self.get_n_aggregates(with_solvent=False)] = self.h + full_parameters[self.get_n_aggregates(with_solvent=False):] = self.ea + return full_parameters diff --git a/scine_puffin/utilities/scine_helper.py b/scine_puffin/utilities/scine_helper.py index d285512..dcb8fee 100644 --- a/scine_puffin/utilities/scine_helper.py +++ b/scine_puffin/utilities/scine_helper.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """scine_helper.py: Collection of common procedures to be carried out in scine jobs""" __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. 
""" @@ -22,8 +22,8 @@ class SettingsManager: def __init__( self, - method_family: db.Model.method_family, - program: db.Model.program, + method_family: str, + program: str, calculator_settings: Union[dict, None] = None, task_settings: Union[dict, None] = None, ): @@ -79,7 +79,7 @@ def setting_is_available(self, setting_key: str): ) return False - def separate_settings(self, calculation_settings: dict): + def separate_settings(self, calculation_settings: utils.ValueCollection): """ Extract calculator and task settings from the given calculation settings. Uses the information of the settings which are available for a calculator. diff --git a/scine_puffin/utilities/surface_helper.py b/scine_puffin/utilities/surface_helper.py new file mode 100644 index 0000000..4f295c0 --- /dev/null +++ b/scine_puffin/utilities/surface_helper.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. +""" + +from ast import literal_eval +from platform import python_version + +from pymatgen.core import Lattice +from pymatgen.core.surface import Slab +import pymatgen +import scine_database as db +import scine_utilities as utils + + +def get_slab_dict(structure: db.Structure, properties: db.Collection) -> dict: + """ + Generate the dictionary defining a pymatgen Slab object from a database Structure, + which must hold the required information as a property. 
+ + Notes + ----- + This code is taken from scine.chemoton.utilities.surfaces.pymatgen_interface + + + Parameters + ---------- + structure : db.Structure + The periodic Structure + properties : db.Collection + The properties collection to link the structure's properties + + Returns + ------- + dict + A dictionary that can be used as a constructor for the pymatgen.core.surface.Slab + + Raises + ------ + RuntimeError + The property 'slab_dict' not present + """ + if not structure.has_property("slab_dict"): + raise RuntimeError(f"Slab information is missing for structure '{str(structure)}'") + dict_info = db.StringProperty(structure.get_property('slab_dict'), properties) + dict_info_string = dict_info.get_data() + # remove some specific extra strings from representation to be able to make dict out of string + dict_info_string = dict_info_string.replace("]])", "]]") + dict_info_string = dict_info_string.replace("array(", "") + # transform into dict + return literal_eval(dict_info_string) + + +def update_slab_dict(structure: db.Structure, properties: db.Collection, replace_property: bool = False) -> None: + """ + Update the slab dict property of the given structure with its current + positions and periodic boundary conditions + + Notes + ----- + This code is taken from scine.chemoton.utilities.surfaces.pymatgen_interface + + Parameters + ---------- + structure : db.Structure + The structure holding the property + properties : db.Collection + The properties collection to link the structure's properties + replace_property : bool, optional + If the old property should be replaced with the new one + + Raises + ------ + RuntimeError + The structure is not periodic + """ + slab_dict_name = "slab_dict" + slab = Slab.from_dict(get_slab_dict(structure, properties)) + pbc_string = structure.get_model().periodic_boundaries + if not pbc_string or pbc_string.lower() == "none": + raise RuntimeError("Structure is missing periodic boundary conditions") + pbc = 
utils.PeriodicBoundaries(pbc_string) + atoms = structure.get_atoms() + ele = [utils.ElementInfo.symbol(e) for e in atoms.elements] + coords = pbc.transform(atoms.positions, False) + lattice = _construct_pmg_lattice(pbc) + new_slab = Slab(lattice, ele, coords, slab.miller_index, slab.oriented_unit_cell, slab.shift, + slab.scale_factor, coords_are_cartesian=False) + if not replace_property: + dict_property = db.StringProperty(structure.get_property(slab_dict_name), properties) + dict_property.set_data(str(new_slab.as_dict())) + return + structure.clear_properties(slab_dict_name) + new_property = db.StringProperty.make(slab_dict_name, structure.get_model(), + str(new_slab.as_dict()), properties) + structure.set_property(slab_dict_name, new_property.id()) + + +def _construct_pmg_lattice(pbc: utils.PeriodicBoundaries) -> Lattice: + matrix = pbc.matrix * utils.ANGSTROM_PER_BOHR + if python_version() >= "3.8": + from importlib.metadata import version + pymatgen_version = version(pymatgen.__name__) + elif hasattr(pymatgen, "__version__"): + pymatgen_version = getattr(pymatgen, "__version__") + else: + pymatgen_version = "0" + if pymatgen_version >= "2022": + return Lattice(matrix, pbc.periodicity) # type: ignore # pylint: disable=too-many-function-args + return Lattice(matrix) diff --git a/scine_puffin/utilities/transfer_helper.py b/scine_puffin/utilities/transfer_helper.py new file mode 100644 index 0000000..70b5247 --- /dev/null +++ b/scine_puffin/utilities/transfer_helper.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +See LICENSE.txt for details. +""" + +from abc import ABC, abstractmethod +from typing import List + +import scine_database as db + + +class TransferHelper(ABC): + """ + An abstract base class to transfer properties from one or more structures + to one or more other structures. 
+ """ + + @abstractmethod + def transfer_properties(self, old_structure: db.Structure, new_structure: db.Structure, + properties_to_transfer: List[str]) \ + -> None: + """ + Transfer properties between individual structures based on given property names + + Parameters + ---------- + old_structure : db.Structure + The structure holding the properties + new_structure : db.Structure + The structure receiving the properties + properties_to_transfer : List[str] + The names of the properties to transfer + """ + raise NotImplementedError + + @abstractmethod + def transfer_properties_between_multiple(self, + old_structures: List[db.Structure], + new_structures: List[db.Structure], + properties_to_transfer: List[str]) -> None: + """ + Transfer properties between multiple structures based on given property names + + Parameters + ---------- + old_structures : List[db.Structure] + The structures holding the properties + new_structures : List[db.Structure] + The structures receiving the properties + properties_to_transfer : List[str] + The names of the properties to transfer + """ + raise NotImplementedError + + @staticmethod + def simple_transfer_all(old_structure: db.Structure, new_structure: db.Structure, properties: List[str]) \ + -> None: + """ + Simply set the id of the given properties from one structure to another one + + Parameters + ---------- + old_structure : db.Structure + The structure holding the properties + new_structure : db.Structure + The structure receiving the properties + properties : List[str] + The names of the properties to transfer + """ + for prop in properties: + TransferHelper.simple_transfer(old_structure, new_structure, prop) + + @staticmethod + def simple_transfer(old_structure: db.Structure, new_structure: db.Structure, property_to_transfer: str) \ + -> None: + """ + Transfer a single property from one structure to another. 
+ + Parameters + ---------- + old_structure : db.Structure + The structure holding the properties + new_structure : db.Structure + The structure receiving the properties + property_to_transfer : str + The name of the property to transfer + """ + if old_structure.has_property(property_to_transfer): + prop_id = old_structure.get_property(property_to_transfer) + new_structure.set_property(property_to_transfer, prop_id) diff --git a/scine_puffin/utilities/turbomole_helper.py b/scine_puffin/utilities/turbomole_helper.py index 400a202..9d02c31 100644 --- a/scine_puffin/utilities/turbomole_helper.py +++ b/scine_puffin/utilities/turbomole_helper.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """turbomole_helper.py: Collection of common procedures to be carried out with turbomole""" __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ diff --git a/scripts/rms/build_rms.sh b/scripts/rms/build_rms.sh new file mode 100755 index 0000000..d7803a0 --- /dev/null +++ b/scripts/rms/build_rms.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# This code is licensed under the 3-clause BSD license. +# Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. +# See LICENSE.txt for details. + +# This script creates a new Python environment (python-3.7) and installs julia and +# the reaction mechanism simulator. Note that the script requires an existing conda +# installation and the path to this installation in the variable conda_home (vide infra). 
+ +# Assumes that conda exists at the following path +export conda_home=The-path-to-the-anaconda-installation +export rms_path=$PWD/ReactionMechanismSimulator +export rms_so_path=$rms_path/rms.so +export path_to_conda_env=$PWD/rms_conda_env + +# Download RMS development version +git clone https://github.com/ReactionMechanismGenerator/ReactionMechanismSimulator.jl.git "$rms_path" +cd "$rms_path" || exit 1 # stop if the clone failed, so the checkout below never runs in another repository +# This script was tested with the following commit (Aug. 8. 2023, 19:32). The source code has likely changed since then and may include additional bug +# fixes and improvements. +git checkout 8fa7f60e2ef62050ba365503ddb95c16933752c3 +cd .. +# export conda commands + create environment +. "$conda_home/etc/profile.d/conda.sh" +conda env create --file "$rms_path/environment.yml" --prefix "$path_to_conda_env" +conda activate "$path_to_conda_env" + +# install julia and RMS +conda install -c rmg "julia>=1.8.5,!=1.9.0" "pyjulia>=0.6" + +export JULIA_NUM_THREADS=8 +export path_to_python=`which python` +echo 'import Pkg' > install_rms.jl +echo 'Pkg.add("PyCall")' >> install_rms.jl +echo 'ENV["CONDA_JL_HOME"] = "'$conda_home'"' >> install_rms.jl +echo 'Pkg.build("Conda")' >> install_rms.jl +echo 'ENV["PYTHON"] = "'$path_to_python'"' >> install_rms.jl +echo 'Pkg.build("PyCall")' >> install_rms.jl +echo 'Pkg.add("DiffEqBase")' >> install_rms.jl +echo 'Pkg.build("DiffEqBase")' >> install_rms.jl +echo 'Pkg.add("DifferentialEquations")' >> install_rms.jl +echo 'Pkg.build("DifferentialEquations")' >> install_rms.jl +echo 'Pkg.develop(Pkg.PackageSpec(path="'$rms_path'"))' >> install_rms.jl +echo 'Pkg.build("ReactionMechanismSimulator")' >> install_rms.jl +echo "Julia install script" +julia ./install_rms.jl + +# create system image +python -m julia.sysimage "$rms_so_path" +# We need diffeqpy for the python bindings of SciML.
+pip install diffeqpy==1.2.0 + +# Check installation +python -c 'from julia import Julia; jl = Julia(sysimage="'$rms_so_path'"); from diffeqpy import de;' +python -c 'from julia import Julia; jl = Julia(sysimage="'$rms_so_path'"); from julia import ReactionMechanismSimulator' + diff --git a/scripts/rms/scine2rms.py b/scripts/rms/scine2rms.py new file mode 100644 index 0000000..e9844a4 --- /dev/null +++ b/scripts/rms/scine2rms.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +__copyright__ = """ This code is licensed under the 3-clause BSD license. +Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +See LICENSE.txt for details. +""" + +""" +This script creates an RMS input file from a SCINE database. +Requires scine_chemoton. +""" + +import scine_utilities as utils +import scine_database as db + +from scine_puffin.utilities.rms_input_file_creator import create_rms_yml_file +from scine_chemoton.gears.kinetic_modeling.rms_network_extractor import ReactionNetworkData +from scine_chemoton.gears.kinetic_modeling.kinetic_modeling import KineticModeling +from scine_chemoton.utilities.model_combinations import ModelCombination +from scine_chemoton.gears.kinetic_modeling.atomization import ZeroEnergyReference +from scine_chemoton.utilities.db_object_wrappers.thermodynamic_properties import ReferenceState + + +def get_options() -> KineticModeling.Options: + """ + Define the electronic structure models and options for the rate calculations here.
+ """ + T = 150 + 273.15 # 150 degree Celsius + p = utils.MOLAR_GAS_CONSTANT * T / 1e-3 # 1 mol/L + + model = db.Model("gfn2", "gfn2", "") + model.solvation = "gbsa" + model.solvent = "toluene" + model.program = "xtb" + model.pressure = p + model.temperature = T + + dft_struc_model = db.Model("dft", "pbe-d3bj", "def2-sv(p)") + dft_struc_model.solvation = "cosmo" + dft_struc_model.solvent = "toluene" + dft_struc_model.program = "turbomole" + dft_struc_model.pressure = model.pressure + dft_struc_model.temperature = model.temperature + + model_single_points = db.Model("dft", "pbe0-d3bj", "def2-tzvp") + model_single_points.solvation = "cosmo" + model_single_points.solvent = "toluene" + model_single_points.program = "turbomole" + model_single_points.pressure = model.pressure + model_single_points.temperature = model.temperature + + options = KineticModeling.Options() # instance, not the class, so the shared class defaults stay untouched + options.model_combinations = [ModelCombination(model_single_points, dft_struc_model), + ModelCombination(model_single_points, model)] + options.model_combinations_reactions = [ModelCombination(model_single_points, dft_struc_model), + ModelCombination(model_single_points, model)] + options.reference_state = ReferenceState(T, p) + options.max_barrier = 300.0 # kJ/mol + options.only_electronic = False + options.min_flux_truncation = 1e-9 + return options + + +if __name__ == "__main__": + manager = db.Manager() + db_name = "my-database" + db_ip = "my-database-ip" + db_port = 27017 + credentials = db.Credentials(db_ip, db_port, db_name) + manager.set_credentials(credentials) + manager.connect(False, 60, 120) + reactions = manager.get_collection("reactions") + + kinetic_modeling_options = get_options() + refs = [ZeroEnergyReference(c.electronic_model) for c in kinetic_modeling_options.model_combinations] + network_data = ReactionNetworkData(manager, kinetic_modeling_options, refs) + + rms_file_name = "chem.rms" + solvent = "toluene" + solvent_viscosity = None + solvent_aggregate_index = None +
create_rms_yml_file(network_data.aggregate_ids, network_data.enthalpies, network_data.entropies, + network_data.prefactors, network_data.exponents, network_data.ea, + [db.ID(str_id) for str_id in network_data.reaction_ids], reactions, rms_file_name, solvent, + solvent_viscosity, solvent_aggregate_index) diff --git a/setup.cfg b/setup.cfg index 8b95362..46278c9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,7 +23,7 @@ max-line-length=120 [pylint.TYPECHECK] # types cannot be inferred leading to false negatives -ignored-modules=scine_utilities,scine_database,scine_molassembler,scine_readuct,scine_sparrow,scine_swoose,scine_kinetx +ignored-modules=scine_utilities,scine_database,scine_molassembler,scine_readuct,scine_sparrow,scine_swoose,scine_kinetx,scine_parrot # regex of default ignored + arg args kwargs ignored-argument-names=_.*|^ignored_|^unused_|arg|args|kwargs @@ -46,4 +46,4 @@ generated-members=Lock ignore=_version.py # accept BaseException -overgeneral-exceptions=Exception +overgeneral-exceptions=builtins.Exception diff --git a/setup.py b/setup.py index fbe6445..bdfe792 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. -Copyright ETH Zurich, Laboratory of Physical Chemistry, Reiher Group. +Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ @@ -41,7 +41,7 @@ setup( name="scine_puffin", version=__version__, - author="ETH Zurich, Laboratory of Physical Chemistry, Reiher Group", + author="ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group", author_email="scine@phys.chem.ethz.ch", description="Calculation handler for SCINE Chemoton", long_description=readme,