From e8601c4c501db7e9f43c5ae30f08446b3a52c7c9 Mon Sep 17 00:00:00 2001 From: Michele Ceriotti Date: Fri, 8 Mar 2024 15:47:20 +0100 Subject: [PATCH] Adding utilities to parse i-PI output files into more standard python / ase objects (#313) * Added utilities to parse ipi output files, and also mention this in the docs * Disable installation of tests, as it's broken! --- docs/src/getting-started.rst | 3 +- docs/src/output-files.rst | 33 +++ .../piglet_liquid_water/input.xml | 1 + ipi/__init__.py | 14 +- ipi/utils/__init__.py | 1 + ipi/utils/io/io_units.py | 5 +- ipi/utils/messages.py | 2 +- ipi/utils/parsing.py | 222 ++++++++++++++++++ setup.cfg | 2 +- setup.py | 8 +- 10 files changed, 277 insertions(+), 14 deletions(-) create mode 100644 ipi/utils/parsing.py diff --git a/docs/src/getting-started.rst b/docs/src/getting-started.rst index 8d352a782..113c7516a 100644 --- a/docs/src/getting-started.rst +++ b/docs/src/getting-started.rst @@ -16,9 +16,8 @@ as the system has installed: - Python version 3.6 or greater (starting from version 2.0, i-PI is not Python2 compatible) - - The Python numerical library NumPy -src/getting-started.rst- + See :ref:`runningsimulations` for more details on how to launch i-PI. Additionally, most client codes will have their own requirements. Many diff --git a/docs/src/output-files.rst b/docs/src/output-files.rst index eda6e1f42..e075f21ec 100644 --- a/docs/src/output-files.rst +++ b/docs/src/output-files.rst @@ -218,3 +218,36 @@ run > python i-pi RESTART + +Reading output files +-------------------- + +It can be useful to parse the output files of i-PI into a format that can +be readily manipulated in a custom Python script. To this end, i-PI provides +a few utilities. `ipi.read_output` that can be used to parse a property output +file, that returns data blocks as a dictionary of numpy array, and additional +information from the header (such as units, and the description of each +property) as a separate dictionary + +.. code-block:: + + from ipi import read_output + data, info = read_output("simulation.out") + +Trajectory files can be read with `ipi.read_trajectory`. This reads the +trajectory output into a list of `ase.Atoms` objects (hence this functionality +has a dependency on `ase`), converting positions and cell to angstrom, and +moving other properties to arrays and converting +them to ASE units (e.g. forces are in eV/Å). `extras` output files (that +contain either numerical data, or raw strings, returned by the driver code +in addition to energy and forces) can be processed by using `format='extras'` +and an option: the call will then return a dictionary with an entry having the +name of the type of extra (if present) and either a list of the raw strings, +or a numpy array with the data. A second dictionary entry contains the list +of step numbers. + +.. code-block:: + + from ipi import read_trajectory + data = read_trajectory("simulation.dipoles", format="extras") + diff --git a/examples/features/path_integral_molecular_dynamics/piglet_liquid_water/input.xml b/examples/features/path_integral_molecular_dynamics/piglet_liquid_water/input.xml index 8065346ea..1db387739 100644 --- a/examples/features/path_integral_molecular_dynamics/piglet_liquid_water/input.xml +++ b/examples/features/path_integral_molecular_dynamics/piglet_liquid_water/input.xml @@ -2,6 +2,7 @@ [ step, time{picosecond}, conserved, temperature{kelvin}, kinetic_cv, potential, pressure_cv{megapascal}] positions{angstrom} + forces 100 diff --git a/ipi/__init__.py b/ipi/__init__.py index c37bad218..54fecc771 100644 --- a/ipi/__init__.py +++ b/ipi/__init__.py @@ -2,6 +2,18 @@ The i-PI package. """ -__all__ = ["clients", "engine", "inputs", "interfaces", "utils", "ipi_global_settings"] +# expose some utility functions in a more direct way +from ipi.utils.parsing import read_output, read_trajectory + +__all__ = [ + "clients", + "engine", + "inputs", + "interfaces", + "utils", + "ipi_global_settings", + "read_output", + "read_trajectory", +] ipi_global_settings = {"floatformat": "%16.8e"} diff --git a/ipi/utils/__init__.py b/ipi/utils/__init__.py index acbbb8f32..e084081d6 100644 --- a/ipi/utils/__init__.py +++ b/ipi/utils/__init__.py @@ -11,6 +11,7 @@ "mathtools", "prng", "inputvalue", + "parsing", "nmtransform", "messages", "softexit", diff --git a/ipi/utils/io/io_units.py b/ipi/utils/io/io_units.py index fcd649d52..b01685575 100755 --- a/ipi/utils/io/io_units.py +++ b/ipi/utils/io/io_units.py @@ -125,7 +125,4 @@ def process_units( atoms.names[:] = names atoms.m[:] = masses - return { - "atoms": atoms, - "cell": cell, - } + return {"atoms": atoms, "cell": cell, "comment": comment} diff --git a/ipi/utils/messages.py b/ipi/utils/messages.py index 5d3cadf2f..35d122d70 100644 --- a/ipi/utils/messages.py +++ b/ipi/utils/messages.py @@ -107,7 +107,7 @@ def banner(): | ################################# | \__#_/ \____/ \____/ \_#__/ # _ _______ _____ # - # (_) |_ __ \|_ _| # -*- v 2.6.0 -*- + # (_) |_ __ \|_ _| # -*- v 2.6.3 -*- # __ ______ | |__) | | | # Y [ ||______|| ___/ | | # A Universal Force Engine 0 0 | | _| |_ _| |_ # diff --git a/ipi/utils/parsing.py b/ipi/utils/parsing.py new file mode 100644 index 000000000..a1612ea5b --- /dev/null +++ b/ipi/utils/parsing.py @@ -0,0 +1,222 @@ +""" Utility functions to parse i-PI output files. + +These are meant to be used in Python post-processing pipelines, so +trajectory files are read as ASE objects (assuming units to be +Angstrom and eV), and output files are read in a dictionary of +numpy arrays. +""" + +import re +import numpy as np +from ipi.utils.units import unit_to_user + +try: + import ase +except ImportError: + ase = None + +__all__ = ["read_output", "read_trajectory"] + + +def read_output(filename): + """Reads an i-PI output file and returns a dictionary with the properties in a tidy order, + and information on units and descriptions of the content. + + Usage: + read_output("filename") + + Returns: + values, info + + values: a dictionary with the property names as keys, and the values as numpy arrays + info: a dictionary with the property names as keys and as values tuples of (units, description) + """ + + # Regex pattern to match header lines and capture relevant parts + header_pattern = re.compile( + r"#\s*(column|cols\.)\s+(\d+)(?:-(\d+))?\s*-->\s*([^\s\{]+)(?:\{([^\}]+)\})?\s*:\s*(.*)" + ) + + # Reading the file + with open(filename, "r") as file: + lines = file.readlines() + + header_lines = [line for line in lines if line.startswith("#")] + data_lines = [line for line in lines if not line.startswith("#") and line.strip()] + + # Interprets properties + properties = {} + for line in header_lines: + match = header_pattern.match(line) + if match: + # Extracting matched groups + col_type, start_col, end_col, property_name, units, description = ( + match.groups() + ) + col_info = f"{start_col}-{end_col}" if end_col else start_col + properties[col_info] = { + "name": property_name, + "units": units, + "description": description, + } + + # Parse data + values_dict = {} + info_dict = {} + for col_info, prop_info in properties.items(): + # Initialize list to hold values for each property + values_dict[prop_info["name"]] = [] + # Save units and description + info_dict[prop_info["name"]] = (prop_info["units"], prop_info["description"]) + + for line in data_lines: + values = line.split() + for column_info, prop_info in properties.items(): + if "-" in column_info: # Multi-column property + start_col, end_col = map( + int, column_info.split("-") + ) # 1-based indexing + prop_values = values[ + start_col - 1 : end_col + ] # Adjust to 0-based indexing + else: # Single column property + col_index = int(column_info) - 1 # Adjust to 0-based indexing + prop_values = [values[col_index]] + + values_dict[prop_info["name"]].append([float(val) for val in prop_values]) + + for prop_name, prop_values in values_dict.items(): + values_dict[prop_name] = np.array( + prop_values + ).squeeze() # make 1-col into a flat array + + return values_dict, info_dict + + +def read_trajectory( + filename, + format=None, + dimension="automatic", + units="automatic", + cell_units="automatic", +): + """Reads a file in i-PI format and returns it in ASE format. + + `format` can be `xyz` (i-PI formatted), `pdb`, `binary`, `json`, `ase`, and if not specified it'll + be inferred from the filename extension. `extras` will read a trajectory containing the additional + data returned from a calculator; will try to read as a float array, and if it fails it'll resort + to returning a list of raw strings. + units can be inferred from the file content, or specified with `dimension`, `units` and `cell_units` + """ + + if ase is None: + raise ImportError( + "read_trajectory requires the `ase` package to return the structure in ASE format" + ) + + from ipi.utils.io import read_file + + if format is None: + # tries to infer the file format + format = filename[filename.rfind(".") + 1 :] + if format not in ["xyz", "pdb", "binary", "json", "ase"]: + raise ValueError(f"Unrecognized file format: {format}") + + file_handle = open(filename, "r") + bohr2angstrom = unit_to_user("length", "angstrom", 1.0) + comment_regex = re.compile(r"(\w+)\{([^}]+)\}") + step_regex = re.compile(r"Step:\s+(\d+)") + + frames = [] + while True: + try: + if format == "extras": + file = open(filename, "r") + step_regex = re.compile(r"#EXTRAS\((\w+)\)# *Step:\s+(\d+)") + step_list = [] + data_list = [] + data_frame = [] + is_array = True + property_name = "" + for line in file: + matches = step_regex.findall(line) + if len(matches) > 0: + if len(data_frame) > 0: + try: + data_processed = np.loadtxt(data_frame) + except: + is_array = False + data_processed = "\n".join(data_frame) + data_list.append(data_processed) + data_frame = [] + + # Found a new step, update current_step and initialize the list for data + step_list.append(int(matches[0][1])) + if property_name == "": + property_name = matches[0][0] + elif property_name != matches[0][0]: + raise ValueError( + f"Inconsistent property {matches[0][0]} found in extras containing {property_name}" + ) + else: + data_frame.append(line) + + if len(data_frame) > 0: + try: + data_processed = np.loadtxt(data_frame) + except: + is_array = False + data_processed = "\n".join(data_frame) + data_list.append(data_processed) + if is_array: + data_list = np.array(data_list).squeeze() + return { + "step": np.asarray(step_list, dtype=int), + property_name: data_list, + } + else: + ret = read_file( + format, + file_handle, + dimension=dimension, + units=units, + cell_units=cell_units, + ) + + frame = ase.Atoms( + ret["atoms"].names, + positions=ret["atoms"].q.reshape((-1, 3)) * bohr2angstrom, + cell=ret["cell"].h.T * bohr2angstrom, + pbc=True, + ) + + # parse comment to get the property + matches = comment_regex.findall(ret["comment"]) + + # get what we have found + if len(matches) >= 2: + what = matches[-2][0] + else: # defaults to reading positions + what = "positions" + + # ... and the step + matches = step_regex.findall(ret["comment"]) + if len(matches) >= 1: + frame.info["step"] = int(matches[-1][0]) + + # if we have forces, set positions to zero (that data is missing!) and set forces instead + if what == "forces": + # set forces and convert to eV/angstrom + frame.positions *= 0 + frame.arrays["forces"] = ret["atoms"].q.reshape( + (-1, 3) + ) * unit_to_user("force", "ev/ang", 1.0) + + frames.append(frame) + + except EOFError: + break + except: + raise + + return frames diff --git a/setup.cfg b/setup.cfg index a2c9dcb6a..ff9f9d8b3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = ipi -version = 2.6.2 +version = 2.6.3 description = A Python interface for ab initio path integral molecular dynamics simulations long_description = file: README.md long_description_content_type = text/x-rst diff --git a/setup.py b/setup.py index 69e020cf3..bf8364962 100644 --- a/setup.py +++ b/setup.py @@ -2,19 +2,17 @@ from setuptools import setup, find_packages +# exclude i-pi-driver that is a compiled fortran file when installing from a local dir +scripts = [str(p) for p in Path("bin").iterdir() if p.name != "i-pi-driver"] setup( packages=[ *find_packages(exclude=["ipi_tests*"]), "ipi._driver", "ipi._driver.pes", - "ipi.tests", - "ipi.tests.regression_tests", ], package_dir={ "ipi._driver": "drivers/py", - "ipi.tests": "ipi_tests", }, - package_data={"ipi.tests.regression_tests": ["tests/NVE/NVE_1/harmonic_python/*"]}, - scripts=[str(p) for p in Path("bin").iterdir()], + scripts=scripts, )