Skip to content

Commit

Permalink
Merge pull request #36849 from ajjackson/euphonic-json
Browse files Browse the repository at this point in the history
Abins: JSON data import
  • Loading branch information
robertapplin authored Mar 8, 2024
2 parents e7bf07c + d58d3f3 commit 8bef42b
Show file tree
Hide file tree
Showing 28 changed files with 266 additions and 44 deletions.
1 change: 1 addition & 0 deletions Testing/Data/UnitTest/NH3_abinsdata_LoadJSON.json.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3cd81ba34aa7513d3230a7089e2772ba
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
f32b26a2342571f1f6bc348a4d3dd2cc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
f32b26a2342571f1f6bc348a4d3dd2cc
1 change: 1 addition & 0 deletions Testing/Data/UnitTest/NH3_abinsdata_LoadJSON_data.txt.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1339c62d0bd3081ca95d0d1512236ba8
1 change: 1 addition & 0 deletions Testing/Data/UnitTest/NH3_euphonic_fc_LoadJSON.json.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
e188ddc55518578b269fdfb4a03c6687
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
265f00377a073b32897f86d2095a76aa
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
881d2ba21adb714d1d85aca8d24ae44c
1 change: 1 addition & 0 deletions Testing/Data/UnitTest/NH3_euphonic_modes_LoadJSON.json.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
811084a43f90e92e2a23999fdf072425
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
f32b26a2342571f1f6bc348a4d3dd2cc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
f32b26a2342571f1f6bc348a4d3dd2cc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
42f00fe94801f14612a02a2da5167527
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
- Abins and Abins2D now support JSON file import. Supported formats are:

- AbinsData (dump of internal object, intended for development and testing)
- euphonic.QpointPhononModes (an equivalent set of data dumped from
the Euphonic library)
- euphonic.ForceConstants (force constants which may be manipulated
in Euphonic, and will be converted to phonon modes on a q-point
mesh when Abins(2D) is run)

The Euphonic JSON formats are convenient to create with Python
scripts, and are recommended for users who wish to customise or
manipulate their data before using it with Abins(2D).
20 changes: 19 additions & 1 deletion scripts/abins/abinsalgorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import abins
from abins.constants import AB_INITIO_FILE_EXTENSIONS, ALL_INSTRUMENTS, ATOM_PREFIX
from abins.input.jsonloader import abins_supported_json_formats, JSONLoader
from abins.instruments import get_instrument, Instrument


Expand Down Expand Up @@ -98,7 +99,7 @@ def declare_common_properties(self) -> None:
name="AbInitioProgram",
direction=Direction.Input,
defaultValue="CASTEP",
validator=StringListValidator(["CASTEP", "CRYSTAL", "DMOL3", "FORCECONSTANTS", "GAUSSIAN", "VASP"]),
validator=StringListValidator(["CASTEP", "CRYSTAL", "DMOL3", "FORCECONSTANTS", "GAUSSIAN", "JSON", "VASP"]),
doc="An ab initio program which was used for vibrational or phonon calculation.",
)

Expand Down Expand Up @@ -223,6 +224,7 @@ def validate_common_inputs(self, issues: dict = None) -> Dict[str, str]:
"DMOL3": self._validate_dmol3_input_file,
"FORCECONSTANTS": self._validate_euphonic_input_file,
"GAUSSIAN": self._validate_gaussian_input_file,
"JSON": self._validate_json_input_file,
"VASP": self._validate_vasp_input_file,
}
ab_initio_program = self.getProperty("AbInitioProgram").value
Expand Down Expand Up @@ -806,6 +808,22 @@ def _validate_euphonic_input_file(cls, filename_full_path: str) -> dict:
# Did not return already: No problems found
return dict(Invalid=False, Comment="")

@classmethod
def _validate_json_input_file(cls, filename_full_path: str) -> dict:
    """Validate a JSON input file holding vibrational or phonon data.

    The file extension is checked first; only if it is acceptable is the
    file opened to identify which JSON format it contains.

    :param filename_full_path: full path of the JSON file to check
    :returns: dict with keys "Invalid" (bool) and "Comment" (str)
    """
    logger.information("Validate JSON file with vibrational or phonon data.")

    extension_check = cls._validate_ab_initio_file_extension(
        ab_initio_program="JSON",
        filename_full_path=filename_full_path,
        expected_file_extension=".json",
    )
    if extension_check["Invalid"]:
        # Replace the comment from the generic extension check with a JSON-specific one
        extension_check["Comment"] = ".json extension is expected for a JSON file"
        return extension_check

    detected_format = JSONLoader.check_json_format(filename_full_path)
    if detected_format not in abins_supported_json_formats:
        return {"Invalid": True, "Comment": f"Found unsupported JSON file format: {detected_format.name}"}

    return {"Invalid": False, "Comment": f"Found JSON file format: {detected_format.name}"}

@classmethod
def _validate_vasp_input_file(cls, filename_full_path: str) -> dict:
logger.information("Validate VASP file with vibrational or phonon data.")
Expand Down
2 changes: 1 addition & 1 deletion scripts/abins/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@
MAX_ORDER = 4 # max quantum order event

ALL_SUPPORTED_AB_INITIO_PROGRAMS = ["CRYSTAL", "CASTEP", "DMOL3", "GAUSSIAN", "VASP"]
AB_INITIO_FILE_EXTENSIONS = ["phonon", "out", "outmol", "log", "LOG", "xml", "yaml", "castep_bin", "hdf5"]
AB_INITIO_FILE_EXTENSIONS = ["phonon", "out", "outmol", "log", "LOG", "xml", "yaml", "castep_bin", "hdf5", "json"]

ONE_DIMENSIONAL_INSTRUMENTS = ["TOSCA", "Lagrange"]
TWO_DIMENSIONAL_CHOPPER_INSTRUMENTS = ["MAPS", "MARI", "MERLIN"]
Expand Down
2 changes: 2 additions & 0 deletions scripts/abins/input/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .dmol3loader import DMOL3Loader
from .euphonicloader import EuphonicLoader
from .gaussianloader import GAUSSIANLoader
from .jsonloader import JSONLoader
from .vasploader import VASPLoader

from .tester import Tester
Expand All @@ -17,6 +18,7 @@
"CRYSTAL": CRYSTALLoader,
"DMOL3": DMOL3Loader,
"GAUSSIAN": GAUSSIANLoader,
"JSON": JSONLoader,
"VASP": VASPLoader,
"FORCECONSTANTS": EuphonicLoader,
}
16 changes: 13 additions & 3 deletions scripts/abins/input/abinitioloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

# SPDX - License - Identifier: GPL - 3.0 +
from abc import ABCMeta, abstractmethod
from pathlib import Path
from typing import Sequence

from mantid.kernel import logger
Expand All @@ -29,11 +30,20 @@ class AbInitioLoader(metaclass=NamedAbstractClass):
read_formatted_data() if necessary and caching the results.
"""

def __init__(self, input_ab_initio_filename=None):
self._sample_form = None
self._ab_initio_program = None
def __init__(self, input_ab_initio_filename: str = None):
    """Set up a loader for phonon data stored in an ab initio output file.

    :param input_ab_initio_filename: path of the file with phonon data
    :raises TypeError: if the filename is not a string (this includes the default, None)
    :raises IOError: if the filename does not point to an existing file
    """
    if not isinstance(input_ab_initio_filename, str):
        raise TypeError("Filename must be a string")

    if not Path(input_ab_initio_filename).is_file():
        raise IOError(f"Ab initio file {input_ab_initio_filename} not found.")

    cache_group = abins.parameters.hdf_groups["ab_initio_data"]
    self._clerk = abins.IO(input_filename=input_ab_initio_filename, group_name=cache_group)

# Abstract read-only property: every concrete loader overrides this to return
# the name of its ab initio program as a string (e.g. "CASTEP", "JSON").
@property
@abstractmethod
def _ab_initio_program(self) -> str: ...

@abstractmethod
def read_vibrational_or_phonon_data(self) -> abins.AbinsData:
"""
Expand Down
10 changes: 3 additions & 7 deletions scripts/abins/input/casteploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,9 @@ class CASTEPLoader(AbInitioLoader):
Functions to read phonon file taken from SimulatedDensityOfStates (credits for Elliot Oram.).
"""

def __init__(self, input_ab_initio_filename):
"""
:param input_ab_initio_filename: name of file with phonon data (foo.phonon)
"""
super().__init__(input_ab_initio_filename=input_ab_initio_filename)
self._ab_initio_program = "CASTEP"
@property
def _ab_initio_program(self) -> str:
    """Name of the ab initio program handled by this loader."""
    return "CASTEP"

def read_vibrational_or_phonon_data(self) -> AbinsData:
"""
Expand Down
4 changes: 3 additions & 1 deletion scripts/abins/input/crystalloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ def __init__(self, input_ab_initio_filename=None):
self._inv_expansion_matrix = np.eye(3, dtype=FLOAT_TYPE)
self._parser = TextParser()

self._ab_initio_program = "CRYSTAL"
@property
def _ab_initio_program(self) -> str:
    """Name of the ab initio program handled by this loader."""
    return "CRYSTAL"

def read_vibrational_or_phonon_data(self):
"""
Expand Down
5 changes: 4 additions & 1 deletion scripts/abins/input/dmol3loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,12 @@ def __init__(self, input_ab_initio_filename):
:param input_ab_initio_filename: name of file with vibrational data (foo.outmol)
"""
super().__init__(input_ab_initio_filename=input_ab_initio_filename)
self._ab_initio_program = "DMOL3"
self._norm = 0

@property
def _ab_initio_program(self) -> str:
    """Name of the ab initio program handled by this loader."""
    return "DMOL3"

def read_vibrational_or_phonon_data(self):
"""
Reads vibrational data from DMOL3 output files. Saves frequencies, weights of k-point vectors,
Expand Down
22 changes: 4 additions & 18 deletions scripts/abins/input/euphonicloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
# Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
# SPDX - License - Identifier: GPL - 3.0 +

from pathlib import Path
from typing import Any, Dict

from euphonic import QpointPhononModes
Expand All @@ -19,18 +18,9 @@
class EuphonicLoader(AbInitioLoader):
"""Get frequencies/eigenvalues from force constants using Euphonic"""

def __init__(self, input_ab_initio_filename):
"""
:param input_ab_initio_filename: name of file with phonon data (foo.phonon)
"""
if not isinstance(input_ab_initio_filename, str):
raise TypeError("Filename must be a string")
elif not Path(input_ab_initio_filename).is_file():
raise IOError(f"Ab initio file {input_ab_initio_filename} not found.")

super().__init__(input_ab_initio_filename=input_ab_initio_filename)
self._ab_initio_program = "FORCECONSTANTS"
@property
def _ab_initio_program(self) -> str:
    """Name of the ab initio program handled by this loader."""
    return "FORCECONSTANTS"

@staticmethod
def data_dict_from_modes(modes: QpointPhononModes) -> Dict[str, Any]:
Expand Down Expand Up @@ -72,10 +62,6 @@ def read_vibrational_or_phonon_data(self):
cutoff = sampling_parameters["force_constants"]["qpt_cutoff"]
modes = euphonic_calculate_modes(filename=self._clerk.get_input_filename(), cutoff=cutoff)
file_data = self.data_dict_from_modes(modes)

# save stuff to hdf file
save_keys = ["frequencies", "weights", "k_vectors", "atomic_displacements", "unit_cell", "atoms"]
data_to_save = {key: file_data[key] for key in save_keys}
self.save_ab_initio_data(data=data_to_save)
self.save_ab_initio_data(data=file_data)

return self._rearrange_data(data=file_data)
8 changes: 5 additions & 3 deletions scripts/abins/input/gaussianloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from .abinitioloader import AbInitioLoader
from .textparser import TextParser
from abins.constants import COMPLEX_TYPE, FLOAT_TYPE, ROTATIONS_AND_TRANSLATIONS
from abins.abinsdata import AbinsData
from mantid.kernel import Atom


Expand All @@ -28,12 +27,15 @@ def __init__(self, input_ab_initio_filename) -> None:
:param input_ab_initio_filename: name of file with vibrational data (foo.log or foo.LOG)
"""
super().__init__(input_ab_initio_filename=input_ab_initio_filename)
self._ab_initio_program = "GAUSSIAN"
self._active_atoms = None
self._num_atoms = None
self._num_read_freq = 0

def read_vibrational_or_phonon_data(self) -> AbinsData:
@property
def _ab_initio_program(self) -> str:
    """Name of the ab initio program handled by this loader."""
    return "GAUSSIAN"

def read_vibrational_or_phonon_data(self):
"""
Reads vibrational data from GAUSSIAN output files. Saves frequencies and atomic displacements (only molecular
calculations), hash of file with vibrational data to <>.hdf5.
Expand Down
145 changes: 145 additions & 0 deletions scripts/abins/input/jsonloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# Mantid Repository : https://github.com/mantidproject/mantid
#
# Copyright &copy; 2024 ISIS Rutherford Appleton Laboratory UKRI,
# NScD Oak Ridge National Laboratory, European Spallation Source,
# Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
# SPDX - License - Identifier: GPL - 3.0 +

from enum import auto, Enum
import json
from pathlib import Path
from typing import Dict

from euphonic import QpointPhononModes
import numpy as np

from .abinitioloader import AbInitioLoader
from .euphonicloader import EuphonicLoader
from abins.abinsdata import AbinsData
from abins.constants import COMPLEX_TYPE, FLOAT_TYPE
from abins.parameters import sampling as sampling_parameters
from dos.load_euphonic import euphonic_calculate_modes

# json-stream implementation converts data lazily so we can quickly check the
# top-level keys even if a data file is huge.
try:
from json_stream import load as json_load
except ImportError:
from json import load as json_load


class PhononJSON(Enum):
    """JSON document types distinguished by JSONLoader.check_json_format"""

    # "__euphonic_class__" is "QpointFrequencies"
    EUPHONIC_FREQUENCIES = auto()
    # "__euphonic_class__" is "QpointPhononModes"
    EUPHONIC_MODES = auto()
    # "__euphonic_class__" is "ForceConstants"
    EUPHONIC_FORCE_CONSTANTS = auto()
    # "__abins_class__" is "AbinsData"
    ABINS_DATA = auto()
    # Neither class key is present, or its value is not one of the above
    UNKNOWN = auto()


# Formats that JSONLoader can turn into AbinsData. EUPHONIC_FREQUENCIES and
# UNKNOWN can be reported by the format check but are not included here.
abins_supported_json_formats = {PhononJSON.EUPHONIC_MODES, PhononJSON.EUPHONIC_FORCE_CONSTANTS, PhononJSON.ABINS_DATA}


class JSONLoader(AbInitioLoader):
"""Get frequencies/eigenvalues from a JSON file using Euphonic"""

@property
def _ab_initio_program(self) -> str:
return "JSON"

@staticmethod
def check_json_format(json_file: str | Path) -> PhononJSON:
"""Check if JSON file is a known phonon data format"""

for class_key in "__euphonic_class__", "__abins_class__":
with open(json_file, "r") as fd:
data = json_load(fd)
data_class = data.get(class_key)

if data_class is not None:
break
else:
class_key = ""

match class_key, data_class:
case ("__euphonic_class__", "QpointPhononModes"):
return PhononJSON.EUPHONIC_MODES
case ("__euphonic_class__", "QpointFrequencies"):
return PhononJSON.EUPHONIC_FREQUENCIES
case ("__euphonic_class__", "ForceConstants"):
return PhononJSON.EUPHONIC_FORCE_CONSTANTS
case ("__abins_class__", "AbinsData"):
return PhononJSON.ABINS_DATA
case _:
return PhononJSON.UNKNOWN

@staticmethod
def array_from_dict(data: Dict[str | int, np.ndarray], complex=False) -> np.ndarray:
"""Convert from dict of n-d arrays to (n+1-d) array
AbinsData uses these dicts so that frequency data rows can have
different lengths after imaginary modes are removed. e.g.
{"0": np.array([1, 2, 3, 4]), "1": np.array([11, 12, 13, 14, 15, 16])}
This method is not intended to handle such ragged arrays gracefully: it
is for serialising the data before anything is removed.
"""
row_shape = data[next(iter(data))].shape

new_array = np.empty([len(data)] + list(row_shape), dtype=(COMPLEX_TYPE if complex else FLOAT_TYPE))
for i in range(len(new_array)):
new_array[i] = data[str(i)]

return new_array

def save_from_abins_data(self, abins_data: AbinsData) -> None:
"""Save data to hdf5 cache from AbinsData format
Usually we construct a data dict for the cache and then use it to
construct AbinsData. Sometimes it makes sense to do it the other way
around, so this method provides the reverse operation.
"""
data = abins_data.get_kpoints_data().extract()
data["atoms"] = abins_data.get_atoms_data().extract()
for key in ("weights", "k_vectors", "frequencies"):
data[key] = self.array_from_dict(data[key])
data["atomic_displacements"] = self.array_from_dict(data["atomic_displacements"], complex=True)

self.save_ab_initio_data(data=data)

def read_vibrational_or_phonon_data(self) -> AbinsData:
"""Get AbinsData (structure and modes) from force constants data.
Frequencies/displacements are interpolated using the Euphonic library
over a regular q-point mesh. The mesh is determined by a Moreno-Soler
realspace cutoff, related to the size of an equivalent
supercell. Meshes are rounded up so a very small cutoff will yield
gamma-point-only sampling.
"""
json_file = self._clerk.get_input_filename()
json_format = self.check_json_format(json_file)

match json_format:
case PhononJSON.ABINS_DATA:
with open(json_file, "r") as fd:
data = json.load(fd)
abins_data = AbinsData.from_dict(data)
self.save_from_abins_data(abins_data)
return abins_data

case PhononJSON.EUPHONIC_MODES:
modes = QpointPhononModes.from_json_file(json_file)

case PhononJSON.EUPHONIC_FORCE_CONSTANTS:
cutoff = sampling_parameters["force_constants"]["qpt_cutoff"]
modes = euphonic_calculate_modes(filename=json_file, cutoff=cutoff)

case _:
raise ValueError(f"Cannot use JSON data of type {json_format.name}")

file_data = EuphonicLoader.data_dict_from_modes(modes)
self.save_ab_initio_data(data=file_data)
return self._rearrange_data(data=file_data)
Loading

0 comments on commit 8bef42b

Please sign in to comment.