Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new format for OpenMX #585

Merged
merged 12 commits into from
Dec 6, 2023
Binary file added .DS_Store
Binary file not shown.
Empty file added dpdata/openmx/__init__.py
Empty file.
197 changes: 197 additions & 0 deletions dpdata/openmx/omx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
#!/usr/bin/python3
import numpy as np

from ..unit import (
EnergyConversion,
ForceConversion,
LengthConversion,
PressureConversion,
)

ry2ev = EnergyConversion("rydberg", "eV").value()
kbar2evperang3 = PressureConversion("kbar", "eV/angstrom^3").value()

length_convert = LengthConversion("bohr", "angstrom").value()
energy_convert = EnergyConversion("hartree", "eV").value()
force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value()

from collections import OrderedDict

### iterout.c from OpenMX soure code: column numbers and physical quantities ###
# /* 1: */
# /* 2,3,4: */
# /* 5,6,7: force *
# /* 8: x-component of velocity */
# /* 9: y-component of velocity */
# /* 10: z-component of velocity */
# /* 11: Net charge, electron charge is defined to be negative. */
# /* 12: magnetic moment (muB) */
# /* 13,14: angles of spin */


def load_atom(lines):
atom_names = []
atom_names_mode = False
for line in lines:
if "<Atoms.SpeciesAndCoordinates" in line:
atom_names_mode = True
elif "Atoms.SpeciesAndCoordinates>" in line:
atom_names_mode = False
elif atom_names_mode:
parts = line.split()
atom_names.append(parts[1])
natoms = len(atom_names)
atom_names_original = atom_names
atom_names = list(OrderedDict.fromkeys(set(atom_names))) # Python>=3.7
atom_names = sorted(
atom_names, key=atom_names_original.index
) # Unique ordering of atomic species
ntypes = len(atom_names)
atom_numbs = [0] * ntypes
atom_types = []
atom_types_mode = False
for line in lines:
if "<Atoms.SpeciesAndCoordinates" in line:
atom_types_mode = True
elif "Atoms.SpeciesAndCoordinates>" in line:
atom_types_mode = False
elif atom_types_mode:
parts = line.split()
for i, atom_name in enumerate(atom_names):
if parts[1] == atom_name:
atom_numbs[i] += 1
atom_types.append(i)
atom_types = np.array(atom_types)
return atom_names, atom_types, atom_numbs


def load_cells(lines):
cell, cells = [], []
for index, line in enumerate(lines):
if "Cell_Vectors=" in line:
parts = line.split()
cell.append([float(parts[12]), float(parts[13]), float(parts[14])])
cell.append([float(parts[15]), float(parts[16]), float(parts[17])])
cell.append([float(parts[18]), float(parts[19]), float(parts[20])])
cells.append(cell)
cell = []
cells = np.array(cells)
return cells


# load atom_names, atom_numbs, atom_types, cells
def load_param_file(fname, mdname):
with open(fname) as dat_file:
lines = dat_file.readlines()
atom_names, atom_types, atom_numbs = load_atom(lines)

with open(mdname) as md_file:
lines = md_file.readlines()
cells = load_cells(lines)
return atom_names, atom_numbs, atom_types, cells


def load_coords(lines, atom_names, natoms):
cnt = 0
coord, coords = [], []
for index, line in enumerate(lines):
if "time=" in line:
continue
for atom_name in atom_names:
atom_name += " "
if atom_name in line:
cnt += 1
parts = line.split()
for_line = [float(parts[1]), float(parts[2]), float(parts[3])]
coord.append(for_line)
if cnt == natoms:
coords.append(coord)
cnt = 0
coord = []
coords = np.array(coords)
return coords


def load_data(mdname, atom_names, natoms):
with open(mdname) as md_file:
lines = md_file.readlines()
coords = load_coords(lines, atom_names, natoms)
steps = [str(i) for i in range(1, coords.shape[0] + 1)]
return coords, steps


def to_system_data(fname, mdname):
data = {}
(
data["atom_names"],
data["atom_numbs"],
data["atom_types"],
data["cells"],
) = load_param_file(fname, mdname)
data["coords"], steps = load_data(
mdname,
data["atom_names"],
np.sum(data["atom_numbs"]),
)
data["orig"] = np.zeros(3)
return data, steps


def load_energy(lines):
energy = []
for line in lines:
if "time=" in line:
parts = line.split()
ene_line = float(parts[4]) # Hartree
energy.append(ene_line)
continue
energy = energy_convert * np.array(energy) # Hartree -> eV
return energy


def load_force(lines, atom_names, atom_numbs):
cnt = 0
field, fields = [], []
for index, line in enumerate(lines):
if "time=" in line:
continue
for atom_name in atom_names:
atom_name += " "
if atom_name in line:
cnt += 1
parts = line.split()
for_line = [float(parts[4]), float(parts[5]), float(parts[6])]
field.append(for_line)
if cnt == np.sum(atom_numbs):
fields.append(field)
cnt = 0
field = []
force = force_convert * np.array(fields)
return force


# load energy, force
def to_system_label(fname, mdname):
atom_names, atom_numbs, atom_types, cells = load_param_file(fname, mdname)
with open(mdname) as md_file:
lines = md_file.readlines()
energy = load_energy(lines)
force = load_force(lines, atom_names, atom_numbs)
return energy, force


if __name__ == "__main__":
file_name = "Cdia"
fname = f"{file_name}.dat"
mdname = f"{file_name}.md"
atom_names, atom_numbs, atom_types, cells = load_param_file(fname, mdname)
coords, steps = load_data(mdname, atom_names, np.sum(atom_numbs))
data, steps = to_system_data(fname, mdname)
energy, force = to_system_label(fname, mdname)
print(atom_names)
print(atom_numbs)
print(atom_types)

Check warning on line 193 in dpdata/openmx/omx.py

View check run for this annotation

Codecov / codecov/patch

dpdata/openmx/omx.py#L184-L193

Added lines #L184 - L193 were not covered by tests
# print(cells.shape)
# print(coords.shape)
# print(len(energy))
# print(force.shape)
70 changes: 70 additions & 0 deletions dpdata/plugins/openmx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import dpdata.md.pbc
import dpdata.openmx.omx
from dpdata.format import Format


@Format.register("openmx/out")
class OPENMXFormat(Format):
Copy link
Member

@njzjz njzjz Dec 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest adding some documentation for this new format. For example, a link to external documentation

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@njzjz Thank you for the reply.

I have a question. What is the “documentation" in your message?

For example, on page 36 of the dpdata documentation, there are description about qe/cp/traj, right? Do you meant that a similar description should be added for openmx?

Or is it sufficient to simply attach a link to the DFT package OpenMX, like this?

I look forward to your advice.

Copy link
Member

@njzjz njzjz Dec 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something like this, e.g. what openmx is and what this format is.

class ASEStructureFormat(Format):
"""Format for the `Atomic Simulation Environment <https://wiki.fysik.dtu.dk/ase/>`_ (ase).
ASE supports parsing a few dozen of data formats. As described in i
`the documentation <ihttps://wiki.fysik.dtu.dk/ase/ase/io/io.html>`_,
many of these formats can be determined automatically.
Use the `ase_fmt` keyword argument to supply the format if
automatic detection fails.
"""

Btw, it may be better to rename it to openmx/out (or something like this) or add an alias to show this is the output but not the input. The input file can also be a format.

Copy link
Contributor Author

@shigeandtomo shigeandtomo Dec 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@njzjz Thank you.

I maked out what you meant. So, I added some documentation for this new format, and changed keyword from openmx to openmx/out.

"""Format for the `OpenMX <https://www.openmx-square.org/>`.

OpenMX (Open source package for Material eXplorer) is a nano-scale material simulation package based on DFT, norm-conserving pseudopotentials, and pseudo-atomic localized basis functions.

Note that two output files, System.Name.dat and System.Name.md, are required.

Use the `openmx/out` keyword argument to supply this format.
"""

@Format.post("rot_lower_triangular")
def from_system(self, file_name: str, **kwargs) -> dict:
"""Read from OpenMX output.

Parameters
----------
file_name : str
file name, which is specified by a input file, i.e. System.Name.dat
**kwargs : dict
other parameters

Returns
-------
dict
data dict
"""
fname = f"{file_name}.dat"
mdname = f"{file_name}.md"

data, _ = dpdata.openmx.omx.to_system_data(fname, mdname)
data["coords"] = dpdata.md.pbc.apply_pbc(
data["coords"],
data["cells"],
)
return data

@Format.post("rot_lower_triangular")
def from_labeled_system(self, file_name: str, **kwargs) -> dict:
"""Read from OpenMX output.

Parameters
----------
file_name : str
file name, which is specified by a input file, i.e. System.Name.dat
**kwargs : dict
other parameters

Returns
-------
dict
data dict
"""
fname = f"{file_name}.dat"
mdname = f"{file_name}.md"

data, cs = dpdata.openmx.omx.to_system_data(fname, mdname)
data["coords"] = dpdata.md.pbc.apply_pbc(
data["coords"],
data["cells"],
)
data["energies"], data["forces"] = dpdata.openmx.omx.to_system_label(
fname, mdname
)
return data
73 changes: 73 additions & 0 deletions tests/openmx/Methane.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#
# File Name
#

System.CurrrentDirectory ./ # default=./
System.Name Methane
level.of.stdout 1 # default=1 (1-3)
level.of.fileout 1 # default=1 (0-2)

#
# Definition of Atomic Species
#

Species.Number 2
<Definition.of.Atomic.Species
H H6.0-s2p1 H_PBE19
C C6.0-s2p2d1 C_PBE19
Definition.of.Atomic.Species>

#
# Atoms
#

Atoms.Number 5
Atoms.SpeciesAndCoordinates.Unit Ang # Ang|AU
<Atoms.SpeciesAndCoordinates
1 C 0.000000 0.000000 0.000000 2.0 2.0
2 H -0.889981 -0.629312 0.000000 0.5 0.5
3 H 0.000000 0.629312 -0.889981 0.5 0.5
4 H 0.000000 0.629312 0.889981 0.5 0.5
5 H 0.889981 -0.629312 0.000000 0.5 0.5
Atoms.SpeciesAndCoordinates>
Atoms.UnitVectors.Unit Ang # Ang|AU
<Atoms.UnitVectors
10.0 0.0 0.0
0.0 10.0 0.0
0.0 0.0 10.0
Atoms.UnitVectors>

#
# SCF or Electronic System
#

scf.XcType GGA-PBE # LDA|LSDA-CA|LSDA-PW|GGA-PBE
scf.SpinPolarization off # On|Off|NC
scf.ElectronicTemperature 300.0 # default=300 (K)
scf.energycutoff 120.0 # default=150 (Ry)
scf.maxIter 100 # default=40
scf.EigenvalueSolver cluster # DC|GDC|Cluster|Band
scf.Kgrid 1 1 1 # means n1 x n2 x n3
scf.Mixing.Type rmm-diis # Simple|Rmm-Diis|Gr-Pulay|Kerker|Rmm-Diisk
scf.Init.Mixing.Weight 0.200 # default=0.30
scf.Min.Mixing.Weight 0.001 # default=0.001
scf.Max.Mixing.Weight 0.200 # default=0.40
scf.Mixing.History 7 # default=5
scf.Mixing.StartPulay 4 # default=6
scf.criterion 1.0e-10 # default=1.0e-6 (Hartree)
scf.lapack.dste dstevx # dstevx|dstedc|dstegr,default=dstevx

#
# MD or Geometry Optimization
#

MD.Type NVT_NH # Nomd|Opt|NVE|NVT_VS|NVT_NH
MD.maxIter 200 # default=1
MD.TimeStep 1.0 # default=0.5 (fs)
NH.Mass.HeatBath 30.0 # default = 20.0

<MD.TempControl
2
1 300.0
100 300.0
MD.TempControl>
Loading
Loading