deepmodeling · wanghan-iapcm · Dec 6, 2023 · Nov 29, 2023 · Nov 29, 2023 · Nov 29, 2023
diff --git a/.DS_Store b/.DS_Store
diff --git a/dpdata/openmx/__init__.py b/dpdata/openmx/__init__.py
diff --git a/dpdata/openmx/omx.py b/dpdata/openmx/omx.py
@@ -0,0 +1,197 @@
+#!/usr/bin/python3
+import numpy as np
+
+from ..unit import (
+    EnergyConversion,
+    ForceConversion,
+    LengthConversion,
+    PressureConversion,
+)
+
+ry2ev = EnergyConversion("rydberg", "eV").value()
+kbar2evperang3 = PressureConversion("kbar", "eV/angstrom^3").value()
+
+length_convert = LengthConversion("bohr", "angstrom").value()
+energy_convert = EnergyConversion("hartree", "eV").value()
+force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value()
+
+from collections import OrderedDict
+
+### iterout.c from OpenMX soure code: column numbers and physical quantities ###
+# /* 1: */
+# /* 2,3,4: */
+# /* 5,6,7: force *
+# /* 8: x-component of velocity */
+# /* 9: y-component of velocity */
+# /* 10: z-component of velocity */
+# /* 11: Net charge, electron charge is defined to be negative. */
+# /* 12: magnetic moment (muB) */
+# /* 13,14: angles of spin */
+
+
+def load_atom(lines):
+    atom_names = []
+    atom_names_mode = False
+    for line in lines:
+        if "<Atoms.SpeciesAndCoordinates" in line:
+            atom_names_mode = True
+        elif "Atoms.SpeciesAndCoordinates>" in line:
+            atom_names_mode = False
+        elif atom_names_mode:
+            parts = line.split()
+            atom_names.append(parts[1])
+    natoms = len(atom_names)
+    atom_names_original = atom_names
+    atom_names = list(OrderedDict.fromkeys(set(atom_names)))  # Python>=3.7
+    atom_names = sorted(
+        atom_names, key=atom_names_original.index
+    )  # Unique ordering of atomic species
+    ntypes = len(atom_names)
+    atom_numbs = [0] * ntypes
+    atom_types = []
+    atom_types_mode = False
+    for line in lines:
+        if "<Atoms.SpeciesAndCoordinates" in line:
+            atom_types_mode = True
+        elif "Atoms.SpeciesAndCoordinates>" in line:
+            atom_types_mode = False
+        elif atom_types_mode:
+            parts = line.split()
+            for i, atom_name in enumerate(atom_names):
+                if parts[1] == atom_name:
+                    atom_numbs[i] += 1
+                    atom_types.append(i)
+    atom_types = np.array(atom_types)
+    return atom_names, atom_types, atom_numbs
+
+
+def load_cells(lines):
+    cell, cells = [], []
+    for index, line in enumerate(lines):
+        if "Cell_Vectors=" in line:
+            parts = line.split()
+            cell.append([float(parts[12]), float(parts[13]), float(parts[14])])
+            cell.append([float(parts[15]), float(parts[16]), float(parts[17])])
+            cell.append([float(parts[18]), float(parts[19]), float(parts[20])])
+            cells.append(cell)
+            cell = []
+    cells = np.array(cells)
+    return cells
+
+
+# load atom_names, atom_numbs, atom_types, cells
+def load_param_file(fname, mdname):
+    with open(fname) as dat_file:
+        lines = dat_file.readlines()
+    atom_names, atom_types, atom_numbs = load_atom(lines)
+
+    with open(mdname) as md_file:
+        lines = md_file.readlines()
+    cells = load_cells(lines)
+    return atom_names, atom_numbs, atom_types, cells
+
+
+def load_coords(lines, atom_names, natoms):
+    cnt = 0
+    coord, coords = [], []
+    for index, line in enumerate(lines):
+        if "time=" in line:
+            continue
+        for atom_name in atom_names:
+            atom_name += " "
+            if atom_name in line:
+                cnt += 1
+                parts = line.split()
+                for_line = [float(parts[1]), float(parts[2]), float(parts[3])]
+                coord.append(for_line)
+        if cnt == natoms:
+            coords.append(coord)
+            cnt = 0
+            coord = []
+    coords = np.array(coords)
+    return coords
+
+
+def load_data(mdname, atom_names, natoms):
+    with open(mdname) as md_file:
+        lines = md_file.readlines()
+    coords = load_coords(lines, atom_names, natoms)
+    steps = [str(i) for i in range(1, coords.shape[0] + 1)]
+    return coords, steps
+
+
+def to_system_data(fname, mdname):
+    data = {}
+    (
+        data["atom_names"],
+        data["atom_numbs"],
+        data["atom_types"],
+        data["cells"],
+    ) = load_param_file(fname, mdname)
+    data["coords"], steps = load_data(
+        mdname,
+        data["atom_names"],
+        np.sum(data["atom_numbs"]),
+    )
+    data["orig"] = np.zeros(3)
+    return data, steps
+
+
+def load_energy(lines):
+    energy = []
+    for line in lines:
+        if "time=" in line:
+            parts = line.split()
+            ene_line = float(parts[4])  # Hartree
+            energy.append(ene_line)
+            continue
+    energy = energy_convert * np.array(energy)  # Hartree -> eV
+    return energy
+
+
+def load_force(lines, atom_names, atom_numbs):
+    cnt = 0
+    field, fields = [], []
+    for index, line in enumerate(lines):
+        if "time=" in line:
+            continue
+        for atom_name in atom_names:
+            atom_name += " "
+            if atom_name in line:
+                cnt += 1
+                parts = line.split()
+                for_line = [float(parts[4]), float(parts[5]), float(parts[6])]
+                field.append(for_line)
+        if cnt == np.sum(atom_numbs):
+            fields.append(field)
+            cnt = 0
+            field = []
+    force = force_convert * np.array(fields)
+    return force
+
+
+# load energy, force
+def to_system_label(fname, mdname):
+    atom_names, atom_numbs, atom_types, cells = load_param_file(fname, mdname)
+    with open(mdname) as md_file:
+        lines = md_file.readlines()
+    energy = load_energy(lines)
+    force = load_force(lines, atom_names, atom_numbs)
+    return energy, force
+
+
+if __name__ == "__main__":
+    file_name = "Cdia"
+    fname = f"{file_name}.dat"
+    mdname = f"{file_name}.md"
+    atom_names, atom_numbs, atom_types, cells = load_param_file(fname, mdname)
+    coords, steps = load_data(mdname, atom_names, np.sum(atom_numbs))
+    data, steps = to_system_data(fname, mdname)
+    energy, force = to_system_label(fname, mdname)
+    print(atom_names)
+    print(atom_numbs)
+    print(atom_types)
+    # print(cells.shape)
+    # print(coords.shape)
+    # print(len(energy))
+    # print(force.shape)
diff --git a/dpdata/plugins/openmx.py b/dpdata/plugins/openmx.py
@@ -0,0 +1,70 @@
+import dpdata.md.pbc
+import dpdata.openmx.omx
+from dpdata.format import Format
+
+
+@Format.register("openmx/out")
+class OPENMXFormat(Format):
 class ASEStructureFormat(Format): 
     """Format for the `Atomic Simulation Environment <https://wiki.fysik.dtu.dk/ase/>`_ (ase). 
     ASE supports parsing a few dozen of data formats. As described in i 
     `the documentation <ihttps://wiki.fysik.dtu.dk/ase/ase/io/io.html>`_, 
     many of these formats can be determined automatically. 
     Use the `ase_fmt` keyword argument to supply the format if 
     automatic detection fails. 
     """ 
 class ASEStructureFormat(Format): 
     """Format for the `Atomic Simulation Environment <https://wiki.fysik.dtu.dk/ase/>`_ (ase). 
  
     ASE supports parsing a few dozen of data formats. As described in i 
     `the documentation <ihttps://wiki.fysik.dtu.dk/ase/ase/io/io.html>`_, 
     many of these formats can be determined automatically. 
     Use the `ase_fmt` keyword argument to supply the format if 
     automatic detection fails. 
     """ 
+    """Format for the `OpenMX <https://www.openmx-square.org/>`.
+
+    OpenMX (Open source package for Material eXplorer) is a nano-scale material simulation package based on DFT, norm-conserving pseudopotentials, and pseudo-atomic localized basis functions.
+
+    Note that two output files, System.Name.dat and System.Name.md, are required.
+
+    Use the `openmx/out` keyword argument to supply this format.
+    """
+
+    @Format.post("rot_lower_triangular")
+    def from_system(self, file_name: str, **kwargs) -> dict:
+        """Read from OpenMX output.
+
+        Parameters
+        ----------
+        file_name : str
+            file name, which is specified by a input file, i.e. System.Name.dat
+        **kwargs : dict
+            other parameters
+
+        Returns
+        -------
+        dict
+            data dict
+        """
+        fname = f"{file_name}.dat"
+        mdname = f"{file_name}.md"
+
+        data, _ = dpdata.openmx.omx.to_system_data(fname, mdname)
+        data["coords"] = dpdata.md.pbc.apply_pbc(
+            data["coords"],
+            data["cells"],
+        )
+        return data
+
+    @Format.post("rot_lower_triangular")
+    def from_labeled_system(self, file_name: str, **kwargs) -> dict:
+        """Read from OpenMX output.
+
+        Parameters
+        ----------
+        file_name : str
+            file name, which is specified by a input file, i.e. System.Name.dat
+        **kwargs : dict
+            other parameters
+
+        Returns
+        -------
+        dict
+            data dict
+        """
+        fname = f"{file_name}.dat"
+        mdname = f"{file_name}.md"
+
+        data, cs = dpdata.openmx.omx.to_system_data(fname, mdname)
+        data["coords"] = dpdata.md.pbc.apply_pbc(
+            data["coords"],
+            data["cells"],
+        )
+        data["energies"], data["forces"] = dpdata.openmx.omx.to_system_label(
+            fname, mdname
+        )
+        return data
diff --git a/tests/openmx/Methane.dat b/tests/openmx/Methane.dat
@@ -0,0 +1,73 @@
+#
+# File Name      
+#
+
+System.CurrrentDirectory         ./    # default=./
+System.Name                      Methane
+level.of.stdout                   1    # default=1 (1-3)
+level.of.fileout                  1    # default=1 (0-2)
+
+#
+# Definition of Atomic Species
+#
+
+Species.Number       2
+<Definition.of.Atomic.Species
+ H   H6.0-s2p1          H_PBE19
+ C   C6.0-s2p2d1        C_PBE19
+Definition.of.Atomic.Species>
+
+#
+# Atoms
+#
+
+Atoms.Number         5
+Atoms.SpeciesAndCoordinates.Unit   Ang # Ang|AU
+<Atoms.SpeciesAndCoordinates  
+ 1  C      0.000000    0.000000    0.000000     2.0  2.0 
+ 2  H     -0.889981   -0.629312    0.000000     0.5  0.5
+ 3  H      0.000000    0.629312   -0.889981     0.5  0.5
+ 4  H      0.000000    0.629312    0.889981     0.5  0.5
+ 5  H      0.889981   -0.629312    0.000000     0.5  0.5
+Atoms.SpeciesAndCoordinates>
+Atoms.UnitVectors.Unit             Ang # Ang|AU
+<Atoms.UnitVectors                     
+  10.0   0.0   0.0  
+   0.0  10.0   0.0  
+   0.0   0.0  10.0  
+Atoms.UnitVectors>
+
+#
+# SCF or Electronic System
+#
+
+scf.XcType                 GGA-PBE     # LDA|LSDA-CA|LSDA-PW|GGA-PBE
+scf.SpinPolarization        off        # On|Off|NC
+scf.ElectronicTemperature  300.0       # default=300 (K)
+scf.energycutoff           120.0       # default=150 (Ry)
+scf.maxIter                 100        # default=40
+scf.EigenvalueSolver        cluster    # DC|GDC|Cluster|Band
+scf.Kgrid                  1 1 1       # means n1 x n2 x n3
+scf.Mixing.Type           rmm-diis     # Simple|Rmm-Diis|Gr-Pulay|Kerker|Rmm-Diisk
+scf.Init.Mixing.Weight     0.200       # default=0.30 
+scf.Min.Mixing.Weight      0.001       # default=0.001 
+scf.Max.Mixing.Weight      0.200       # default=0.40 
+scf.Mixing.History          7          # default=5
+scf.Mixing.StartPulay       4          # default=6
+scf.criterion             1.0e-10      # default=1.0e-6 (Hartree) 
+scf.lapack.dste            dstevx      # dstevx|dstedc|dstegr,default=dstevx
+
+#
+# MD or Geometry Optimization
+#
+
+MD.Type                  NVT_NH        # Nomd|Opt|NVE|NVT_VS|NVT_NH
+MD.maxIter                  200        # default=1
+MD.TimeStep                 1.0        # default=0.5 (fs)
+NH.Mass.HeatBath           30.0        # default = 20.0
+
+<MD.TempControl
+  2
+  1    300.0
+  100  300.0
+MD.TempControl>