deepmodeling · wanghan-iapcm · Jan 24, 2024 · Jan 20, 2024 · Jan 20, 2024 · Jan 20, 2024
diff --git a/dpdata/orca/__init__.py b/dpdata/orca/__init__.py
diff --git a/dpdata/orca/output.py b/dpdata/orca/output.py
@@ -0,0 +1,64 @@
+from typing import Tuple
+
+import numpy as np
+
+
+def read_orca_sp_output(fn: str) -> Tuple[np.ndarray, np.ndarray, float, np.ndarray]:
+    """Read symbols, coordinates, energy and forces from an ORCA output file.
+
+    Note that both the energy and the gradient should be printed. When a
+    section is printed more than once, the values of its last occurrence
+    are returned. All numbers are returned as printed, without any unit
+    conversion.
+
+    Parameters
+    ----------
+    fn : str
+        file name
+
+    Returns
+    -------
+    np.ndarray
+        atomic symbols, capitalized (e.g. ``"He"``)
+    np.ndarray
+        atomic coordinates, read from the
+        ``CARTESIAN COORDINATES (ANGSTROEM)`` table
+    float
+        total potential energy, read from the
+        ``FINAL SINGLE POINT ENERGY`` line
+    np.ndarray
+        atomic forces, i.e. the negative of the ``CARTESIAN GRADIENT`` table
+
+    Raises
+    ------
+    ValueError
+        if the coordinates, the gradient or the energy cannot be found, or
+        if the coordinate and gradient tables differ in shape
+    """
+    coord = None
+    symbols = None
+    forces = None
+    energy = None
+    with open(fn) as f:
+        # Parser state:
+        #   0    - looking for a section header
+        #   1    - skip the single line that follows the coordinate header
+        #   2    - reading coordinate rows
+        #   3, 4 - skip the two lines that follow the gradient header
+        #   5    - reading gradient rows
+        flag = 0
+        for line in f:
+            if flag in (1, 3, 4):
+                flag += 1
+            elif flag == 2:
+                s = line.split()
+                if not s:
+                    # an empty line terminates the coordinate table
+                    flag = 0
+                else:
+                    symbols.append(s[0].capitalize())
+                    coord.append([float(s[1]), float(s[2]), float(s[3])])
+            elif flag == 5:
+                s = line.split()
+                if not s:
+                    # an empty line terminates the gradient table
+                    flag = 0
+                else:
+                    # the first three fields of a row are not gradient
+                    # components; the values sit in fields 3-5
+                    forces.append([float(s[3]), float(s[4]), float(s[5])])
+            elif line.startswith("CARTESIAN COORDINATES (ANGSTROEM)"):
+                # start afresh so that a later table replaces an earlier one
+                flag = 1
+                coord = []
+                symbols = []
+            elif line.startswith("CARTESIAN GRADIENT"):
+                flag = 3
+                forces = []
+            elif line.startswith("FINAL SINGLE POINT ENERGY"):
+                energy = float(line.split()[-1])
+
+    # Validate explicitly rather than with ``assert``: assertions are removed
+    # under ``python -O`` and a missing section would otherwise only show up
+    # as an unrelated error further down.
+    sections = {"coordinates": coord, "gradient": forces, "energy": energy}
+    missing = [name for name, value in sections.items() if value is None]
+    if missing:
+        raise ValueError(f"{', '.join(missing)} not found in ORCA output: {fn}")
+
+    symbols = np.array(symbols)
+    # forces are the negative of the printed gradient
+    forces = -np.array(forces)
+    coord = np.array(coord)
+    if coord.shape != forces.shape:
+        raise ValueError(
+            f"shape mismatch in ORCA output {fn}: "
+            f"coordinates {coord.shape} vs gradient {forces.shape}"
+        )
+
+    return symbols, coord, energy, forces
diff --git a/dpdata/plugins/orca.py b/dpdata/plugins/orca.py
@@ -0,0 +1,51 @@
+import numpy as np
+
+from dpdata.format import Format
+from dpdata.orca.output import read_orca_sp_output
+from dpdata.unit import EnergyConversion, ForceConversion
+
+# Factors by which ``from_labeled_system`` multiplies the parsed energy and
+# forces. They are requested from dpdata.unit with the unit pairs
+# ("hartree", "eV") and ("hartree/bohr", "eV/angstrom") -- presumably
+# source unit first, target unit second; confirm against dpdata.unit.
+energy_convert = EnergyConversion("hartree", "eV").value()
+force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value()
+
+
+@Format.register("orca/spout")
+class ORCASPOutFormat(Format):
+    """Reader for ORCA single point energy output files.
+
+    The output file has to contain both the energy and the gradient.
+    """
+
+    def from_labeled_system(self, file_name: str, **kwargs) -> dict:
+        """Build the data of a one-frame labeled system from an ORCA output.
+
+        The parsed energy and forces are multiplied by the module-level
+        ``energy_convert`` and ``force_convert`` factors; the coordinates
+        are stored as parsed. The cell is filled with zeros and the system
+        is flagged as non-periodic.
+
+        Parameters
+        ----------
+        file_name : str
+            file name
+        **kwargs
+            keyword arguments (not used by this reader)
+
+        Returns
+        -------
+        dict
+            system data
+        """
+        parsed = read_orca_sp_output(file_name)
+        symbols, coord, energy, forces = parsed
+
+        # unique element names, per-atom index into them, and their counts
+        names, type_index, counts = np.unique(
+            symbols,
+            return_inverse=True,
+            return_counts=True,
+        )
+        # a single frame holding one (x, y, z) row per atom
+        frame_shape = (1, coord.shape[0], 3)
+
+        data = {}
+        data["atom_types"] = type_index
+        data["atom_names"] = list(names)
+        data["atom_numbs"] = list(counts)
+        data["coords"] = coord.reshape(frame_shape)
+        data["energies"] = np.array([energy * energy_convert])
+        data["forces"] = (forces * force_convert).reshape(frame_shape)
+        data["cells"] = np.zeros((1, 3, 3))
+        data["orig"] = np.zeros(3)
+        data["nopbc"] = True
+        return data