From e8e26ae10b026debb42df86e93d0cde160870e14 Mon Sep 17 00:00:00 2001
From: Marcel Langer <me@sirmarcel.com>
Date: Fri, 1 Nov 2024 14:15:01 +0100
Subject: [PATCH] Make some progress

---
 examples/o3-averaging/data/in-mol.lmp         |  35 ++++
 examples/o3-averaging/data/init-mol.xyz       |   5 +
 examples/o3-averaging/data/input-mol-grid.xml |  38 ++++
 examples/o3-averaging/data/input-mol-noo3.xml |  35 ++++
 examples/o3-averaging/data/replay-noo3.xml    |  26 +++
 examples/o3-averaging/data/water_1_data.lmp   |  41 ++++
 examples/o3-averaging/o3-averaging.py         | 177 +++++++++++++-----
 7 files changed, 311 insertions(+), 46 deletions(-)
 create mode 100644 examples/o3-averaging/data/in-mol.lmp
 create mode 100644 examples/o3-averaging/data/init-mol.xyz
 create mode 100644 examples/o3-averaging/data/input-mol-grid.xml
 create mode 100644 examples/o3-averaging/data/input-mol-noo3.xml
 create mode 100644 examples/o3-averaging/data/replay-noo3.xml
 create mode 100644 examples/o3-averaging/data/water_1_data.lmp
diff --git a/examples/o3-averaging/data/in-mol.lmp b/examples/o3-averaging/data/in-mol.lmp
new file mode 100644
index 00000000..5d620f00
--- /dev/null
+++ b/examples/o3-averaging/data/in-mol.lmp
@@ -0,0 +1,35 @@
+units		electron
+atom_style	full
+
+pair_style      lj/cut/tip4p/cut 1 2 1 1 0.278072379 17.001
+# high-pppm precision and shift to get meaningful fd estimates
+# kspace_style	none
+# kspace_style	pppm/tip4p 1e-5
+pair_modify     shift yes
+bond_style      class2
+angle_style     harmonic
+
+
+read_data	water_1_data.lmp
+pair_coeff  * * 0 0
+pair_coeff  1  1  0.000295147 5.96946
+
+neighbor	2.0 bin
+
+
+timestep	0.00025
+
+#velocity all create 298.0 2345187
+
+#thermo_style	multi
+#thermo		1
+
+#fix		1 all nvt temp 298.0 298.0 30.0 tchain 1
+#fix 1 all nve
+fix 1 all ipi h2o-lammps 32342 unix
+
+
+#dump		1 all xyz 25 dump.xyz
+
+run		100000000
+
diff --git a/examples/o3-averaging/data/init-mol.xyz b/examples/o3-averaging/data/init-mol.xyz
new file mode 100644
index 00000000..18c674e0
--- /dev/null
+++ b/examples/o3-averaging/data/init-mol.xyz
@@ -0,0 +1,5 @@
+3
+Lattice="100.0 0.0 0.0 0.0 100.0 0.0 0.0 0.0 100.0" Properties=species:S:1:pos:R:3:momenta:R:3 ipi_comment="Step:      200000  Bead:       0 positions{angstrom}  cell{angstrom}" pbc="T T T"
+O       -2.89770838       6.76492149      25.78733892       0.41094839      -0.09429088       0.29861888
+H       -3.22693259       6.92327461      24.89600383      -0.31547195       0.04597911      -0.02131858
+H       -3.73951469       6.82619967      26.21403556      -0.09547645       0.04831177      -0.27730030
diff --git a/examples/o3-averaging/data/input-mol-grid.xml b/examples/o3-averaging/data/input-mol-grid.xml
new file mode 100644
index 00000000..0d9cd5e5
--- /dev/null
+++ b/examples/o3-averaging/data/input-mol-grid.xml
@@ -0,0 +1,38 @@
+<!-- safe_stride is the frequency at which i-PI makes internal snapshots for restarting -->
+<simulation verbosity='medium' threading='false' safe_stride='10'>
+    <ffsocket name='lmpserial' mode='unix' pbc='false'>
+      <address>h2o-lammps</address> <latency> 1e-4 </latency>
+    </ffsocket>
+   <ffrotations name='driver-noo3' mode='unix' pbc='false'>
+       <address>h2o-noo3</address>
+       <grid_order> 3 </grid_order>
+       <grid_mode> legendre </grid_mode>
+       <inversion> True </inversion>
+   </ffrotations>
+   <total_steps>10000</total_steps>
+   <output prefix='nve-mol'>
+      <trajectory stride='20' filename='pos' format='xyz'> positions </trajectory>
+      <properties stride='20'> [ step, time, conserved, temperature{kelvin}, kinetic_md, potential, pressure_md ] </properties>
+   </output>
+   <prng>
+      <seed>32123</seed>
+   </prng>
+   <system>
+      <forces> 
+          <force forcefield='lmpserial'></force>
+          <force forcefield='driver-noo3' weight="1.0"/>
+      </forces>
+      <initialize nbeads='1'>
+        <file mode='ase'> init-mol.xyz </file>
+      </initialize>
+      <ensemble>
+         <temperature units='kelvin'> 300 </temperature>
+      </ensemble>
+      <motion mode='dynamics'>
+        <fixcom> True </fixcom>
+        <dynamics mode='nve'>
+            <timestep units='femtosecond'> 0.5 </timestep>
+        </dynamics>
+      </motion>
+  </system>
+</simulation>
diff --git a/examples/o3-averaging/data/input-mol-noo3.xml b/examples/o3-averaging/data/input-mol-noo3.xml
new file mode 100644
index 00000000..4f452f96
--- /dev/null
+++ b/examples/o3-averaging/data/input-mol-noo3.xml
@@ -0,0 +1,35 @@
+<!-- safe_stride is the frequency at which i-PI makes internal snapshots for restarting -->
+<simulation verbosity='medium' threading='false' safe_stride='10'>
+    <ffsocket name='lmpserial' mode='unix' pbc='false'>
+      <address>h2o-lammps</address> <latency> 1e-4 </latency>
+    </ffsocket>
+   <ffsocket name='driver-noo3' mode='unix' pbc='false'>
+       <address>h2o-noo3</address>
+   </ffsocket>
+   <total_steps>10000</total_steps>
+   <output prefix='nve-mol'>
+      <trajectory stride='20' filename='pos' format='xyz'> positions </trajectory>
+      <properties stride='20'> [ step, time, conserved, temperature{kelvin}, kinetic_md, potential, pressure_md ] </properties>
+   </output>
+   <prng>
+      <seed>32123</seed>
+   </prng>
+   <system>
+      <forces> 
+          <force forcefield='lmpserial'></force>
+          <force forcefield='driver-noo3' weight="1.0"/>
+      </forces>
+      <initialize nbeads='1'>
+        <file mode='ase'> init-mol.xyz </file>
+      </initialize>
+      <ensemble>
+         <temperature units='kelvin'> 300 </temperature>
+      </ensemble>
+      <motion mode='dynamics'>
+        <fixcom> True </fixcom>
+        <dynamics mode='nve'>
+            <timestep units='femtosecond'> 0.5 </timestep>
+        </dynamics>
+      </motion>
+  </system>
+</simulation>
diff --git a/examples/o3-averaging/data/replay-noo3.xml b/examples/o3-averaging/data/replay-noo3.xml
new file mode 100644
index 00000000..549c0c71
--- /dev/null
+++ b/examples/o3-averaging/data/replay-noo3.xml
@@ -0,0 +1,26 @@
+<simulation verbosity='medium' safe_stride='100'>
+  <output prefix='water-noo3'>
+      <properties filename='out' stride='1'>  
+         [ step, time{picosecond}, conserved, potential, kinetic_cv, 
+           scaledcoords(fd_delta=5e-3) ] 
+      </properties>
+  </output>
+  <ffsocket name='lmpserial' mode='unix' pbc='false'>
+      <address>h2o-lammps</address> <latency> 1e-4 </latency>
+  </ffsocket>
+  <ffrotations name='noo3' mode='unix' pbc='false'>
+      <address>h2o-noo3</address> <latency> 1e-4 </latency>
+  </ffrotations>
+  <system>
+    <initialize nbeads='1'>
+      <file mode='ase' units='angstrom'> init-mol.xyz </file>
+    </initialize>
+    <forces>
+      <force forcefield='lmpserial'/>
+      <force forcefield='noo3'/>
+    </forces>
+    <motion mode='replay'>
+      <file mode='ase'> replay.xyz </file>
+    </motion>
+  </system>
+</simulation>
diff --git a/examples/o3-averaging/data/water_1_data.lmp b/examples/o3-averaging/data/water_1_data.lmp
new file mode 100644
index 00000000..ccc1220f
--- /dev/null
+++ b/examples/o3-averaging/data/water_1_data.lmp
@@ -0,0 +1,41 @@
+LAMMPS Description
+ 
+         3  atoms
+         2  bonds
+         1  angles
+ 
+           2  atom types
+           1  bond types
+           1  angle types
+ 
+         -19 19.38859 xlo xhi
+         -10 19.38859 ylo yhi
+         -19 19.38859 zlo zhi
+ 
+Masses
+ 
+  1 15.9994
+  2  1.0080
+ 
+Bond Coeffs
+ 
+  1    1.78    0.2708585 -0.327738785 0.231328959
+ 
+Angle Coeffs
+ 
+  1    0.0700  107.400000
+
+Atoms
+
+  1 1 1 -1.1128        7.098  8.901  17.941  
+  2 1 2 0.5564         8.700  8.073  18.276  
+  3 1 2 0.5564         7.555  10.053  16.607 
+
+Bonds
+
+ 1 1 1 2
+ 2 1 1 3
+ 
+Angles
+
+ 1 1 2 1 3
diff --git a/examples/o3-averaging/o3-averaging.py b/examples/o3-averaging/o3-averaging.py
index eb70293b..511bd2fc 100644
--- a/examples/o3-averaging/o3-averaging.py
+++ b/examples/o3-averaging/o3-averaging.py
@@ -1,15 +1,36 @@
 r"""
 Rotational averaging of non-equivariant models
-==========================
+==============================================
 
 :Authors:
 Marcel Langer `@sirmarcel <https://github.com/sirmarcel/>`_;
 Michele Ceriotti `@ceriottm <https://github.com/ceriottm/>`_
 
-DESCRIPTION
+Molecular dynamics (MD) simulates the movement of atoms on the potential energy surface
+(PES) :math:`U(R)` where :math:`R` stands for all :math:`i` atomic positions. Evaluating
+this function makes up the dominating cost in running MD. Machine learning interatomic
+potentials (MLPs) are often used to approximate some expensive first-principles PES at
+high accuracy and reasonably high speed.
+
+It is well known that the PES has underlying physical symmetries: It doesn't change under
+rotations of the coordinates, overall translations in space, and it doesn't depend on the
+order in which we list all the atoms in the system. It has long been believed that it is
+essential for MLPs to exactly fulfil these symmetries. However, this places severe
+constraints on possible model architectures, and so there has been a lot of interest in
+so-called *unconstrained* models that do not respect symmetries exactly, by construction,
+but rather learn them approximately from the training data. In the domain of MLPs, such
+models have largely focused on dropping rotational invariance.
+
+This tutorial, based on a `recent paper <https://doi.org/10.1088/2632-2153/ad86a0>`_,
+explains how to test and control the impact of approximate rotational invariance in
+MLPs for MD simulations.
 """
 
 # %%
+# Setting up
+# ----------
+#
+# First, we import things
 
 import os
 import subprocess
@@ -17,7 +38,7 @@
 import xml.etree.ElementTree as ET
 
 import ase
-import ase.io
+import ase.io, ase.build
 import chemiscope
 import ipi
 import numpy as np
@@ -27,8 +48,9 @@
 )
 
 
-# import matplotlib.pyplot as plt
-# import numpy as np
+import matplotlib.pyplot as plt
+import numpy as np
+
 
 # %%
 # show rotations
@@ -89,63 +111,126 @@
 )
 
 # %%
-# Show the problem
-# ----------------------------------
+# Next, we set up the interface with LAMMPS, which has to be compiled.
+# We use a utility function to compile it. Note that this requires
+# a functioning build system with `gfortran` and `make`.
+
+ipi.install_driver()
 
 
 # %%
+# Non-invariant water potential
+# -----------------------------
+# 
+# Instead of a complex MLP, we'll be using an artificial example in this
+# tutorial: We use the classical "tip4p" forcefield for water, and add an
+# orientation-dependent additional term to simulate missing rotational invariance.
 #
+# Let's have a look at that potential. We'll take a water, and rotate it
+# around 180 degrees in steps of two degrees, and look at the energy:
 
-# Open and show the relevant part of the input
-xmlroot = ET.parse("data/input-noo3.xml").getroot()
-print("      " + ET.tostring(xmlroot.find(".//ffrotations"), encoding="unicode"))
+atoms = ase.build.molecule("H2O")
+atoms.set_cell(np.eye(3) * 5)
+atoms.positions += 2.5
 
+frames = []
+for i in range(0, 180, 2):
+    a = atoms.copy()
+    a.rotate(i, "z", center="COM")
+    frames.append(a)
+print(len(frames))
+ase.io.write("single/replay.xyz", frames)
+# todo: actually do the work, get the results, make plot
 
 # %%
-# Installing the Python driver
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#
-# i-PI comes with a FORTRAN driver, which however has to be installed
-# from source. We use a utility function to compile it. Note that this requires
-# a functioning build system with `gfortran` and `make`.
-
-ipi.install_driver()
+# As we can see, the potential energy changes under rotations. We have a spurious
+# angular dependence in our potential!
 
 # %%
-# We launch the i-PI and LAMMPS processes, exactly as in the
-# ``path-integrals`` example.
-
-# don't rerun if the outputs already exist
-ipi_process = None
-if not os.path.exists("water-noo3.out"):
-    ipi_process = subprocess.Popen(["i-pi", "data/input-noo3.xml"])
-    time.sleep(2)  # wait for i-PI to start
-    lmp_process = [subprocess.Popen(["lmp", "-in", "data/in.lmp"]) for i in range(1)]
-    driver_process = [
-        subprocess.Popen(
-            ["i-pi-driver", "-u", "-a", "h2o-noo3", "-m", "noo3-h2o"], cwd="data/"
-        )
-        for i in range(1)
-    ]
+# Conservation of angular momentum
+# --------------------------------
+# 
+# The most drastic consequence of a lack of rotational invariance is that angular
+# momentum is no longer conserved. We can easily see this by simulating a single
+# water molecule, initialised with zero total angular momentum. With our non-
+# invariant potential, it starts rotating after just a few timesteps:
+
+# todo: actually do the things here.
 
 # %%
-# Skip this cell if you want to run in the background
-if ipi_process is not None:
-    ipi_process.wait()
-    lmp_process[0].wait()
-    driver_process[0].wait()
+# In the manuscript linked at the beginning, we show that for molecules, this
+# leads to a spurious preferred orientation of the molecules, even for NVT
+# simulations. Interestingly, we find that for an approximately invariant MLP
+# that was trained with data augmentation, there is no impact on observables for
+# *liquid* water, which the model was trained on.
+# However, this tutorial is focused on something else: What to do to control
+# the lack of invariance after the fact.
 
 
 # %%
+# Controlling non-invariance with averaging
+# -----------------------------------------
 #
-# Discuss how molecules get oriented
+# A not rotationally invariant potential can be made more invariant by *averaging*
+# over rotations. Rotations are continuous, so we can't recover exact invariance,
+# but we can get close enough for many practical cases, as we show in the paper above.
+#
+# I-Pi has *builtin* support for different symmetrisation schemes: Two different types
+# of grids over rotations (Legendre, Lebedev) and a single randomly chosen rotation.
+# They are invoked by using `ffrotations` in place of `ffsocket` to interface with the driver:
 
-traj_data = ase.io.read("water-noo3.pos_0.extxyz", ":")
+# Open and show the relevant part of the input
+xmlroot = ET.parse("data/input-noo3.xml").getroot()
+print("      " + ET.tostring(xmlroot.find(".//ffrotations"), encoding="unicode"))
 
 # %%
-# then, assemble a visualization
-chemiscope.show(
-    frames=traj_data,
-    settings=chemiscope.quick_settings(trajectory=True),
-    mode="structure",
-)
+# So, let's now run the same dynamics from above *with* this rotational augmentation.
+
+# todo: run, get results, make animation
+
+# %%
+#
+# With this grid, we've managed to successfully control the non-invariance: There is
+# only a very slow remaining precession. In a less "strict" setting, for instance NVT,
+# this would be more than sufficient to mitigate the impacts of the non-invariance.
+
+# scratch space below
+
+# # %%
+# # We launch the i-PI and LAMMPS processes, exactly as in the
+# # ``path-integrals`` example.
+
+# # don't rerun if the outputs already exist
+# ipi_process = None
+# if not os.path.exists("water-noo3.out"):
+#     ipi_process = subprocess.Popen(["i-pi", "data/input-noo3.xml"])
+#     time.sleep(2)  # wait for i-PI to start
+#     lmp_process = [subprocess.Popen(["lmp", "-in", "data/in.lmp"]) for i in range(1)]
+#     driver_process = [
+#         subprocess.Popen(
+#             ["i-pi-driver", "-u", "-a", "h2o-noo3", "-m", "noo3-h2o"], cwd="data/"
+#         )
+#         for i in range(1)
+#     ]
+
+# # %%
+# # Skip this cell if you want to run in the background
+# if ipi_process is not None:
+#     ipi_process.wait()
+#     lmp_process[0].wait()
+#     driver_process[0].wait()
+
+
+# # %%
+# #
+# # Discuss how molecules get oriented
+
+# traj_data = ase.io.read("water-noo3.pos_0.extxyz", ":")
+
+# # %%
+# # then, assemble a visualization
+# chemiscope.show(
+#     frames=traj_data,
+#     settings=chemiscope.quick_settings(trajectory=True),
+#     mode="structure",
+# )