Merge branch 'main' into tooling

mila-iqia · Apr 5, 2024 · bc84515 · bc84515
2 parents 1147d2e + b5eda3b
commit bc84515
Show file tree

Hide file tree

Showing 11 changed files with 192 additions and 13 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -29,7 +29,7 @@ jobs:
         pip install -e .
     - name: unit tests
       run: |
-        pytest --cov=crystal_diffusion
+        pytest --cov=crystal_diffusion -m "not not_on_github"
     - name: type checking
       run: |
         pytype crystal_diffusion/

diff --git a/crystal_diffusion/data/diffusion/data_preprocess.py b/crystal_diffusion/data/diffusion/data_preprocess.py
@@ -66,7 +66,7 @@ def _convert_coords_to_relative(row: pd.Series) -> List[float]:
         """
         x_lim, y_lim, z_lim = row['box']
         coord_red = [coord for triple in zip(row['x'], row['y'], row['z']) for coord in
-                     (triple[0] / x_lim, triple[1] / y_lim, triple[2] / z_lim)]
+                     ((triple[0] / x_lim) % 1, (triple[1] / y_lim) % 1, (triple[2] / z_lim) % 1)]
         return coord_red
 
     def get_x_relative(self, df: pd.DataFrame) -> pd.DataFrame:

diff --git a/crystal_diffusion/models/score_network.py b/crystal_diffusion/models/score_network.py
@@ -175,8 +175,7 @@ def _forward_unchecked(self, batch: Dict[AnyStr, torch.Tensor]) -> torch.Tensor:
             computed_scores : the scores computed by the model.
         """
         positions = batch[self.position_key]  # shape [batch_size, number_of_atoms, spatial_dimension]
-        times = batch[self.timestep_key]  # shape [batch_size, 1]
-
+        times = batch[self.timestep_key].to(positions.device)  # shape [batch_size, 1]
         input = torch.cat([self.flatten(positions), times], dim=1)
 
         output = self.mlp_layers(input).reshape(positions.shape)

diff --git a/crystal_diffusion/oracle/lammps.py b/crystal_diffusion/oracle/lammps.py
@@ -0,0 +1,66 @@
+"""Call LAMMPS to get the forces and energy in a given configuration."""
+import os
+from pathlib import Path
+from typing import Dict, Tuple
+
+import lammps
+import numpy as np
+import pandas as pd
+import yaml
+from pymatgen.core import Element
+
+from crystal_diffusion import DATA_DIR
+
+
+def get_energy_and_forces_from_lammps(positions: np.ndarray,
+                                      box: np.ndarray,
+                                      atom_types: np.ndarray,
+                                      atom_type_map: Dict[int, str] = {1: 'Si'},
+                                      tmp_work_dir: str = './',
+                                      pair_coeff_dir: Path = DATA_DIR) -> Tuple[float, pd.DataFrame]:
+    """Call LAMMPS to compute the forces on all atoms in a configuration.
+
+    Args:
+        positions: atomic positions as a n_atom x spatial dimension array
+        box: spatial dimension x spatial dimension array representing the periodic box. Assumed to be orthogonal.
+        atom_types: n_atom array with an index representing the type of each atom
+        atom_type_map (optional): map from index representing an atom type to a description of the atom.
+            Defaults to {1: "Si"}
+        tmp_work_dir (optional): directory where the LAMMPS output will be written (and deleted). Defaults to ./
+        pair_coeff_dir (optional): directory where the potentials as .sw files are stored. Defaults to DATA_DIR
+
+    Returns:
+        energy
+        dataframe with x, y, z coordinates and fx, fy, fz information in a dataframe.
+    """
+    n_atom = positions.shape[0]
+    assert atom_types.shape == (n_atom, ), f"Atom types should match the number of atoms. Got {atom_types.shape}."
+    lmp = lammps.lammps()  # create a lammps run
+    assert np.allclose(box, np.diag(np.diag(box))), "only orthogonal LAMMPS box are valid"
+    lmp.command(f"region simbox block 0 {box[0, 0]} 0 {box[1, 1]} 0 {box[2, 2]}")  # TODO what if box is not orthogonal
+    lmp.command("create_box 1 simbox")
+    lmp.command("pair_style sw")
+    for k, v in atom_type_map.items():
+        elem = Element(v)
+        lmp.command(f"mass {k} {elem.atomic_mass.real}")  # the .real is to get the value without the unit
+        lmp.command(f"group {v} type {k}")
+        lmp.command(f"pair_coeff * * {os.path.join(pair_coeff_dir, f'{v.lower()}.sw')} {v}")
+    for i in range(n_atom):
+        lmp.command(f"create_atoms {atom_types[i]} single {' '.join(map(str, positions[i, :]))}")
+    lmp.command("fix 1 all nvt temp 300 300 0.01")  # selections here do not matter because we only do 1 step
+    lmp.command(f"dump 1 all yaml 1 {os.path.join(tmp_work_dir, 'dump.yaml')} id x y z fx fy fz")  # TODO not good in 2D
+    lmp.command("run 0")  # 0 is the last step index - so run 0 means no MD update - just get the initial forces
+
+    # read informations from lammps output
+    with open(os.path.join(tmp_work_dir, "dump.yaml"), "r") as f:
+        dump_yaml = yaml.safe_load_all(f)
+        doc = next(iter(dump_yaml))
+    os.remove(os.path.join(tmp_work_dir, "dump.yaml"))  # no need to keep the output as artefact
+    forces = pd.DataFrame(doc['data'], columns=doc['keywords']).sort_values("id")  # organize in a dataframe
+
+    # get the energy
+    ke = lmp.get_thermo('ke')  # kinetic energy - should be 0 as atoms are created with 0 velocity
+    pe = lmp.get_thermo('pe')  # potential energy
+    energy = ke + pe
+
+    return energy, forces
diff --git a/crystal_diffusion/samplers/noisy_position_sampler.py b/crystal_diffusion/samplers/noisy_position_sampler.py
@@ -79,6 +79,6 @@ def get_noisy_position_sample(real_relative_positions: torch.Tensor, sigmas: tor
             "sigmas array is expected to be of the same shape as the real_relative_positions array"
 
         z_scores = NoisyPositionSampler._get_gaussian_noise(real_relative_positions.shape)
-        noise = sigmas * z_scores
+        noise = (sigmas * z_scores).to(real_relative_positions.device)
         noisy_relative_positions = map_positions_to_unit_cell(real_relative_positions + noise)
         return noisy_relative_positions
diff --git a/crystal_diffusion/score/wrapped_gaussian_score.py b/crystal_diffusion/score/wrapped_gaussian_score.py
@@ -32,8 +32,8 @@
 import numpy as np
 import torch
 
-SIGMA_THRESHOLD = 1.0 / np.sqrt(2.0 * np.pi)
-U_THRESHOLD = 0.5
+SIGMA_THRESHOLD = torch.Tensor([1.0 / np.sqrt(2.0 * np.pi)])
+U_THRESHOLD = torch.Tensor([0.5])
 
 
 def get_sigma_normalized_score_brute_force(u: float, sigma: float, kmax: Optional[int] = None) -> float:
@@ -124,7 +124,8 @@ def get_sigma_normalized_score(
     for mask_calculator, score_calculator in zip(mask_calculators, score_calculators):
         mask = mask_calculator(list_u, list_sigma)
         if mask.any():
-            flat_view[mask] = score_calculator(list_u[mask], list_sigma[mask], list_k)
+            device = flat_view.device
+            flat_view[mask] = score_calculator(list_u[mask], list_sigma.to(device)[mask], list_k.to(device))
 
     return sigma_normalized_scores
 
@@ -139,7 +140,8 @@ def _get_small_sigma_small_u_mask(list_u: torch.Tensor, list_sigma: torch.Tensor
     Returns:
         mask_1a : an array of booleans of shape [Nu]
     """
-    return torch.logical_and(list_sigma <= SIGMA_THRESHOLD, list_u < U_THRESHOLD)
+    device = list_u.device
+    return torch.logical_and(list_sigma.to(device) <= SIGMA_THRESHOLD.to(device), list_u < U_THRESHOLD.to(device))
 
 
 def _get_small_sigma_large_u_mask(list_u: torch.Tensor, list_sigma: torch.Tensor) -> torch.Tensor:
@@ -152,7 +154,8 @@ def _get_small_sigma_large_u_mask(list_u: torch.Tensor, list_sigma: torch.Tensor
     Returns:
         mask_1b : an array of booleans of shape [Nu]
     """
-    return torch.logical_and(list_sigma <= SIGMA_THRESHOLD, list_u >= U_THRESHOLD)
+    device = list_u.device
+    return torch.logical_and(list_sigma.to(device) <= SIGMA_THRESHOLD.to(device), list_u >= U_THRESHOLD.to(device))
 
 
 def _get_large_sigma_mask(list_u: torch.Tensor, list_sigma: torch.Tensor) -> torch.Tensor:

diff --git a/data/si_diffusion_small/create_data.sh b/data/si_diffusion_small/create_data.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+TEMPERATURE=300
+BOX_SIZE=2
+STEP=10000
+CROP=10000
+NTRAIN_RUN=10
+NVALID_RUN=5
+
+NRUN=$(($NTRAIN_RUN + $NVALID_RUN))
+
+for SEED in $(seq 2 $NRUN);
+do
+  if [ "$SEED" -le $NTRAIN_RUN ]; then
+    MODE="train"
+  else
+    MODE="valid"
+  fi
+  echo $MODE $SEED
+  mkdir -p "${MODE}_run_${SEED}"
+  cd "${MODE}_run_${SEED}"
+  lmp < ../in.si.lammps -v STEP $(($STEP + $CROP)) -v T $TEMPERATURE -v S $BOX_SIZE -v SEED $SEED
+
+  # extract the thermodynamic outputs in a yaml file
+  egrep  '^(keywords:|data:$|---$|\.\.\.$|  - \[)' log.lammps > thermo_log.yaml
+
+  mkdir -p "uncropped_outputs"
+  mv "dump.si-${TEMPERATURE}-${BOX_SIZE}.yaml" uncropped_outputs/
+  mv thermo_log.yaml uncropped_outputs/
+
+  tail -n 760076 uncropped_outputs/dump.si-300-2.yaml > lammps_dump.yaml
+
+  python ../../crop_lammps_outputs.py \
+      --lammps_yaml "uncropped_outputs/dump.si-${TEMPERATURE}-${BOX_SIZE}.yaml" \
+      --lammps_thermo "uncropped_outputs/thermo_log.yaml" \
+      --crop $CROP \
+      --output_dir ./
+
+  cd ..
+done
diff --git a/data/si_diffusion_small/in.si.lammps b/data/si_diffusion_small/in.si.lammps
@@ -0,0 +1,29 @@
+log log.lammps
+
+units           metal
+atom_style      atomic
+atom_modify     map array
+
+lattice         diamond 5.43
+region          simbox block 0 ${S} 0 ${S} 0 ${S}
+create_box      1 simbox
+create_atoms    1 region simbox
+
+mass 1 28.0855
+
+group Si type 1
+
+pair_style sw
+pair_coeff * * ../../si.sw Si
+
+velocity all create ${T} ${SEED}
+
+dump 1 all yaml 1 dump.si-${T}-${S}.yaml id type x y z fx fy fz
+
+thermo_style yaml
+thermo 1
+#==========================Output files========================
+
+fix 1 all nvt temp ${T} ${T} 0.01
+run ${STEP}
+unfix 1
diff --git a/requirements.txt b/requirements.txt
@@ -5,6 +5,7 @@ gitpython==3.1.27
 isort==5.13.2
 jupyter==1.0.0
 jinja2==3.1.2
+lammps
 maml==2023.9.9
 monty==2024.2.2
 myst-parser==2.0.0

diff --git a/tests/data/diffusion/test_data_preprocess.py b/tests/data/diffusion/test_data_preprocess.py
@@ -99,9 +99,9 @@ def sample_coordinates(box_coordinates):
     # Sample data frame
     return pd.DataFrame({
         'box': [box_coordinates],
-        'x': [[60, 6, 0.6, 0.06]],
-        'y': [[120, 12, 1.2, 0.12]],
-        'z': [[180, 18, 1.8, 0.18]]
+        'x': [[0.6, 0.06, 0.006, 0.00006]],
+        'y': [[1.2, 0.12, 0.0012, 0.00012]],
+        'z': [[1.8, 0.18, 0.018, 0.0018]]
     })
 
 

diff --git a/tests/oracle/test_lammps.py b/tests/oracle/test_lammps.py
@@ -0,0 +1,41 @@
+import numpy as np
+import pytest
+
+from crystal_diffusion.oracle.lammps import get_energy_and_forces_from_lammps
+
+
+@pytest.fixture
+def high_symmetry_lattice():
+    box = np.eye(3) * 4
+    return box
+
+
+@pytest.fixture
+def high_symmetry_positions():
+    positions = np.array([[0, 0, 0], [2, 2, 2]])
+    return positions
+
+
+# do not run on github because no lammps
+@pytest.mark.not_on_github
+def test_high_symmetry(high_symmetry_positions, high_symmetry_lattice):
+    energy, forces = get_energy_and_forces_from_lammps(high_symmetry_positions, high_symmetry_lattice,
+                                                       atom_types=np.array([1, 1]))
+    for x in ['x', 'y', 'z']:
+        assert np.allclose(forces[f'f{x}'], [0, 0])
+    assert energy < 0
+
+
+@pytest.fixture
+def low_symmetry_positions():
+    positions = np.array([[0.23, 1.2, 2.01], [3.2, 0.9, 3.87]])
+    return positions
+
+
+@pytest.mark.not_on_github
+def test_low_symmetry(low_symmetry_positions, high_symmetry_lattice):
+    energy, forces = get_energy_and_forces_from_lammps(low_symmetry_positions, high_symmetry_lattice,
+                                                       atom_types=np.array([1, 1]))
+    for x in ['x', 'y', 'z']:
+        assert not np.allclose(forces[f'f{x}'], [0, 0])
+    assert energy < 0