diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f54ed060..7ccac1c6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,7 +29,7 @@ jobs: pip install -e . - name: unit tests run: | - pytest --cov=crystal_diffusion + pytest --cov=crystal_diffusion -m "not not_on_github" - name: type checking run: | pytype crystal_diffusion/ diff --git a/crystal_diffusion/data/diffusion/data_preprocess.py b/crystal_diffusion/data/diffusion/data_preprocess.py index 16cbf1c9..7636e2e1 100644 --- a/crystal_diffusion/data/diffusion/data_preprocess.py +++ b/crystal_diffusion/data/diffusion/data_preprocess.py @@ -66,7 +66,7 @@ def _convert_coords_to_relative(row: pd.Series) -> List[float]: """ x_lim, y_lim, z_lim = row['box'] coord_red = [coord for triple in zip(row['x'], row['y'], row['z']) for coord in - (triple[0] / x_lim, triple[1] / y_lim, triple[2] / z_lim)] + ((triple[0] / x_lim) % 1, (triple[1] / y_lim) % 1, (triple[2] / z_lim) % 1)] return coord_red def get_x_relative(self, df: pd.DataFrame) -> pd.DataFrame: diff --git a/crystal_diffusion/models/score_network.py b/crystal_diffusion/models/score_network.py index 8fae4e23..15c266fe 100644 --- a/crystal_diffusion/models/score_network.py +++ b/crystal_diffusion/models/score_network.py @@ -175,8 +175,7 @@ def _forward_unchecked(self, batch: Dict[AnyStr, torch.Tensor]) -> torch.Tensor: computed_scores : the scores computed by the model. """ positions = batch[self.position_key] # shape [batch_size, number_of_atoms, spatial_dimension] - times = batch[self.timestep_key] # shape [batch_size, 1] - + times = batch[self.timestep_key].to(positions.device) # shape [batch_size, 1] input = torch.cat([self.flatten(positions), times], dim=1) output = self.mlp_layers(input).reshape(positions.shape) diff --git a/crystal_diffusion/oracle/lammps.py b/crystal_diffusion/oracle/lammps.py new file mode 100644 index 00000000..731bc5a6 --- /dev/null +++ b/crystal_diffusion/oracle/lammps.py @@ -0,0 +1,66 @@ +"""Call LAMMPS to get the forces and energy in a given configuration.""" +import os +from pathlib import Path +from typing import Dict, Tuple + +import lammps +import numpy as np +import pandas as pd +import yaml +from pymatgen.core import Element + +from crystal_diffusion import DATA_DIR + + +def get_energy_and_forces_from_lammps(positions: np.ndarray, + box: np.ndarray, + atom_types: np.ndarray, + atom_type_map: Dict[int, str] = {1: 'Si'}, + tmp_work_dir: str = './', + pair_coeff_dir: Path = DATA_DIR) -> Tuple[float, pd.DataFrame]: + """Call LAMMPS to compute the forces on all atoms in a configuration. + + Args: + positions: atomic positions as a n_atom x spatial dimension array + box: spatial dimension x spatial dimension array representing the periodic box. Assumed to be orthogonal. + atom_types: n_atom array with an index representing the type of each atom + atom_type_map (optional): map from index representing an atom type to a description of the atom. + Defaults to {1: "Si"} + tmp_work_dir (optional): directory where the LAMMPS output will be written (and deleted). Defaults to ./ + pair_coeff_dir (optional): directory where the potentials as .sw files are stored. Defaults to DATA_DIR + + Returns: + energy + dataframe with x, y, z coordinates and fx, fy, fz information in a dataframe. + """ + n_atom = positions.shape[0] + assert atom_types.shape == (n_atom, ), f"Atom types should match the number of atoms. Got {atom_types.shape}." + lmp = lammps.lammps() # create a lammps run + assert np.allclose(box, np.diag(np.diag(box))), "only orthogonal LAMMPS box are valid" + lmp.command(f"region simbox block 0 {box[0, 0]} 0 {box[1, 1]} 0 {box[2, 2]}") # TODO what if box is not orthogonal + lmp.command("create_box 1 simbox") + lmp.command("pair_style sw") + for k, v in atom_type_map.items(): + elem = Element(v) + lmp.command(f"mass {k} {elem.atomic_mass.real}") # the .real is to get the value without the unit + lmp.command(f"group {v} type {k}") + lmp.command(f"pair_coeff * * {os.path.join(pair_coeff_dir, f'{v.lower()}.sw')} {v}") + for i in range(n_atom): + lmp.command(f"create_atoms {atom_types[i]} single {' '.join(map(str, positions[i, :]))}") + lmp.command("fix 1 all nvt temp 300 300 0.01") # selections here do not matter because we only do 1 step + lmp.command(f"dump 1 all yaml 1 {os.path.join(tmp_work_dir, 'dump.yaml')} id x y z fx fy fz") # TODO not good in 2D + lmp.command("run 0") # 0 is the last step index - so run 0 means no MD update - just get the initial forces + + # read informations from lammps output + with open(os.path.join(tmp_work_dir, "dump.yaml"), "r") as f: + dump_yaml = yaml.safe_load_all(f) + doc = next(iter(dump_yaml)) + os.remove(os.path.join(tmp_work_dir, "dump.yaml")) # no need to keep the output as artefact + forces = pd.DataFrame(doc['data'], columns=doc['keywords']).sort_values("id") # organize in a dataframe + + # get the energy + ke = lmp.get_thermo('ke') # kinetic energy - should be 0 as atoms are created with 0 velocity + pe = lmp.get_thermo('pe') # potential energy + energy = ke + pe + + return energy, forces diff --git a/crystal_diffusion/samplers/noisy_position_sampler.py b/crystal_diffusion/samplers/noisy_position_sampler.py index c101e7d5..cd99098d 100644 --- a/crystal_diffusion/samplers/noisy_position_sampler.py +++ b/crystal_diffusion/samplers/noisy_position_sampler.py @@ -79,6 +79,6 @@ def get_noisy_position_sample(real_relative_positions: torch.Tensor, sigmas: tor "sigmas array is expected to be of the same shape as the real_relative_positions array" z_scores = NoisyPositionSampler._get_gaussian_noise(real_relative_positions.shape) - noise = sigmas * z_scores + noise = (sigmas * z_scores).to(real_relative_positions.device) noisy_relative_positions = map_positions_to_unit_cell(real_relative_positions + noise) return noisy_relative_positions diff --git a/crystal_diffusion/score/wrapped_gaussian_score.py b/crystal_diffusion/score/wrapped_gaussian_score.py index 2f9a560a..1ecea7ce 100644 --- a/crystal_diffusion/score/wrapped_gaussian_score.py +++ b/crystal_diffusion/score/wrapped_gaussian_score.py @@ -32,8 +32,8 @@ import numpy as np import torch -SIGMA_THRESHOLD = 1.0 / np.sqrt(2.0 * np.pi) -U_THRESHOLD = 0.5 +SIGMA_THRESHOLD = torch.Tensor([1.0 / np.sqrt(2.0 * np.pi)]) +U_THRESHOLD = torch.Tensor([0.5]) def get_sigma_normalized_score_brute_force(u: float, sigma: float, kmax: Optional[int] = None) -> float: @@ -124,7 +124,8 @@ def get_sigma_normalized_score( for mask_calculator, score_calculator in zip(mask_calculators, score_calculators): mask = mask_calculator(list_u, list_sigma) if mask.any(): - flat_view[mask] = score_calculator(list_u[mask], list_sigma[mask], list_k) + device = flat_view.device + flat_view[mask] = score_calculator(list_u[mask], list_sigma.to(device)[mask], list_k.to(device)) return sigma_normalized_scores @@ -139,7 +140,8 @@ def _get_small_sigma_small_u_mask(list_u: torch.Tensor, list_sigma: torch.Tensor Returns: mask_1a : an array of booleans of shape [Nu] """ - return torch.logical_and(list_sigma <= SIGMA_THRESHOLD, list_u < U_THRESHOLD) + device = list_u.device + return torch.logical_and(list_sigma.to(device) <= SIGMA_THRESHOLD.to(device), list_u < U_THRESHOLD.to(device)) def _get_small_sigma_large_u_mask(list_u: torch.Tensor, list_sigma: torch.Tensor) -> torch.Tensor: @@ -152,7 +154,8 @@ def _get_small_sigma_large_u_mask(list_u: torch.Tensor, list_sigma: torch.Tensor Returns: mask_1b : an array of booleans of shape [Nu] """ - return torch.logical_and(list_sigma <= SIGMA_THRESHOLD, list_u >= U_THRESHOLD) + device = list_u.device + return torch.logical_and(list_sigma.to(device) <= SIGMA_THRESHOLD.to(device), list_u >= U_THRESHOLD.to(device)) def _get_large_sigma_mask(list_u: torch.Tensor, list_sigma: torch.Tensor) -> torch.Tensor: diff --git a/data/si_diffusion_small/create_data.sh b/data/si_diffusion_small/create_data.sh new file mode 100755 index 00000000..81850b9f --- /dev/null +++ b/data/si_diffusion_small/create_data.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +TEMPERATURE=300 +BOX_SIZE=2 +STEP=10000 +CROP=10000 +NTRAIN_RUN=10 +NVALID_RUN=5 + +NRUN=$(($NTRAIN_RUN + $NVALID_RUN)) + +for SEED in $(seq 2 $NRUN); +do + if [ "$SEED" -le $NTRAIN_RUN ]; then + MODE="train" + else + MODE="valid" + fi + echo $MODE $SEED + mkdir -p "${MODE}_run_${SEED}" + cd "${MODE}_run_${SEED}" + lmp < ../in.si.lammps -v STEP $(($STEP + $CROP)) -v T $TEMPERATURE -v S $BOX_SIZE -v SEED $SEED + + # extract the thermodynamic outputs in a yaml file + egrep '^(keywords:|data:$|---$|\.\.\.$| - \[)' log.lammps > thermo_log.yaml + + mkdir -p "uncropped_outputs" + mv "dump.si-${TEMPERATURE}-${BOX_SIZE}.yaml" uncropped_outputs/ + mv thermo_log.yaml uncropped_outputs/ + + tail -n 760076 uncropped_outputs/dump.si-300-2.yaml > lammps_dump.yaml + + python ../../crop_lammps_outputs.py \ + --lammps_yaml "uncropped_outputs/dump.si-${TEMPERATURE}-${BOX_SIZE}.yaml" \ + --lammps_thermo "uncropped_outputs/thermo_log.yaml" \ + --crop $CROP \ + --output_dir ./ + + cd .. +done diff --git a/data/si_diffusion_small/in.si.lammps b/data/si_diffusion_small/in.si.lammps new file mode 100755 index 00000000..17f20e42 --- /dev/null +++ b/data/si_diffusion_small/in.si.lammps @@ -0,0 +1,29 @@ +log log.lammps + +units metal +atom_style atomic +atom_modify map array + +lattice diamond 5.43 +region simbox block 0 ${S} 0 ${S} 0 ${S} +create_box 1 simbox +create_atoms 1 region simbox + +mass 1 28.0855 + +group Si type 1 + +pair_style sw +pair_coeff * * ../../si.sw Si + +velocity all create ${T} ${SEED} + +dump 1 all yaml 1 dump.si-${T}-${S}.yaml id type x y z fx fy fz + +thermo_style yaml +thermo 1 +#==========================Output files======================== + +fix 1 all nvt temp ${T} ${T} 0.01 +run ${STEP} +unfix 1 diff --git a/requirements.txt b/requirements.txt index 18de11e3..3cb59aad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ gitpython==3.1.27 isort==5.13.2 jupyter==1.0.0 jinja2==3.1.2 +lammps maml==2023.9.9 monty==2024.2.2 myst-parser==2.0.0 diff --git a/tests/data/diffusion/test_data_preprocess.py b/tests/data/diffusion/test_data_preprocess.py index bc587bd6..682ac926 100644 --- a/tests/data/diffusion/test_data_preprocess.py +++ b/tests/data/diffusion/test_data_preprocess.py @@ -99,9 +99,9 @@ def sample_coordinates(box_coordinates): # Sample data frame return pd.DataFrame({ 'box': [box_coordinates], - 'x': [[60, 6, 0.6, 0.06]], - 'y': [[120, 12, 1.2, 0.12]], - 'z': [[180, 18, 1.8, 0.18]] + 'x': [[0.6, 0.06, 0.006, 0.00006]], + 'y': [[1.2, 0.12, 0.0012, 0.00012]], + 'z': [[1.8, 0.18, 0.018, 0.0018]] }) diff --git a/tests/oracle/test_lammps.py b/tests/oracle/test_lammps.py new file mode 100644 index 00000000..fb29bebe --- /dev/null +++ b/tests/oracle/test_lammps.py @@ -0,0 +1,41 @@ +import numpy as np +import pytest + +from crystal_diffusion.oracle.lammps import get_energy_and_forces_from_lammps + + +@pytest.fixture +def high_symmetry_lattice(): + box = np.eye(3) * 4 + return box + + +@pytest.fixture +def high_symmetry_positions(): + positions = np.array([[0, 0, 0], [2, 2, 2]]) + return positions + + +# do not run on github because no lammps +@pytest.mark.not_on_github +def test_high_symmetry(high_symmetry_positions, high_symmetry_lattice): + energy, forces = get_energy_and_forces_from_lammps(high_symmetry_positions, high_symmetry_lattice, + atom_types=np.array([1, 1])) + for x in ['x', 'y', 'z']: + assert np.allclose(forces[f'f{x}'], [0, 0]) + assert energy < 0 + + +@pytest.fixture +def low_symmetry_positions(): + positions = np.array([[0.23, 1.2, 2.01], [3.2, 0.9, 3.87]]) + return positions + + +@pytest.mark.not_on_github +def test_low_symmetry(low_symmetry_positions, high_symmetry_lattice): + energy, forces = get_energy_and_forces_from_lammps(low_symmetry_positions, high_symmetry_lattice, + atom_types=np.array([1, 1])) + for x in ['x', 'y', 'z']: + assert not np.allclose(forces[f'f{x}'], [0, 0]) + assert energy < 0