Skip to content

Commit

Permalink
Merge branch 'main' into tooling
Browse files Browse the repository at this point in the history
  • Loading branch information
rousseab committed Apr 5, 2024
2 parents 1147d2e + b5eda3b commit bc84515
Show file tree
Hide file tree
Showing 11 changed files with 192 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
pip install -e .
- name: unit tests
run: |
pytest --cov=crystal_diffusion
pytest --cov=crystal_diffusion -m "not not_on_github"
- name: type checking
run: |
pytype crystal_diffusion/
Expand Down
2 changes: 1 addition & 1 deletion crystal_diffusion/data/diffusion/data_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def _convert_coords_to_relative(row: pd.Series) -> List[float]:
"""
x_lim, y_lim, z_lim = row['box']
coord_red = [coord for triple in zip(row['x'], row['y'], row['z']) for coord in
(triple[0] / x_lim, triple[1] / y_lim, triple[2] / z_lim)]
((triple[0] / x_lim) % 1, (triple[1] / y_lim) % 1, (triple[2] / z_lim) % 1)]
return coord_red

def get_x_relative(self, df: pd.DataFrame) -> pd.DataFrame:
Expand Down
3 changes: 1 addition & 2 deletions crystal_diffusion/models/score_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,7 @@ def _forward_unchecked(self, batch: Dict[AnyStr, torch.Tensor]) -> torch.Tensor:
computed_scores : the scores computed by the model.
"""
positions = batch[self.position_key] # shape [batch_size, number_of_atoms, spatial_dimension]
times = batch[self.timestep_key] # shape [batch_size, 1]

times = batch[self.timestep_key].to(positions.device) # shape [batch_size, 1]
input = torch.cat([self.flatten(positions), times], dim=1)

output = self.mlp_layers(input).reshape(positions.shape)
Expand Down
66 changes: 66 additions & 0 deletions crystal_diffusion/oracle/lammps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Call LAMMPS to get the forces and energy in a given configuration."""
import os
from pathlib import Path
from typing import Dict, Tuple

import lammps
import numpy as np
import pandas as pd
import yaml
from pymatgen.core import Element

from crystal_diffusion import DATA_DIR


def get_energy_and_forces_from_lammps(positions: np.ndarray,
box: np.ndarray,
atom_types: np.ndarray,
atom_type_map: Dict[int, str] = {1: 'Si'},
tmp_work_dir: str = './',
pair_coeff_dir: Path = DATA_DIR) -> Tuple[float, pd.DataFrame]:
"""Call LAMMPS to compute the forces on all atoms in a configuration.
Args:
positions: atomic positions as a n_atom x spatial dimension array
box: spatial dimension x spatial dimension array representing the periodic box. Assumed to be orthogonal.
atom_types: n_atom array with an index representing the type of each atom
atom_type_map (optional): map from index representing an atom type to a description of the atom.
Defaults to {1: "Si"}
tmp_work_dir (optional): directory where the LAMMPS output will be written (and deleted). Defaults to ./
pair_coeff_dir (optional): directory where the potentials as .sw files are stored. Defaults to DATA_DIR
Returns:
energy
dataframe with x, y, z coordinates and fx, fy, fz information in a dataframe.
"""
n_atom = positions.shape[0]
assert atom_types.shape == (n_atom, ), f"Atom types should match the number of atoms. Got {atom_types.shape}."
lmp = lammps.lammps() # create a lammps run
assert np.allclose(box, np.diag(np.diag(box))), "only orthogonal LAMMPS box are valid"
lmp.command(f"region simbox block 0 {box[0, 0]} 0 {box[1, 1]} 0 {box[2, 2]}") # TODO what if box is not orthogonal
lmp.command("create_box 1 simbox")
lmp.command("pair_style sw")
for k, v in atom_type_map.items():
elem = Element(v)
lmp.command(f"mass {k} {elem.atomic_mass.real}") # the .real is to get the value without the unit
lmp.command(f"group {v} type {k}")
lmp.command(f"pair_coeff * * {os.path.join(pair_coeff_dir, f'{v.lower()}.sw')} {v}")
for i in range(n_atom):
lmp.command(f"create_atoms {atom_types[i]} single {' '.join(map(str, positions[i, :]))}")
lmp.command("fix 1 all nvt temp 300 300 0.01") # selections here do not matter because we only do 1 step
lmp.command(f"dump 1 all yaml 1 {os.path.join(tmp_work_dir, 'dump.yaml')} id x y z fx fy fz") # TODO not good in 2D
lmp.command("run 0") # 0 is the last step index - so run 0 means no MD update - just get the initial forces

# read informations from lammps output
with open(os.path.join(tmp_work_dir, "dump.yaml"), "r") as f:
dump_yaml = yaml.safe_load_all(f)
doc = next(iter(dump_yaml))
os.remove(os.path.join(tmp_work_dir, "dump.yaml")) # no need to keep the output as artefact
forces = pd.DataFrame(doc['data'], columns=doc['keywords']).sort_values("id") # organize in a dataframe

# get the energy
ke = lmp.get_thermo('ke') # kinetic energy - should be 0 as atoms are created with 0 velocity
pe = lmp.get_thermo('pe') # potential energy
energy = ke + pe

return energy, forces
2 changes: 1 addition & 1 deletion crystal_diffusion/samplers/noisy_position_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,6 @@ def get_noisy_position_sample(real_relative_positions: torch.Tensor, sigmas: tor
"sigmas array is expected to be of the same shape as the real_relative_positions array"

z_scores = NoisyPositionSampler._get_gaussian_noise(real_relative_positions.shape)
noise = sigmas * z_scores
noise = (sigmas * z_scores).to(real_relative_positions.device)
noisy_relative_positions = map_positions_to_unit_cell(real_relative_positions + noise)
return noisy_relative_positions
13 changes: 8 additions & 5 deletions crystal_diffusion/score/wrapped_gaussian_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
import numpy as np
import torch

SIGMA_THRESHOLD = 1.0 / np.sqrt(2.0 * np.pi)
U_THRESHOLD = 0.5
SIGMA_THRESHOLD = torch.Tensor([1.0 / np.sqrt(2.0 * np.pi)])
U_THRESHOLD = torch.Tensor([0.5])


def get_sigma_normalized_score_brute_force(u: float, sigma: float, kmax: Optional[int] = None) -> float:
Expand Down Expand Up @@ -124,7 +124,8 @@ def get_sigma_normalized_score(
for mask_calculator, score_calculator in zip(mask_calculators, score_calculators):
mask = mask_calculator(list_u, list_sigma)
if mask.any():
flat_view[mask] = score_calculator(list_u[mask], list_sigma[mask], list_k)
device = flat_view.device
flat_view[mask] = score_calculator(list_u[mask], list_sigma.to(device)[mask], list_k.to(device))

return sigma_normalized_scores

Expand All @@ -139,7 +140,8 @@ def _get_small_sigma_small_u_mask(list_u: torch.Tensor, list_sigma: torch.Tensor
Returns:
mask_1a : an array of booleans of shape [Nu]
"""
return torch.logical_and(list_sigma <= SIGMA_THRESHOLD, list_u < U_THRESHOLD)
device = list_u.device
return torch.logical_and(list_sigma.to(device) <= SIGMA_THRESHOLD.to(device), list_u < U_THRESHOLD.to(device))


def _get_small_sigma_large_u_mask(list_u: torch.Tensor, list_sigma: torch.Tensor) -> torch.Tensor:
Expand All @@ -152,7 +154,8 @@ def _get_small_sigma_large_u_mask(list_u: torch.Tensor, list_sigma: torch.Tensor
Returns:
mask_1b : an array of booleans of shape [Nu]
"""
return torch.logical_and(list_sigma <= SIGMA_THRESHOLD, list_u >= U_THRESHOLD)
device = list_u.device
return torch.logical_and(list_sigma.to(device) <= SIGMA_THRESHOLD.to(device), list_u >= U_THRESHOLD.to(device))


def _get_large_sigma_mask(list_u: torch.Tensor, list_sigma: torch.Tensor) -> torch.Tensor:
Expand Down
40 changes: 40 additions & 0 deletions data/si_diffusion_small/create_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash

TEMPERATURE=300
BOX_SIZE=2
STEP=10000
CROP=10000
NTRAIN_RUN=10
NVALID_RUN=5

NRUN=$(($NTRAIN_RUN + $NVALID_RUN))

for SEED in $(seq 2 $NRUN);
do
if [ "$SEED" -le $NTRAIN_RUN ]; then
MODE="train"
else
MODE="valid"
fi
echo $MODE $SEED
mkdir -p "${MODE}_run_${SEED}"
cd "${MODE}_run_${SEED}"
lmp < ../in.si.lammps -v STEP $(($STEP + $CROP)) -v T $TEMPERATURE -v S $BOX_SIZE -v SEED $SEED

# extract the thermodynamic outputs in a yaml file
egrep '^(keywords:|data:$|---$|\.\.\.$| - \[)' log.lammps > thermo_log.yaml

mkdir -p "uncropped_outputs"
mv "dump.si-${TEMPERATURE}-${BOX_SIZE}.yaml" uncropped_outputs/
mv thermo_log.yaml uncropped_outputs/

tail -n 760076 uncropped_outputs/dump.si-300-2.yaml > lammps_dump.yaml

python ../../crop_lammps_outputs.py \
--lammps_yaml "uncropped_outputs/dump.si-${TEMPERATURE}-${BOX_SIZE}.yaml" \
--lammps_thermo "uncropped_outputs/thermo_log.yaml" \
--crop $CROP \
--output_dir ./

cd ..
done
29 changes: 29 additions & 0 deletions data/si_diffusion_small/in.si.lammps
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
log log.lammps

units metal
atom_style atomic
atom_modify map array

lattice diamond 5.43
region simbox block 0 ${S} 0 ${S} 0 ${S}
create_box 1 simbox
create_atoms 1 region simbox

mass 1 28.0855

group Si type 1

pair_style sw
pair_coeff * * ../../si.sw Si

velocity all create ${T} ${SEED}

dump 1 all yaml 1 dump.si-${T}-${S}.yaml id type x y z fx fy fz

thermo_style yaml
thermo 1
#==========================Output files========================

fix 1 all nvt temp ${T} ${T} 0.01
run ${STEP}
unfix 1
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ gitpython==3.1.27
isort==5.13.2
jupyter==1.0.0
jinja2==3.1.2
lammps
maml==2023.9.9
monty==2024.2.2
myst-parser==2.0.0
Expand Down
6 changes: 3 additions & 3 deletions tests/data/diffusion/test_data_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,9 @@ def sample_coordinates(box_coordinates):
# Sample data frame
return pd.DataFrame({
'box': [box_coordinates],
'x': [[60, 6, 0.6, 0.06]],
'y': [[120, 12, 1.2, 0.12]],
'z': [[180, 18, 1.8, 0.18]]
'x': [[0.6, 0.06, 0.006, 0.00006]],
'y': [[1.2, 0.12, 0.0012, 0.00012]],
'z': [[1.8, 0.18, 0.018, 0.0018]]
})


Expand Down
41 changes: 41 additions & 0 deletions tests/oracle/test_lammps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import numpy as np
import pytest

from crystal_diffusion.oracle.lammps import get_energy_and_forces_from_lammps


@pytest.fixture
def high_symmetry_lattice():
box = np.eye(3) * 4
return box


@pytest.fixture
def high_symmetry_positions():
positions = np.array([[0, 0, 0], [2, 2, 2]])
return positions


# do not run on github because no lammps
@pytest.mark.not_on_github
def test_high_symmetry(high_symmetry_positions, high_symmetry_lattice):
energy, forces = get_energy_and_forces_from_lammps(high_symmetry_positions, high_symmetry_lattice,
atom_types=np.array([1, 1]))
for x in ['x', 'y', 'z']:
assert np.allclose(forces[f'f{x}'], [0, 0])
assert energy < 0


@pytest.fixture
def low_symmetry_positions():
positions = np.array([[0.23, 1.2, 2.01], [3.2, 0.9, 3.87]])
return positions


@pytest.mark.not_on_github
def test_low_symmetry(low_symmetry_positions, high_symmetry_lattice):
energy, forces = get_energy_and_forces_from_lammps(low_symmetry_positions, high_symmetry_lattice,
atom_types=np.array([1, 1]))
for x in ['x', 'y', 'z']:
assert not np.allclose(forces[f'f{x}'], [0, 0])
assert energy < 0

0 comments on commit bc84515

Please sign in to comment.