From 13fc5439f499a5b0276ea6af9f3f5edb3d999561 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 28 Mar 2024 14:19:06 -0400 Subject: [PATCH 1/6] end2end diffusion model training --- .../data/diffusion/data_loader.py | 14 +- .../data/diffusion/data_preprocess.py | 19 +- crystal_diffusion/models/model_loader.py | 47 +++- crystal_diffusion/train_diffusion.py | 248 ++++++++++++++++++ examples/local/config_diffusion.yaml | 34 +++ examples/local/run_diffusion.sh | 16 ++ 6 files changed, 346 insertions(+), 32 deletions(-) create mode 100644 crystal_diffusion/train_diffusion.py create mode 100644 examples/local/config_diffusion.yaml create mode 100755 examples/local/run_diffusion.sh diff --git a/crystal_diffusion/data/diffusion/data_loader.py b/crystal_diffusion/data/diffusion/data_loader.py index c58cf7b2..a2a0ea58 100644 --- a/crystal_diffusion/data/diffusion/data_loader.py +++ b/crystal_diffusion/data/diffusion/data_loader.py @@ -9,11 +9,9 @@ import pytorch_lightning as pl import torch import torch.nn.functional as F -from torch.utils.data import DataLoader - from crystal_diffusion.data.diffusion.data_preprocess import \ LammpsProcessorForDiffusion -from crystal_diffusion.utils.hp_utils import check_and_log_hp +from torch.utils.data import DataLoader logger = logging.getLogger(__name__) @@ -49,7 +47,7 @@ def __init__( lot of disk space. Defaults to None. """ super().__init__() - check_and_log_hp(["batch_size", "num_workers"], hyper_params) # validate the hyperparameters + # check_and_log_hp(["batch_size", "num_workers"], hyper_params) # validate the hyperparameters # TODO add the padding parameters for number of atoms self.lammps_run_dir = lammps_run_dir self.processed_dataset_dir = processed_dataset_dir @@ -68,7 +66,7 @@ def dataset_transform(x: Dict[typing.AnyStr, typing.Any], spatial_dim: int = 3) Args: x: raw columns from the processed data files. Should contain natom, box, type, position and - reduced_position. + relative_positions. spatial_dim (optional): number of spatial dimensions. Defaults to 3. Returns: @@ -78,7 +76,7 @@ def dataset_transform(x: Dict[typing.AnyStr, typing.Any], spatial_dim: int = 3) transformed_x['natom'] = torch.as_tensor(x['natom']).long() # resulting tensor size: (batchsize, ) bsize = transformed_x['natom'].size(0) transformed_x['box'] = torch.as_tensor(x['box']) # size: (batchsize, spatial dimension) - for pos in ['position', 'reduced_position']: + for pos in ['position', 'relative_positions']: transformed_x[pos] = torch.as_tensor(x[pos]).view(bsize, -1, spatial_dim) transformed_x['type'] = torch.as_tensor(x['type']).long() # size: (batchsize, max atom) @@ -89,7 +87,7 @@ def pad_samples(x: Dict[typing.AnyStr, typing.Any], max_atom: int, spatial_dim: """Pad a sample for batching. Args: - x: initial sample from the dataset. Should contain natom, position, reduced_position and type. + x: initial sample from the dataset. Should contain natom, position, relative_positions and type. max_atom: maximum number of atoms to pad to spatial_dim (optional): number of spatial dimensions. Defaults to 3. 
@@ -100,7 +98,7 @@ def pad_samples(x: Dict[typing.AnyStr, typing.Any], max_atom: int, spatial_dim: if natom > max_atom: raise ValueError(f"Hyper-parameter max_atom is smaller than an example in the dataset with {natom} atoms.") x['type'] = F.pad(torch.as_tensor(x['type']).long(), (0, max_atom - natom), 'constant', -1) - for pos in ['position', 'reduced_position']: + for pos in ['position', 'relative_positions']: x[pos] = F.pad(torch.as_tensor(x[pos]).float(), (0, spatial_dim * (max_atom - natom)), 'constant', torch.nan) return x diff --git a/crystal_diffusion/data/diffusion/data_preprocess.py b/crystal_diffusion/data/diffusion/data_preprocess.py index 34aa4661..f0105976 100644 --- a/crystal_diffusion/data/diffusion/data_preprocess.py +++ b/crystal_diffusion/data/diffusion/data_preprocess.py @@ -5,7 +5,6 @@ from typing import List, Optional import pandas as pd - from crystal_diffusion.data.parse_lammps_outputs import parse_lammps_output logger = logging.getLogger(__name__) @@ -55,30 +54,30 @@ def prepare_data(self, raw_data_dir: str, mode: str = 'train') -> List[str]: return list_files @staticmethod - def _convert_coords_to_reduced(row: pd.Series) -> List[float]: - """Convert a dataframe row to reduced coordinates. + def _convert_coords_to_relative(row: pd.Series) -> List[float]: + """Convert a dataframe row to relative coordinates. Args: row: entry in the dataframe. Should contain box, x, y and z Returns: - x, y and z in reduced coordinates + x, y and z in relative (reduced) coordinates """ x_lim, y_lim, z_lim = row['box'] coord_red = [coord for triple in zip(row['x'], row['y'], row['z']) for coord in (triple[0] / x_lim, triple[1] / y_lim, triple[2] / z_lim)] return coord_red - def get_x_reduced(self, df: pd.DataFrame) -> pd.DataFrame: - """Add a column with reduced x,y, z coordinates. + def get_x_relative(self, df: pd.DataFrame) -> pd.DataFrame: + """Add a column with relative x,y, z coordinates. Args: df: dataframe with atomic positions. Should contain box, x, y and z. Returns: - dataframe with added column of reduced positions [x1, y1, z1, x2, y2, ...] + dataframe with added column of relative positions [x1, y1, z1, x2, y2, ...] 
""" - df['reduced_position'] = df.apply(lambda x: self._convert_coords_to_reduced(x), axis=1) + df['relative_positions'] = df.apply(lambda x: self._convert_coords_to_relative(x), axis=1) return df def parse_lammps_run(self, run_dir: str) -> Optional[pd.DataFrame]: @@ -114,11 +113,11 @@ def parse_lammps_run(self, run_dir: str) -> Optional[pd.DataFrame]: # TODO consider filtering out samples with large forces and MD steps that are too similar # TODO large force and similar are to be defined df = df[['type', 'x', 'y', 'z', 'box']] - df = self.get_x_reduced(df) # add reduced coordinates + df = self.get_x_relative(df) # add relative coordinates df['natom'] = df['type'].apply(lambda x: len(x)) # count number of atoms in a structure # naive implementation: a list of list which is converted into a 2d array by torch later # but a list of list is not ok with the writing on files with parquet df['position'] = df.apply(lambda x: [j for i in ['x', 'y', 'z'] for j in x[i]], axis=1) # position as 3d array # position is natom * 3 array # TODO unit test to check the order after reshape - return df[['natom', 'box', 'type', 'position', 'reduced_position']] + return df[['natom', 'box', 'type', 'position', 'relative_positions']] diff --git a/crystal_diffusion/models/model_loader.py b/crystal_diffusion/models/model_loader.py index 197189be..4a13bcb0 100644 --- a/crystal_diffusion/models/model_loader.py +++ b/crystal_diffusion/models/model_loader.py @@ -1,28 +1,47 @@ +"""Functions to instantiate a model based on the provided hyperparameters.""" import logging +from typing import Any, AnyStr, Dict -from crystal_diffusion.models.score_network import MLPScoreNetwork +from crystal_diffusion.models.optimizer import (OptimizerParameters, + ValidOptimizerNames) +from crystal_diffusion.models.position_diffusion_lightning_model import ( + PositionDiffusionLightningModel, PositionDiffusionParameters) +from crystal_diffusion.models.score_network import MLPScoreNetworkParameters +from crystal_diffusion.samplers.variance_sampler import NoiseParameters logger = logging.getLogger(__name__) -def load_model(hyper_params): # pragma: no cover - """Instantiate a model. +def load_diffusion_model(hyper_params: Dict[AnyStr, Any]) -> PositionDiffusionLightningModel: + """Load a position diffusion model from the hyperparameters. Args: - hyper_params (dict): hyper parameters from the config file + hyper_params: dictionary of hyperparameters loaded from a config file Returns: - model (obj): A neural network model object. 
+ Diffusion model randomly initialized """ - architecture = hyper_params['architecture'] - # __TODO__ fix architecture list - if architecture == 'simple_mlp': - model_class = MLPScoreNetwork - else: - raise ValueError('architecture {} not supported'.format(architecture)) - logger.info('selected architecture: {}'.format(architecture)) - - model = model_class(hyper_params) + score_network_parameters = MLPScoreNetworkParameters( + number_of_atoms=hyper_params['data']['max_atom'], + **hyper_params['model']['score_network'] + ) + score_network_parameters.spatial_dimension = hyper_params.get('spatial_dimension', 3) + + hyper_params['optimizer']['name'] = ValidOptimizerNames(hyper_params['optimizer']['name']) + + optimizer_parameters = OptimizerParameters( + **hyper_params['optimizer'] + ) + + noise_parameters = NoiseParameters(**hyper_params['model']['noise']) + + diffusion_params = PositionDiffusionParameters( + score_network_parameters=score_network_parameters, + optimizer_parameters=optimizer_parameters, + noise_parameters=noise_parameters, + ) + + model = PositionDiffusionLightningModel(diffusion_params) logger.info('model info:\n' + str(model) + '\n') return model diff --git a/crystal_diffusion/train_diffusion.py b/crystal_diffusion/train_diffusion.py new file mode 100644 index 00000000..ff8fc7a6 --- /dev/null +++ b/crystal_diffusion/train_diffusion.py @@ -0,0 +1,248 @@ +"""Entry point to train a diffusion model.""" +import argparse +import glob +import logging +import os +import shutil +import sys + +import orion +import pytorch_lightning as pl +import yaml +from crystal_diffusion.data.diffusion.data_loader import ( + LammpsForDiffusionDataModule, LammpsLoaderParameters) +from crystal_diffusion.models.model_loader import load_diffusion_model +from crystal_diffusion.utils.file_utils import rsync_folder +from crystal_diffusion.utils.hp_utils import check_and_log_hp +from crystal_diffusion.utils.logging_utils import LoggerWriter, log_exp_details +from crystal_diffusion.utils.reproducibility_utils import set_seed +from orion.client import report_results +from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint +from yaml import load + +logger = logging.getLogger(__name__) + +BEST_MODEL_NAME = 'best_model' +LAST_MODEL_NAME = 'last_model' + + +def main(): + """Create and train a diffusion model: main entry point of the program. + + Note: + This main.py file is meant to be called using the cli, + see the `examples/local/run_diffusion.sh` file to see how to use it. + + """ + parser = argparse.ArgumentParser() + # __TODO__ check you need all the following CLI parameters + parser.add_argument('--log', help='log to this file (in addition to stdout/err)') + parser.add_argument('--config', + help='config file with generic hyper-parameters, such as optimizer, ' + 'batch_size, ... - in yaml format') + parser.add_argument('--data', help='path to a LAMMPS data set', required=True) + parser.add_argument('--processed_datadir', help='path to the processed data directory', required=True) + parser.add_argument('--dataset_working_dir', help='path to the Datasets working directory. 
Defaults to None', + default=None) + parser.add_argument('--tmp-folder', + help='will use this folder as working folder - it will copy the input data ' + 'here, generate results here, and then copy them back to the output ' + 'folder') # TODO possibly remove this + parser.add_argument('--output', help='path to outputs - will store files here', required=True) + parser.add_argument('--disable-progressbar', action='store_true', + help='will disable the progressbar while going over the mini-batch') + parser.add_argument('--start-from-scratch', action='store_true', + help='will not load any existing saved model - even if present') + parser.add_argument('--accelerator', help='PL trainer accelerator. Defaults to auto.', default='auto') + parser.add_argument('--devices', default=1, help='pytorch-lightning devices kwarg. Defaults to 1.') + parser.add_argument('--debug', action='store_true') # TODO not used yet + args = parser.parse_args() + + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + if os.path.exists(args.output) and args.start_from_scratch: + logger.info('Starting from scratch, removing any previous experiments.') + shutil.rmtree(args.output) + + if os.path.exists(args.output): + logger.info("Previous experiment found, resuming from checkpoint") + else: + os.makedirs(args.output) + + if args.tmp_folder is not None: + # TODO data rsync to tmp_folder + output_dir = os.path.join(args.tmp_folder, 'output') + if not os.path.exists(output_dir): + os.makedirs(output_dir) + else: + output_dir = args.output + + # will log to a file if provided (useful for orion on cluster) + if args.log is not None: + handler = logging.handlers.WatchedFileHandler(args.log) + formatter = logging.Formatter(logging.BASIC_FORMAT) + handler.setFormatter(formatter) + root = logging.getLogger() + root.setLevel(logging.INFO) + root.addHandler(handler) + + # to intercept any print statement: + sys.stdout = LoggerWriter(logger.info) + sys.stderr = LoggerWriter(logger.warning) + + if args.config is not None: + with open(args.config, 'r') as stream: + hyper_params = load(stream, Loader=yaml.FullLoader) + else: + hyper_params = {} + + run(args, output_dir, hyper_params) + + if args.tmp_folder is not None: + rsync_folder(output_dir + os.path.sep, args.output) + + +def run(args, output_dir, hyper_params): + """Create and run the dataloaders, training loops, etc. 
+ + Args: + args (object): arguments passed from the cli + output_dir (str): path to output folder + hyper_params (dict): hyper parameters from the config file + """ + # __TODO__ change the hparam that are used from the training algorithm + # (and NOT the model - these will be specified in the model itself) + logger.info('List of hyper-parameters:') + check_and_log_hp( + ['model', 'data', 'exp_name', 'max_epoch', 'optimizer', 'seed', + 'early_stopping'], + hyper_params) + + if hyper_params["seed"] is not None: + set_seed(hyper_params["seed"]) + + log_exp_details(os.path.realpath(__file__), args) + + data_params = LammpsLoaderParameters(**hyper_params['data']) + + datamodule = LammpsForDiffusionDataModule( + lammps_run_dir=args.data, + processed_dataset_dir=args.processed_datadir, + hyper_params=data_params, + working_cache_dir=args.dataset_working_dir, + ) + + model = load_diffusion_model(hyper_params) + + train(model=model, datamodule=datamodule, output=output_dir, hyper_params=hyper_params, + use_progress_bar=not args.disable_progressbar, accelerator=args.accelerator, devices=args.devices) + + # clean up the data cache to save disk space + datamodule.clean_up() + + +def train(**kwargs): # pragma: no cover + """Training loop wrapper. Used to catch exception if Orion is being used.""" + try: + best_dev_metric = train_impl(**kwargs) + except RuntimeError as err: + if orion.client.cli.IS_ORION_ON and 'CUDA out of memory' in str(err): + logger.error(err) + logger.error('model was out of memory - assigning a bad score to tell Orion to avoid' + 'too big model') + best_dev_metric = -999 + else: + raise err + + report_results([dict( + name='dev_metric', + type='objective', + # note the minus - cause orion is always trying to minimize (cit. from the guide) + value=-float(best_dev_metric))]) + + +def train_impl(model, datamodule, output, hyper_params, use_progress_bar, accelerator=None, devices=None + ): # pragma: no cover + """Train a model: main training loop implementation. + + Args: + model (obj): The neural network model object. + datamodule (obj): lightning data module that will instantiate data loaders. + output (str): Output directory. + hyper_params (dict): Dict containing hyper-parameters. + use_progress_bar (bool): Use tqdm progress bar (can be disabled when logging). 
+ accelerator: PL trainer accelerator + devices: PL devices to use + """ + check_and_log_hp(['max_epoch'], hyper_params) + + best_model_path = os.path.join(output, BEST_MODEL_NAME) + best_checkpoint_callback = ModelCheckpoint( + dirpath=best_model_path, + filename='model', + save_top_k=1, + verbose=use_progress_bar, + monitor="val_loss", + mode="max", + every_n_epochs=1, + ) + + last_model_path = os.path.join(output, LAST_MODEL_NAME) + last_checkpoint_callback = ModelCheckpoint( + dirpath=last_model_path, + filename='model', + verbose=use_progress_bar, + every_n_epochs=1, + ) + + # TODO pl Trainer does not use the kwarg resume_from_checkpoint now - check about resume training works now + # resume_from_checkpoint = handle_previous_models(output, last_model_path, best_model_path) + + + early_stopping_params = hyper_params['early_stopping'] + check_and_log_hp(['metric', 'mode', 'patience'], hyper_params['early_stopping']) + early_stopping = EarlyStopping( + early_stopping_params['metric'], + mode=early_stopping_params['mode'], + patience=early_stopping_params['patience'], + verbose=use_progress_bar) + + logger = pl.loggers.TensorBoardLogger( + save_dir=output, + default_hp_metric=False, + version=0, # Necessary to resume tensorboard logging + ) + + trainer = pl.Trainer( + callbacks=[early_stopping, best_checkpoint_callback, last_checkpoint_callback], + max_epochs=hyper_params['max_epoch'], + # resume_from_checkpoint=resume_from_checkpoint, + accelerator=accelerator, + devices=devices, + logger=logger, + ) + + trainer.fit(model, datamodule=datamodule) + + # Log the best result and associated hyper parameters + best_dev_result = float(early_stopping.best_score.cpu().numpy()) + logger.log_hyperparams(hyper_params, metrics={'best_dev_metric': best_dev_result}) + + return best_dev_result + + +def handle_previous_models(output, last_model_path, best_model_path): + """Move the previous models in a new timestamp folder.""" + last_models = glob.glob(last_model_path + os.sep + '*') + + if len(last_models) >= 1: + resume_from_checkpoint = sorted(last_models)[-1] + logger.info(f'models found - resuming from {resume_from_checkpoint}') + else: + logger.info('no model found - starting training from scratch') + resume_from_checkpoint = None + return resume_from_checkpoint + + +if __name__ == '__main__': + main() diff --git a/examples/local/config_diffusion.yaml b/examples/local/config_diffusion.yaml new file mode 100644 index 00000000..042f5fbe --- /dev/null +++ b/examples/local/config_diffusion.yaml @@ -0,0 +1,34 @@ +# general +loss: cross_entropy +max_epoch: 5 +exp_name: exp_example +# set to null to avoid setting a seed (can speed up GPU computation, but +# results will not be reproducible) +seed: 1234 + +# data +data: + batch_size: 32 + num_workers: 0 + max_atom: 512 + +# architecture +spatial_dimension: 3 +model: + score_network: + hidden_dimensions: [16, 16] # dimensions of the hidden layers. 
Length of array determines number of la + noise: + total_time_steps: 10 + sigma_min: 0.005 # default value + sigma_max: 0.5 # default value + +# optimizer +optimizer: + name: adam + learning_rate: 0.001 + +# early stopping +early_stopping: + metric: val_loss + mode: min + patience: 3 \ No newline at end of file diff --git a/examples/local/run_diffusion.sh b/examples/local/run_diffusion.sh new file mode 100755 index 00000000..8e1d3a7e --- /dev/null +++ b/examples/local/run_diffusion.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +LOG=debug.log +CONFIG=config_diffusion.yaml +DATA_DIR=../../data/si_diffusion_v1 +PROCESSED_DATA=${DATA_DIR}/processed +DATA_WORK_DIR=./tmp_work_dir/ +OUTPUT=debug + +python ../../crystal_diffusion/train_diffusion.py \ + --log $LOG \ + --config $CONFIG \ + --data $DATA_DIR \ + --processed_datadir $PROCESSED_DATA \ + --dataset_working_dir $DATA_WORK_DIR \ + --output $OUTPUT From 497b9eee789bd4ced0dfd64cff2073f558a6a370 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 28 Mar 2024 14:27:42 -0400 Subject: [PATCH 2/6] fixing unit tests --- tests/data/diffusion/test_data_loader.py | 6 +++--- tests/data/diffusion/test_data_preprocess.py | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/data/diffusion/test_data_loader.py b/tests/data/diffusion/test_data_loader.py index a3ef4cd4..49d88482 100644 --- a/tests/data/diffusion/test_data_loader.py +++ b/tests/data/diffusion/test_data_loader.py @@ -12,14 +12,14 @@ def input_data_to_transform(self): 'natom': [2], # batch size of 1 'box': [[1.0, 1.0, 1.0]], 'position': [[1., 2., 3, 4., 5, 6]], # for one batch, two atoms, 3D positions - 'reduced_position': [[1., 2., 3, 4., 5, 6]], + 'relative_positions': [[1., 2., 3, 4., 5, 6]], 'type': [[1, 2]] } def test_dataset_transform(self, input_data_to_transform): result = LammpsForDiffusionDataModule.dataset_transform(input_data_to_transform) # Check keys in result - assert set(result.keys()) == {'natom', 'position', 'reduced_position', 'box', 'type'} + assert set(result.keys()) == {'natom', 'position', 'relative_positions', 'box', 'type'} # Check tensor types and shapes assert torch.equal(result['natom'], torch.tensor([2]).long()) @@ -39,7 +39,7 @@ def input_data_to_pad(self): 'natom': 2, # batch size of 1 'box': [1.0, 1.0, 1.0], 'position': [1., 2., 3, 4., 5, 6], # for one batch, two atoms, 3D positions - 'reduced_position': [1., 2., 3, 4., 5, 6], + 'relative_positions': [1., 2., 3, 4., 5, 6], 'type': [1, 2] } diff --git a/tests/data/diffusion/test_data_preprocess.py b/tests/data/diffusion/test_data_preprocess.py index 3354e1a6..bc587bd6 100644 --- a/tests/data/diffusion/test_data_preprocess.py +++ b/tests/data/diffusion/test_data_preprocess.py @@ -86,7 +86,7 @@ def test_parse_lammps_run(mock_processor, mock_parse_lammps_output, tmp_path): assert 'box' in df.columns assert 'type' in df.columns assert 'position' in df.columns - assert 'reduced_position' in df.columns + assert 'relative_positions' in df.columns @pytest.fixture @@ -105,14 +105,14 @@ def sample_coordinates(box_coordinates): }) -def test_convert_coords_to_reduced(sample_coordinates, box_coordinates): +def test_convert_coords_to_relative(sample_coordinates, box_coordinates): # Expected output: Each coordinate divided by 1, 2, 3 (the box limits) for index, row in sample_coordinates.iterrows(): - reduced_coords = LammpsProcessorForDiffusion._convert_coords_to_reduced(row) + relative_coords = LammpsProcessorForDiffusion._convert_coords_to_relative(row) expected_coords = [] for x, y, z in 
zip(row['x'], row['y'], row['z']): expected_coords.extend([x / box_coordinates[0], y / box_coordinates[1], z / box_coordinates[2]]) - assert reduced_coords == expected_coords + assert relative_coords == expected_coords @pytest.fixture @@ -123,9 +123,9 @@ def mock_prepare_data(): yield mock_prepare -def test_get_x_reduced(mock_prepare_data, sample_coordinates, tmpdir): - # Call get_x_reduced on the test data +def test_get_x_relative(mock_prepare_data, sample_coordinates, tmpdir): + # Call get_x_relative on the test data lp = LammpsProcessorForDiffusion(tmpdir, tmpdir) - result_df = lp.get_x_reduced(sample_coordinates) - # Check if 'reduced_position' column is added - assert 'reduced_position' in result_df.columns + result_df = lp.get_x_relative(sample_coordinates) + # Check if 'relative_positions' column is added + assert 'relative_positions' in result_df.columns From 79b426649edf3b687f9c6ef85bc39dcdd2d3141c Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 28 Mar 2024 14:29:05 -0400 Subject: [PATCH 3/6] extra white line --- crystal_diffusion/train_diffusion.py | 1 - 1 file changed, 1 deletion(-) diff --git a/crystal_diffusion/train_diffusion.py b/crystal_diffusion/train_diffusion.py index ff8fc7a6..6a688722 100644 --- a/crystal_diffusion/train_diffusion.py +++ b/crystal_diffusion/train_diffusion.py @@ -198,7 +198,6 @@ def train_impl(model, datamodule, output, hyper_params, use_progress_bar, accele # TODO pl Trainer does not use the kwarg resume_from_checkpoint now - check about resume training works now # resume_from_checkpoint = handle_previous_models(output, last_model_path, best_model_path) - early_stopping_params = hyper_params['early_stopping'] check_and_log_hp(['metric', 'mode', 'patience'], hyper_params['early_stopping']) early_stopping = EarlyStopping( From 5dfffac3cc26432600d4aeb5bc0ab7343633e342 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 28 Mar 2024 14:50:54 -0400 Subject: [PATCH 4/6] isort error? 
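The lint failure here is most likely isort's import grouping rather than a real error: isort classifies crystal_diffusion as a first-party package, so it expects third-party imports (pytorch_lightning, torch, orion, yaml, ...) in one block and the crystal_diffusion imports in their own block after a blank line, whereas the first commit had merged everything into a single alphabetical block. The hunks below restore that grouping; for data_loader.py the expected layout is:

    import pytorch_lightning as pl
    import torch
    import torch.nn.functional as F
    from torch.utils.data import DataLoader

    from crystal_diffusion.data.diffusion.data_preprocess import \
        LammpsProcessorForDiffusion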
--- crystal_diffusion/data/diffusion/data_loader.py | 3 ++- crystal_diffusion/data/diffusion/data_preprocess.py | 1 + crystal_diffusion/train_diffusion.py | 7 ++++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/crystal_diffusion/data/diffusion/data_loader.py b/crystal_diffusion/data/diffusion/data_loader.py index a2a0ea58..12ac202e 100644 --- a/crystal_diffusion/data/diffusion/data_loader.py +++ b/crystal_diffusion/data/diffusion/data_loader.py @@ -9,9 +9,10 @@ import pytorch_lightning as pl import torch import torch.nn.functional as F +from torch.utils.data import DataLoader + from crystal_diffusion.data.diffusion.data_preprocess import \ LammpsProcessorForDiffusion -from torch.utils.data import DataLoader logger = logging.getLogger(__name__) diff --git a/crystal_diffusion/data/diffusion/data_preprocess.py b/crystal_diffusion/data/diffusion/data_preprocess.py index f0105976..a00208f5 100644 --- a/crystal_diffusion/data/diffusion/data_preprocess.py +++ b/crystal_diffusion/data/diffusion/data_preprocess.py @@ -5,6 +5,7 @@ from typing import List, Optional import pandas as pd + from crystal_diffusion.data.parse_lammps_outputs import parse_lammps_output logger = logging.getLogger(__name__) diff --git a/crystal_diffusion/train_diffusion.py b/crystal_diffusion/train_diffusion.py index 6a688722..1a87e274 100644 --- a/crystal_diffusion/train_diffusion.py +++ b/crystal_diffusion/train_diffusion.py @@ -9,6 +9,10 @@ import orion import pytorch_lightning as pl import yaml +from orion.client import report_results +from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint +from yaml import load + from crystal_diffusion.data.diffusion.data_loader import ( LammpsForDiffusionDataModule, LammpsLoaderParameters) from crystal_diffusion.models.model_loader import load_diffusion_model @@ -16,9 +20,6 @@ from crystal_diffusion.utils.hp_utils import check_and_log_hp from crystal_diffusion.utils.logging_utils import LoggerWriter, log_exp_details from crystal_diffusion.utils.reproducibility_utils import set_seed -from orion.client import report_results -from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint -from yaml import load logger = logging.getLogger(__name__) From e8b91eaba957b82af4330d9a822b791ce19f9bd3 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 28 Mar 2024 15:10:19 -0400 Subject: [PATCH 5/6] missing fnct in model_loader that should be removed in the future --- crystal_diffusion/models/model_loader.py | 26 +++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/crystal_diffusion/models/model_loader.py b/crystal_diffusion/models/model_loader.py index 4a13bcb0..e4d1c1c3 100644 --- a/crystal_diffusion/models/model_loader.py +++ b/crystal_diffusion/models/model_loader.py @@ -6,7 +6,8 @@ ValidOptimizerNames) from crystal_diffusion.models.position_diffusion_lightning_model import ( PositionDiffusionLightningModel, PositionDiffusionParameters) -from crystal_diffusion.models.score_network import MLPScoreNetworkParameters +from crystal_diffusion.models.score_network import (MLPScoreNetwork, + MLPScoreNetworkParameters) from crystal_diffusion.samplers.variance_sampler import NoiseParameters logger = logging.getLogger(__name__) @@ -45,3 +46,26 @@ def load_diffusion_model(hyper_params: Dict[AnyStr, Any]) -> PositionDiffusionLi logger.info('model info:\n' + str(model) + '\n') return model + + +def load_model(hyper_params): # pragma: no cover + """Instantiate a model. 
+ + Args: + hyper_params (dict): hyper parameters from the config file + + Returns: + model (obj): A neural network model object. + """ + architecture = hyper_params['architecture'] + # __TODO__ fix architecture list + if architecture == 'simple_mlp': + model_class = MLPScoreNetwork + else: + raise ValueError('architecture {} not supported'.format(architecture)) + logger.info('selected architecture: {}'.format(architecture)) + + model = model_class(hyper_params) + logger.info('model info:\n' + str(model) + '\n') + + return model From c9080693f5eb35122c5056b77a9df0abd4454d7e Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 4 Apr 2024 09:53:11 -0400 Subject: [PATCH 6/6] fix truncated comment --- examples/local/config_diffusion.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/local/config_diffusion.yaml b/examples/local/config_diffusion.yaml index 042f5fbe..8ed5fa27 100644 --- a/examples/local/config_diffusion.yaml +++ b/examples/local/config_diffusion.yaml @@ -16,7 +16,7 @@ data: spatial_dimension: 3 model: score_network: - hidden_dimensions: [16, 16] # dimensions of the hidden layers. Length of array determines number of la + hidden_dimensions: [16, 16] # dimensions of the hidden layers. Length of array determines number of layers noise: total_time_steps: 10 sigma_min: 0.005 # default value
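
For reference, a minimal sketch (not part of the patch series) of how the new pieces fit together outside the train_diffusion.py entry point. It only uses names defined in the diffs above (load_diffusion_model, LammpsLoaderParameters, LammpsForDiffusionDataModule) plus the example config; the LAMMPS data paths are hypothetical placeholders modelled on run_diffusion.sh:

    import yaml

    from crystal_diffusion.data.diffusion.data_loader import (
        LammpsForDiffusionDataModule, LammpsLoaderParameters)
    from crystal_diffusion.models.model_loader import load_diffusion_model

    with open("examples/local/config_diffusion.yaml", "r") as stream:
        hyper_params = yaml.load(stream, Loader=yaml.FullLoader)

    # Builds MLPScoreNetworkParameters from data.max_atom and model.score_network,
    # wraps the optimizer and noise settings, and returns a randomly initialized
    # PositionDiffusionLightningModel (see model_loader.py above).
    model = load_diffusion_model(hyper_params)

    # Mirrors the datamodule construction in run(): raw LAMMPS runs are
    # preprocessed into parquet files with natom, box, type, position and
    # relative_positions columns, then padded up to data.max_atom for batching.
    data_params = LammpsLoaderParameters(**hyper_params["data"])
    datamodule = LammpsForDiffusionDataModule(
        lammps_run_dir="data/si_diffusion_v1",                # hypothetical path
        processed_dataset_dir="data/si_diffusion_v1/processed",
        hyper_params=data_params,
        working_cache_dir=None,
    )

The resulting model and datamodule can then be handed to a pytorch_lightning Trainer in the same way train_impl() does, with the checkpoint and early-stopping callbacks driven by the early_stopping section of the config.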