From 328cd298a51afdf41d18e01931821b3ec80e959b Mon Sep 17 00:00:00 2001
From: Simon Blackburn <simon.blackburn@mila.quebec>
Date: Mon, 10 Jun 2024 13:58:47 -0400
Subject: [PATCH 01/13] adding condition on forces in diffusion mace and
 cleaning the config files examples

---
 crystal_diffusion/models/diffusion_mace.py    | 45 +++++++---
 .../diffusion_mace_score_network.py           |  7 +-
 .../diffusion/config_diffusion_mace.yaml      |  3 +
 .../diffusion/config_diffusion_mlp.yaml       |  5 +-
 .../config_mace_equivariant_head.yaml         |  0
 .../diffusion/config_mace_mlp_head.yaml       |  0
 .../diffusion/config_diffusion_mace.yaml      | 82 -----------------
 .../diffusion/config_diffusion_mlp.yaml       | 77 ----------------
 .../config_mace_equivariant_head.yaml         | 86 ------------------
 .../diffusion/config_mace_mlp_head.yaml       | 87 -------------------
 .../narval/diffusion/config_diffusion.yaml    | 46 ----------
 11 files changed, 45 insertions(+), 393 deletions(-)
 rename examples/{local => config_files}/diffusion/config_diffusion_mace.yaml (95%)
 rename examples/{local => config_files}/diffusion/config_diffusion_mlp.yaml (97%)
 rename examples/{local => config_files}/diffusion/config_mace_equivariant_head.yaml (100%)
 rename examples/{local => config_files}/diffusion/config_mace_mlp_head.yaml (100%)
 delete mode 100644 examples/mila_cluster/diffusion/config_diffusion_mace.yaml
 delete mode 100644 examples/mila_cluster/diffusion/config_diffusion_mlp.yaml
 delete mode 100644 examples/mila_cluster/diffusion/config_mace_equivariant_head.yaml
 delete mode 100644 examples/mila_cluster/diffusion/config_mace_mlp_head.yaml
 delete mode 100644 examples/narval/diffusion/config_diffusion.yaml

diff --git a/crystal_diffusion/models/diffusion_mace.py b/crystal_diffusion/models/diffusion_mace.py
index deef4eb0..91b25772 100644
--- a/crystal_diffusion/models/diffusion_mace.py
+++ b/crystal_diffusion/models/diffusion_mace.py
@@ -2,15 +2,15 @@
 
 import torch
 from e3nn import o3
-from e3nn.nn import Activation, BatchNorm
+from e3nn.nn import Activation
 from mace.modules import (EquivariantProductBasisBlock, InteractionBlock,
                           LinearNodeEmbeddingBlock, RadialEmbeddingBlock)
 from mace.modules.utils import get_edge_vectors_and_lengths
 from torch_geometric.data import Data
 
 from crystal_diffusion.models.mace_utils import get_adj_matrix
-from crystal_diffusion.namespace import (NOISE, NOISY_CARTESIAN_POSITIONS,
-                                         UNIT_CELL)
+from crystal_diffusion.namespace import (CARTESIAN_FORCES, NOISE,
+                                         NOISY_CARTESIAN_POSITIONS, UNIT_CELL)
 
 
 class LinearVectorReadoutBlock(torch.nn.Module):
@@ -64,6 +64,9 @@ def input_to_diffusion_mace(batch: Dict[AnyStr, torch.Tensor], radial_cutoff: fl
 
     flat_basis_vectors = basis_vectors.view(-1, spatial_dimension)  # batch * spatial_dimension, spatial_dimension
     # create the pytorch-geometric graph
+
+    forces = batch[CARTESIAN_FORCES].view(-1, spatial_dimension)  # batch * n_atom_per_graph, spatial dimension
+
     graph_data = Data(edge_index=adj_matrix,
                       node_attrs=node_attrs.to(device),
                       node_diffusion_scalars=node_diffusion_scalars.to(device),
@@ -71,7 +74,8 @@ def input_to_diffusion_mace(batch: Dict[AnyStr, torch.Tensor], radial_cutoff: fl
                       ptr=ptr.to(device),
                       batch=batch_tensor.to(device),
                       shifts=shift_matrix,
-                      cell=flat_basis_vectors
+                      cell=flat_basis_vectors,
+                      forces=forces,
                       )
     return graph_data
 
@@ -102,6 +106,7 @@ def __init__(
         gate: Optional[Callable],
         radial_MLP: List[int],
         radial_type: Optional[str] = "bessel",
+        condition_embedding_size: int = 64  # dimension of the conditional variable embedding - assumed to be l=1 (odd)
     ):
         """Init method."""
         assert num_elements == 1, "only a single element can be used at this time. Set 'num_elements' to 1."
@@ -142,13 +147,10 @@ def __init__(
                            irreps_out=diffusion_scalar_irreps_out,
                            biases=True)
         self.diffusion_scalar_embedding.append(linear)
+        non_linearity = Activation(irreps_in=diffusion_scalar_irreps_out, acts=[gate])
         for _ in range(number_of_mlp_layers):
-            non_linearity = Activation(irreps_in=diffusion_scalar_irreps_out, acts=[gate])
             self.diffusion_scalar_embedding.append(non_linearity)
 
-            normalization = BatchNorm(diffusion_scalar_irreps_out)
-            self.diffusion_scalar_embedding.append(normalization)
-
             linear = o3.Linear(irreps_in=diffusion_scalar_irreps_out,
                                irreps_out=diffusion_scalar_irreps_out,
                                biases=True)
@@ -255,7 +257,25 @@ def __init__(
         # the output is a single vector.
         self.vector_readout = LinearVectorReadoutBlock(irreps_in=hidden_irreps_out)
 
-    def forward(self, data: Dict[str, torch.Tensor]) -> torch.Tensor:
+        # Apply a MLP with a bias on the forces as a conditional feature. This would be a 1o irrep
+        forces_irreps_in = o3.Irreps("1x1o")
+        # the l=0 irreps is there to allow a bias in the embedding
+        forces_irreps_embedding = o3.Irreps(f"{condition_embedding_size}x0e + {condition_embedding_size}x1o")
+        self.condition_embedding_layer = o3.Linear(irreps_in=forces_irreps_in,
+                                                   irreps_out=forces_irreps_embedding,
+                                                   biases=True)
+
+        # conditional layers for the forces as a conditional feature to guide the diffusion
+        self.conditional_layers = torch.nn.ModuleList([])
+        for _ in range(num_interactions):
+            cond_layer = o3.Linear(
+                irreps_in=forces_irreps_embedding,
+                irreps_out=hidden_irreps_out,
+                biases=True
+            )
+            self.conditional_layers.append(cond_layer)
+
+    def forward(self, data: Dict[str, torch.Tensor], conditional: bool = False) -> torch.Tensor:
         """Forward method."""
         # Setup
 
@@ -274,7 +294,9 @@ def forward(self, data: Dict[str, torch.Tensor]) -> torch.Tensor:
         edge_attrs = self.spherical_harmonics(vectors)
         edge_feats = self.radial_embedding(lengths)
 
-        for interaction, product in zip(self.interactions, self.products):
+        forces_embedding = self.condition_embedding_layer(data["forces"])  # 0e + 1o embedding
+
+        for interaction, product, cond_layer in zip(self.interactions, self.products, self.conditional_layers):
             node_feats, sc = interaction(
                 node_attrs=augmented_node_attributes,
                 node_feats=node_feats,
@@ -287,6 +309,9 @@ def forward(self, data: Dict[str, torch.Tensor]) -> torch.Tensor:
                 sc=sc,
                 node_attrs=augmented_node_attributes,
             )
+            if conditional:  # modify the node features to account for the conditional features i.e. forces
+                force_embed = cond_layer(forces_embedding)
+                node_feats += force_embed
 
         # Outputs
         vectors_output = self.vector_readout(node_feats)
diff --git a/crystal_diffusion/models/score_networks/diffusion_mace_score_network.py b/crystal_diffusion/models/score_networks/diffusion_mace_score_network.py
index d7ac688c..c8fc2330 100644
--- a/crystal_diffusion/models/score_networks/diffusion_mace_score_network.py
+++ b/crystal_diffusion/models/score_networks/diffusion_mace_score_network.py
@@ -37,6 +37,7 @@ class DiffusionMACEScoreNetworkParameters(ScoreNetworkParameters):
     gate: str = "silu"  # non linearity for last readout - choices: ["silu", "tanh", "abs", "None"]
     radial_MLP: List[int] = field(default_factory=lambda: [64, 64, 64])  # "width of the radial MLP"
     radial_type: str = "bessel"  # type of radial basis functions - choices=["bessel", "gaussian", "chebyshev"]
+    condition_embedding_size: int = 64  # dimension of the conditional variable embedding - assumed to be l=1 (odd)
 
 
 class DiffusionMACEScoreNetwork(ScoreNetwork):
@@ -71,7 +72,8 @@ def __init__(self, hyper_params: DiffusionMACEScoreNetworkParameters):
             correlation=hyper_params.correlation,
             gate=gate_dict[hyper_params.gate],
             radial_MLP=hyper_params.radial_MLP,
-            radial_type=hyper_params.radial_type
+            radial_type=hyper_params.radial_type,
+            condition_embedding_size=hyper_params.condition_embedding_size,
         )
 
         self._natoms = hyper_params.number_of_atoms
@@ -100,7 +102,6 @@ def _forward_unchecked(self, batch: Dict[AnyStr, torch.Tensor], conditional: boo
         Returns:
             output : the scores computed by the model as a [batch_size, n_atom, spatial_dimension] tensor.
         """
-        del conditional  # TODO do something with forces when conditional
         relative_coordinates = batch[NOISY_RELATIVE_COORDINATES]
         batch_size, number_of_atoms, spatial_dimension = relative_coordinates.shape
 
@@ -108,7 +109,7 @@ def _forward_unchecked(self, batch: Dict[AnyStr, torch.Tensor], conditional: boo
         batch[NOISY_CARTESIAN_POSITIONS] = get_positions_from_coordinates(relative_coordinates, basis_vectors)
         graph_input = input_to_diffusion_mace(batch, radial_cutoff=self.r_max)
 
-        flat_cartesian_scores = self.diffusion_mace_network(graph_input)
+        flat_cartesian_scores = self.diffusion_mace_network(graph_input, conditional)
         cartesian_scores = flat_cartesian_scores.reshape(batch_size, number_of_atoms, spatial_dimension)
 
         reciprocal_basis_vectors_as_columns = get_reciprocal_basis_vectors(basis_vectors)
diff --git a/examples/local/diffusion/config_diffusion_mace.yaml b/examples/config_files/diffusion/config_diffusion_mace.yaml
similarity index 95%
rename from examples/local/diffusion/config_diffusion_mace.yaml
rename to examples/config_files/diffusion/config_diffusion_mace.yaml
index bfc67af5..6fc34caf 100644
--- a/examples/local/diffusion/config_diffusion_mace.yaml
+++ b/examples/config_files/diffusion/config_diffusion_mace.yaml
@@ -37,6 +37,9 @@ model:
     gate: silu
     radial_MLP: [8, 8, 8]
     radial_type: bessel
+    conditional_prob: 0.0
+    conditional_gamma: 2
+    condition_embedding_size: 64
   noise:
     total_time_steps: 100
     sigma_min: 0.001  # default value
diff --git a/examples/local/diffusion/config_diffusion_mlp.yaml b/examples/config_files/diffusion/config_diffusion_mlp.yaml
similarity index 97%
rename from examples/local/diffusion/config_diffusion_mlp.yaml
rename to examples/config_files/diffusion/config_diffusion_mlp.yaml
index 6815e1b5..d27d27e9 100644
--- a/examples/local/diffusion/config_diffusion_mlp.yaml
+++ b/examples/config_files/diffusion/config_diffusion_mlp.yaml
@@ -21,11 +21,12 @@ spatial_dimension: 3
 model:
   score_network:
     architecture: mlp
-    conditional_prob: 0.0
-    conditional_gamma: 2
     number_of_atoms: 8
     n_hidden_dimensions: 2
     hidden_dimensions_size: 64
+    conditional_prob: 0.0
+    conditional_gamma: 2
+    condition_embedding_size: 64
   noise:
     total_time_steps: 100
     sigma_min: 0.005  # default value
diff --git a/examples/local/diffusion/config_mace_equivariant_head.yaml b/examples/config_files/diffusion/config_mace_equivariant_head.yaml
similarity index 100%
rename from examples/local/diffusion/config_mace_equivariant_head.yaml
rename to examples/config_files/diffusion/config_mace_equivariant_head.yaml
diff --git a/examples/local/diffusion/config_mace_mlp_head.yaml b/examples/config_files/diffusion/config_mace_mlp_head.yaml
similarity index 100%
rename from examples/local/diffusion/config_mace_mlp_head.yaml
rename to examples/config_files/diffusion/config_mace_mlp_head.yaml
diff --git a/examples/mila_cluster/diffusion/config_diffusion_mace.yaml b/examples/mila_cluster/diffusion/config_diffusion_mace.yaml
deleted file mode 100644
index 0efa2096..00000000
--- a/examples/mila_cluster/diffusion/config_diffusion_mace.yaml
+++ /dev/null
@@ -1,82 +0,0 @@
-# general
-exp_name: diffusion_mace_example
-run_name: run_debug_delete_me
-max_epoch: 10
-log_every_n_steps: 1
-gradient_clipping: 0.1
-
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-
-# data
-data:
-  batch_size: 512
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    architecture: diffusion_mace
-    number_of_atoms: 8
-    r_max: 5.0
-    num_bessel: 8
-    num_polynomial_cutoff: 5
-    max_ell: 2
-    interaction_cls: RealAgnosticResidualInteractionBlock
-    interaction_cls_first: RealAgnosticInteractionBlock
-    num_interactions: 2
-    hidden_irreps: 8x0e + 8x1o
-    mlp_irreps: 8x0e
-    
-    avg_num_neighbors: 1
-    correlation: 3
-    gate: silu
-    radial_MLP: [8, 8, 8]
-    radial_type: bessel
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.001  # default value
-    sigma_max: 0.5  # default value'
-
-# optimizer and scheduler
-optimizer:
-  name: adamw
-  learning_rate: 0.001
-  weight_decay: 1.0e-6
-
-scheduler:
-  name: ReduceLROnPlateau
-  factor: 0.1
-  patience: 3
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 10
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-# Sampling from the generative model
-diffusion_sampling:
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.001  # default value
-    sigma_max: 0.5  # default value
-  sampling:
-    spatial_dimension: 3
-    number_of_corrector_steps: 1
-    number_of_atoms: 8
-    number_of_samples: 16
-    sample_every_n_epochs: 1
-    cell_dimensions: [5.43, 5.43, 5.43]
-
-logging:
-  - csv
-  - tensorboard
-  - comet
diff --git a/examples/mila_cluster/diffusion/config_diffusion_mlp.yaml b/examples/mila_cluster/diffusion/config_diffusion_mlp.yaml
deleted file mode 100644
index 0f55a9d4..00000000
--- a/examples/mila_cluster/diffusion/config_diffusion_mlp.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-# general
-exp_name: mlp_example
-run_name: run_debug_delete_me
-max_epoch: 10
-log_every_n_steps: 1
-gradient_clipping: 0
-
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-
-# data
-data:
-  batch_size: 1024
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    architecture: mlp
-    conditional_prob: 0.0
-    conditional_gamma: 2
-    number_of_atoms: 8
-    n_hidden_dimensions: 2
-    hidden_dimensions_size: 64
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value'
-
-# optimizer and scheduler
-optimizer:
-  name: adamw
-  learning_rate: 0.001
-  weight_decay: 1.0e-6
-
-scheduler:
-  name: ReduceLROnPlateau
-  factor: 0.1
-  patience: 3
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 10
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-# A callback to check the loss vs. sigma
-loss_monitoring: 
-  number_of_bins: 50
-  sample_every_n_epochs: 2
-
-# Sampling from the generative model
-diffusion_sampling:
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.001  # default value
-    sigma_max: 0.5  # default value
-  sampling:
-    spatial_dimension: 3
-    number_of_corrector_steps: 1
-    number_of_atoms: 8
-    number_of_samples: 16
-    sample_batchsize: None
-    sample_every_n_epochs: 2
-    cell_dimensions: [5.43, 5.43, 5.43]
-
-logging:
-  - comet
-#- tensorboard
-#- csv
diff --git a/examples/mila_cluster/diffusion/config_mace_equivariant_head.yaml b/examples/mila_cluster/diffusion/config_mace_equivariant_head.yaml
deleted file mode 100644
index ebb99d36..00000000
--- a/examples/mila_cluster/diffusion/config_mace_equivariant_head.yaml
+++ /dev/null
@@ -1,86 +0,0 @@
-# general
-exp_name: mace_equivariant_head_example
-run_name: run_debug_delete_me
-max_epoch: 10
-log_every_n_steps: 1
-
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-
-# data
-data:
-  batch_size: 1024
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    architecture: mace
-    number_of_atoms: 8  
-    r_max: 5.0
-    num_bessel: 8
-    num_polynomial_cutoff: 5
-    max_ell: 2
-    interaction_cls: RealAgnosticResidualInteractionBlock
-    interaction_cls_first: RealAgnosticInteractionBlock
-    num_interactions: 2
-    hidden_irreps: 8x0e + 8x1o
-    MLP_irreps: 8x0e
-    avg_num_neighbors: 1
-    correlation: 3
-    gate: silu
-    radial_MLP: [8, 8, 8]
-    radial_type: bessel
-    prediction_head_parameters:
-      name: equivariant
-      time_embedding_irreps: "8x0e"
-      gate: "silu"
-      number_of_layers: 3
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value'
-
-# optimizer and scheduler
-optimizer:
-  name: adamw
-  learning_rate: 0.001
-  weight_decay: 1.0e-6
-
-scheduler:
-  name: ReduceLROnPlateau
-  factor: 0.1
-  patience: 3
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 10
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-# Sampling from the generative model
-diffusion_sampling:
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value
-  sampling:
-    spatial_dimension: 3
-    number_of_corrector_steps: 1
-    number_of_atoms: 8
-    number_of_samples: 16
-    sample_batchsize: None
-    sample_every_n_epochs: 1
-    cell_dimensions: [5.43, 5.43, 5.43]
-
-logging:
-  - csv
-  - tensorboard
-  - comet
diff --git a/examples/mila_cluster/diffusion/config_mace_mlp_head.yaml b/examples/mila_cluster/diffusion/config_mace_mlp_head.yaml
deleted file mode 100644
index 94700c8c..00000000
--- a/examples/mila_cluster/diffusion/config_mace_mlp_head.yaml
+++ /dev/null
@@ -1,87 +0,0 @@
-# general
-exp_name: mace_mlp_head_example
-run_name: run_debug_delete_me
-max_epoch: 10
-log_every_n_steps: 1
-
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-
-# data
-data:
-  batch_size: 512
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    architecture: mace
-    use_pretrained: None
-    pretrained_weights_path: ./
-    number_of_atoms: 8
-    r_max: 5.0
-    num_bessel: 8
-    num_polynomial_cutoff: 5
-    max_ell: 2
-    interaction_cls: RealAgnosticResidualInteractionBlock
-    interaction_cls_first: RealAgnosticInteractionBlock
-    num_interactions: 2
-    hidden_irreps: 8x0e + 8x1o
-    MLP_irreps: 8x0e
-    avg_num_neighbors: 1
-    correlation: 3
-    gate: silu
-    radial_MLP: [8, 8, 8]
-    radial_type: bessel
-    prediction_head_parameters:
-      name: mlp
-      hidden_dimensions_size: 8
-      n_hidden_dimensions: 3
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value'
-
-# optimizer and scheduler
-optimizer:
-  name: adamw
-  learning_rate: 0.001
-  weight_decay: 1.0e-6
-
-scheduler:
-  name: ReduceLROnPlateau
-  factor: 0.1
-  patience: 3
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 10
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-# Sampling from the generative model
-diffusion_sampling:
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value
-  sampling:
-    spatial_dimension: 3
-    number_of_corrector_steps: 1
-    number_of_atoms: 8
-    number_of_samples: 16
-    sample_batchsize: None
-    sample_every_n_epochs: 1
-    cell_dimensions: [5.43, 5.43, 5.43]
-
-logging:
-  - csv
-  - tensorboard
-  - comet
diff --git a/examples/narval/diffusion/config_diffusion.yaml b/examples/narval/diffusion/config_diffusion.yaml
deleted file mode 100644
index ae4c2963..00000000
--- a/examples/narval/diffusion/config_diffusion.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-# general
-exp_name: example_narval
-run_name: run1
-max_epoch: 3
-log_every_n_steps: 1
-# fast_dev_run: True
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-gradient_clipping: 0
-
-# data
-data:
-  batch_size: 128
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    n_hidden_dimensions: 2
-    hidden_dimensions_size: 64
-  noise:
-    total_time_steps: 10
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value
-
-# optimizer
-optimizer:
-  name: adam
-  learning_rate: 0.001
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 100
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-logging:
-  - csv
-  - comet

From 842e26004955f153678db8be3f70d7f9b0d7584f Mon Sep 17 00:00:00 2001
From: Simon Blackburn <simon.blackburn@mila.quebec>
Date: Mon, 10 Jun 2024 14:14:33 -0400
Subject: [PATCH 02/13] more example clean-up

---
 .../config_diffusion_mace_orion.yaml          | 86 +++++++++++++++++++
 .../diffusion/config_diffusion_mlp_orion.yaml | 79 +++++++++++++++++
 examples/local/diffusion/run_diffusion.sh     |  2 +-
 .../diffusion/config_diffusion.yaml           | 45 ----------
 examples/local_orion/diffusion/run_orion.sh   |  2 +-
 .../diffusion/config_diffusion.yaml           | 45 ----------
 6 files changed, 167 insertions(+), 92 deletions(-)
 create mode 100644 examples/config_files/diffusion/config_diffusion_mace_orion.yaml
 create mode 100644 examples/config_files/diffusion/config_diffusion_mlp_orion.yaml
 delete mode 100644 examples/local_orion/diffusion/config_diffusion.yaml
 delete mode 100644 examples/narval_orion/diffusion/config_diffusion.yaml

diff --git a/examples/config_files/diffusion/config_diffusion_mace_orion.yaml b/examples/config_files/diffusion/config_diffusion_mace_orion.yaml
new file mode 100644
index 00000000..1daf775b
--- /dev/null
+++ b/examples/config_files/diffusion/config_diffusion_mace_orion.yaml
@@ -0,0 +1,86 @@
+# general
+exp_name: diffusion_mace_example
+run_name: run_debug_delete_me
+max_epoch: 10
+log_every_n_steps: 1
+gradient_clipping: 0.1
+accumulate_grad_batches: 1  # make this number of forward passes before doing a backprop step
+
+# set to null to avoid setting a seed (can speed up GPU computation, but
+# results will not be reproducible)
+seed: 1234
+
+# data
+data:
+  batch_size: 512
+  num_workers: 0
+  max_atom: 8
+
+# architecture
+spatial_dimension: 3
+model:
+  score_network:
+    architecture: diffusion_mace
+    number_of_atoms: 8
+    r_max: 5.0
+    num_bessel: 8
+    num_polynomial_cutoff: 5
+    max_ell: 2
+    interaction_cls: RealAgnosticResidualInteractionBlock
+    interaction_cls_first: RealAgnosticInteractionBlock
+    num_interactions: 2
+    hidden_irreps: 'orion~choices(["8x0e + 8x1o", "16x0e + 16x1o + 16x2e", "32x0e + 32x1o + 32x2e + 32x3o"])'
+    mlp_irreps: 'orion~choices(["8x0e", "32x0e"])'
+    number_of_mlp_layers: 0
+    avg_num_neighbors: 1
+    correlation: 3
+    gate: silu
+    radial_MLP: 'orion~choices([[8, 8, 8], [32, 32, 32], [64, 64]])'
+    radial_type: bessel
+    conditional_prob: 'orion~choices([0.0, 0.25, 0.5, 0.75])'
+    conditional_gamma: 2
+    condition_embedding_size: 'orion~choices([32, 64])'
+  noise:
+    total_time_steps: 100
+    sigma_min: 0.001  # default value
+    sigma_max: 0.5  # default value'
+
+# optimizer and scheduler
+optimizer:
+  name: adamw
+  learning_rate: 'orion~loguniform(1e-6, 1e-3)'
+  weight_decay: 1.0e-6
+
+scheduler:
+  name: ReduceLROnPlateau
+  factor: 0.1
+  patience: 3
+
+# early stopping
+early_stopping:
+  metric: validation_epoch_loss
+  mode: min
+  patience: 10
+
+model_checkpoint:
+  monitor: validation_epoch_loss
+  mode: min
+
+# Sampling from the generative model
+diffusion_sampling:
+  noise:
+    total_time_steps: 100
+    sigma_min: 0.001  # default value
+    sigma_max: 0.5  # default value
+  sampling:
+    spatial_dimension: 3
+    number_of_corrector_steps: 1
+    number_of_atoms: 8
+    number_of_samples: 16
+    sample_every_n_epochs: 1
+    cell_dimensions: [5.43, 5.43, 5.43]
+
+logging:
+  #  - csv
+  #  - tensorboard
+  - comet
diff --git a/examples/config_files/diffusion/config_diffusion_mlp_orion.yaml b/examples/config_files/diffusion/config_diffusion_mlp_orion.yaml
new file mode 100644
index 00000000..91a2bd19
--- /dev/null
+++ b/examples/config_files/diffusion/config_diffusion_mlp_orion.yaml
@@ -0,0 +1,79 @@
+# general
+exp_name: mlp_example
+run_name: run_debug_delete_me
+max_epoch: 10
+log_every_n_steps: 1
+gradient_clipping: 0
+accumulate_grad_batches: 1  # make this number of forward passes before doing a backprop step
+
+# set to null to avoid setting a seed (can speed up GPU computation, but
+# results will not be reproducible)
+seed: 1234
+
+# data
+data:
+  batch_size: 1024
+  num_workers: 0
+  max_atom: 8
+
+# architecture
+spatial_dimension: 3
+model:
+  score_network:
+    architecture: mlp
+    number_of_atoms: 8
+    n_hidden_dimensions: 'orion~choices([1, 2, 3, 4])'
+    hidden_dimensions_size: 'orion~choices([16, 32, 64])'
+    conditional_prob: 'orion~choices([0.0, 0.25, 0.5])'
+    conditional_gamma: 2
+    condition_embedding_size: 'orion~choices([32, 64])'
+  noise:
+    total_time_steps: 100
+    sigma_min: 0.005  # default value
+    sigma_max: 0.5  # default value'
+
+# optimizer and scheduler
+optimizer:
+  name: adamw
+  learning_rate: 'orion~loguniform(1e-6, 1e-3)'
+  weight_decay: 1.0e-6
+
+scheduler:
+  name: ReduceLROnPlateau
+  factor: 0.1
+  patience: 3
+
+# early stopping
+early_stopping:
+  metric: validation_epoch_loss
+  mode: min
+  patience: 10
+
+model_checkpoint:
+  monitor: validation_epoch_loss
+  mode: min
+
+# A callback to check the loss vs. sigma
+loss_monitoring: 
+  number_of_bins: 50
+  sample_every_n_epochs: 2
+
+# Sampling from the generative model
+diffusion_sampling:
+  noise:
+    total_time_steps: 100
+    sigma_min: 0.001  # default value
+    sigma_max: 0.5  # default value
+  sampling:
+    spatial_dimension: 3
+    number_of_corrector_steps: 1
+    number_of_atoms: 8
+    number_of_samples: 16
+    sample_batchsize: None
+    sample_every_n_epochs: 2
+    cell_dimensions: [5.43, 5.43, 5.43]
+
+logging:
+  - comet
+#- tensorboard
+#- csv
diff --git a/examples/local/diffusion/run_diffusion.sh b/examples/local/diffusion/run_diffusion.sh
index 6860d6ca..ca54f033 100755
--- a/examples/local/diffusion/run_diffusion.sh
+++ b/examples/local/diffusion/run_diffusion.sh
@@ -3,7 +3,7 @@
 # This example assumes that the dataset 'si_diffusion_small' is present locally in the DATA folder.
 # It is also assumed that the user has a Comet account for logging experiments.
 
-CONFIG=config_diffusion_mace.yaml
+CONFIG=../../config_files/diffusion/config_diffusion_mace.yaml
 DATA_DIR=../../../data/si_diffusion_1x1x1
 PROCESSED_DATA=${DATA_DIR}/processed
 DATA_WORK_DIR=./tmp_work_dir/
diff --git a/examples/local_orion/diffusion/config_diffusion.yaml b/examples/local_orion/diffusion/config_diffusion.yaml
deleted file mode 100644
index e2103835..00000000
--- a/examples/local_orion/diffusion/config_diffusion.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-# general
-exp_name: example_experiment
-max_epoch: 2
-log_every_n_steps: 1
-# fast_dev_run: True
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-gradient_clipping: 0
-
-# data
-data:
-  batch_size: 128
-  num_workers: 4
-  max_atom: 64
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    n_hidden_dimensions: 'orion~choices([1, 2, 3, 4])'
-    hidden_dimensions_size: 'orion~choices([256, 512, 1024, 2048])'
-  noise:
-    total_time_steps: 'orion~uniform(2, 20, discrete=True)'
-    sigma_min: 'orion~choices([0.001, 0.005, 0.01])'
-    sigma_max: 'orion~choices([0.1, 0.5, 0.75])'
-
-# optimizer
-optimizer:
-  name: adam
-  learning_rate: 'orion~loguniform(1e-6, 1e-3)'
-
-# early stopping
-early_stopping:
-  metric: val_loss
-  mode: min
-  patience: 100
-
-model_checkpoint:
-  monitor: val_loss
-  mode: min
-
-logging:
-  - tensorboard
-  - comet
diff --git a/examples/local_orion/diffusion/run_orion.sh b/examples/local_orion/diffusion/run_orion.sh
index 254b0e15..927a1753 100755
--- a/examples/local_orion/diffusion/run_orion.sh
+++ b/examples/local_orion/diffusion/run_orion.sh
@@ -2,7 +2,7 @@ export ORION_DB_ADDRESS='orion_db.pkl'
 export ORION_DB_TYPE='pickleddb'
 
 ROOT_DIR=../../../
-CONFIG=config_diffusion.yaml
+CONFIG=../../config_files/diffusion/config_diffusion_mlp_orion.yaml
 DATA_DIR=${ROOT_DIR}/data/si_diffusion_small
 PROCESSED_DATA=${DATA_DIR}/processed
 DATA_WORK_DIR=./tmp_work_dir/
diff --git a/examples/narval_orion/diffusion/config_diffusion.yaml b/examples/narval_orion/diffusion/config_diffusion.yaml
deleted file mode 100644
index 65c433d9..00000000
--- a/examples/narval_orion/diffusion/config_diffusion.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-# general
-exp_name: example_experiment
-max_epoch: 2
-log_every_n_steps: 1
-# fast_dev_run: True
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-gradient_clipping: 0
-
-# data
-data:
-  batch_size: 128
-  num_workers: 4
-  max_atom: 64
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    n_hidden_dimensions: 'orion~choices([1, 2, 3, 4])'
-    hidden_dimensions_size: 'orion~choices([256, 512, 1024, 2048])'
-  noise:
-    total_time_steps: 'orion~uniform(2, 20, discrete=True)'
-    sigma_min: 'orion~choices([0.001, 0.005, 0.01])'
-    sigma_max: 'orion~choices([0.1, 0.5, 0.75])'
-
-# optimizer
-optimizer:
-  name: adam
-  learning_rate: 'orion~loguniform(1e-6, 1e-3)'
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 100
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-logging:
-  - tensorboard
-  - comet

From 0e0fc996b85a7bcd5cc9282540c41ab7ec39aa2e Mon Sep 17 00:00:00 2001
From: Simon Blackburn <simon.blackburn@mila.quebec>
Date: Mon, 10 Jun 2024 15:13:59 -0400
Subject: [PATCH 03/13] fixing a bug with biases in condition_embedding_layer

---
 crystal_diffusion/models/diffusion_mace.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/crystal_diffusion/models/diffusion_mace.py b/crystal_diffusion/models/diffusion_mace.py
index 91b25772..840f9f59 100644
--- a/crystal_diffusion/models/diffusion_mace.py
+++ b/crystal_diffusion/models/diffusion_mace.py
@@ -259,11 +259,10 @@ def __init__(
 
         # Apply a MLP with a bias on the forces as a conditional feature. This would be a 1o irrep
         forces_irreps_in = o3.Irreps("1x1o")
-        # the l=0 irreps is there to allow a bias in the embedding
-        forces_irreps_embedding = o3.Irreps(f"{condition_embedding_size}x0e + {condition_embedding_size}x1o")
+        forces_irreps_embedding = o3.Irreps(f"{condition_embedding_size}x1o")
         self.condition_embedding_layer = o3.Linear(irreps_in=forces_irreps_in,
                                                    irreps_out=forces_irreps_embedding,
-                                                   biases=True)
+                                                   biases=False)  # can't have biases with 1o irreps
 
         # conditional layers for the forces as a conditional feature to guide the diffusion
         self.conditional_layers = torch.nn.ModuleList([])

From 7e304aad81bfc12c70c60a967b400ca7c7dfe851 Mon Sep 17 00:00:00 2001
From: Simon Blackburn <simon.blackburn@mila.quebec>
Date: Mon, 10 Jun 2024 13:58:47 -0400
Subject: [PATCH 04/13] adding condition on forces in diffusion mace and
 cleaning the config files examples

---
 crystal_diffusion/models/diffusion_mace.py    | 40 +++++++--
 .../diffusion_mace_score_network.py           |  7 +-
 .../diffusion/config_diffusion_mace.yaml      |  3 +
 .../diffusion/config_diffusion_mlp.yaml       |  5 +-
 .../config_mace_equivariant_head.yaml         |  0
 .../diffusion/config_mace_mlp_head.yaml       |  0
 .../diffusion/config_diffusion_mace.yaml      | 82 -----------------
 .../diffusion/config_diffusion_mlp.yaml       | 77 ----------------
 .../config_mace_equivariant_head.yaml         | 86 ------------------
 .../diffusion/config_mace_mlp_head.yaml       | 87 -------------------
 .../narval/diffusion/config_diffusion.yaml    | 46 ----------
 11 files changed, 44 insertions(+), 389 deletions(-)
 rename examples/{local => config_files}/diffusion/config_diffusion_mace.yaml (95%)
 rename examples/{local => config_files}/diffusion/config_diffusion_mlp.yaml (97%)
 rename examples/{local => config_files}/diffusion/config_mace_equivariant_head.yaml (100%)
 rename examples/{local => config_files}/diffusion/config_mace_mlp_head.yaml (100%)
 delete mode 100644 examples/mila_cluster/diffusion/config_diffusion_mace.yaml
 delete mode 100644 examples/mila_cluster/diffusion/config_diffusion_mlp.yaml
 delete mode 100644 examples/mila_cluster/diffusion/config_mace_equivariant_head.yaml
 delete mode 100644 examples/mila_cluster/diffusion/config_mace_mlp_head.yaml
 delete mode 100644 examples/narval/diffusion/config_diffusion.yaml

diff --git a/crystal_diffusion/models/diffusion_mace.py b/crystal_diffusion/models/diffusion_mace.py
index 80b79f1c..91b25772 100644
--- a/crystal_diffusion/models/diffusion_mace.py
+++ b/crystal_diffusion/models/diffusion_mace.py
@@ -9,8 +9,8 @@
 from torch_geometric.data import Data
 
 from crystal_diffusion.models.mace_utils import get_adj_matrix
-from crystal_diffusion.namespace import (NOISE, NOISY_CARTESIAN_POSITIONS,
-                                         UNIT_CELL)
+from crystal_diffusion.namespace import (CARTESIAN_FORCES, NOISE,
+                                         NOISY_CARTESIAN_POSITIONS, UNIT_CELL)
 
 
 class LinearVectorReadoutBlock(torch.nn.Module):
@@ -64,6 +64,9 @@ def input_to_diffusion_mace(batch: Dict[AnyStr, torch.Tensor], radial_cutoff: fl
 
     flat_basis_vectors = basis_vectors.view(-1, spatial_dimension)  # batch * spatial_dimension, spatial_dimension
     # create the pytorch-geometric graph
+
+    forces = batch[CARTESIAN_FORCES].view(-1, spatial_dimension)  # batch * n_atom_per_graph, spatial dimension
+
     graph_data = Data(edge_index=adj_matrix,
                       node_attrs=node_attrs.to(device),
                       node_diffusion_scalars=node_diffusion_scalars.to(device),
@@ -71,7 +74,8 @@ def input_to_diffusion_mace(batch: Dict[AnyStr, torch.Tensor], radial_cutoff: fl
                       ptr=ptr.to(device),
                       batch=batch_tensor.to(device),
                       shifts=shift_matrix,
-                      cell=flat_basis_vectors
+                      cell=flat_basis_vectors,
+                      forces=forces,
                       )
     return graph_data
 
@@ -102,6 +106,7 @@ def __init__(
         gate: Optional[Callable],
         radial_MLP: List[int],
         radial_type: Optional[str] = "bessel",
+        condition_embedding_size: int = 64  # dimension of the conditional variable embedding - assumed to be l=1 (odd)
     ):
         """Init method."""
         assert num_elements == 1, "only a single element can be used at this time. Set 'num_elements' to 1."
@@ -142,8 +147,8 @@ def __init__(
                            irreps_out=diffusion_scalar_irreps_out,
                            biases=True)
         self.diffusion_scalar_embedding.append(linear)
+        non_linearity = Activation(irreps_in=diffusion_scalar_irreps_out, acts=[gate])
         for _ in range(number_of_mlp_layers):
-            non_linearity = Activation(irreps_in=diffusion_scalar_irreps_out, acts=[gate])
             self.diffusion_scalar_embedding.append(non_linearity)
 
             linear = o3.Linear(irreps_in=diffusion_scalar_irreps_out,
@@ -252,7 +257,25 @@ def __init__(
         # the output is a single vector.
         self.vector_readout = LinearVectorReadoutBlock(irreps_in=hidden_irreps_out)
 
-    def forward(self, data: Dict[str, torch.Tensor]) -> torch.Tensor:
+        # Apply a MLP with a bias on the forces as a conditional feature. This would be a 1o irrep
+        forces_irreps_in = o3.Irreps("1x1o")
+        # the l=0 irreps is there to allow a bias in the embedding
+        forces_irreps_embedding = o3.Irreps(f"{condition_embedding_size}x0e + {condition_embedding_size}x1o")
+        self.condition_embedding_layer = o3.Linear(irreps_in=forces_irreps_in,
+                                                   irreps_out=forces_irreps_embedding,
+                                                   biases=True)
+
+        # conditional layers for the forces as a conditional feature to guide the diffusion
+        self.conditional_layers = torch.nn.ModuleList([])
+        for _ in range(num_interactions):
+            cond_layer = o3.Linear(
+                irreps_in=forces_irreps_embedding,
+                irreps_out=hidden_irreps_out,
+                biases=True
+            )
+            self.conditional_layers.append(cond_layer)
+
+    def forward(self, data: Dict[str, torch.Tensor], conditional: bool = False) -> torch.Tensor:
         """Forward method."""
         # Setup
 
@@ -271,7 +294,9 @@ def forward(self, data: Dict[str, torch.Tensor]) -> torch.Tensor:
         edge_attrs = self.spherical_harmonics(vectors)
         edge_feats = self.radial_embedding(lengths)
 
-        for interaction, product in zip(self.interactions, self.products):
+        forces_embedding = self.condition_embedding_layer(data["forces"])  # 0e + 1o embedding
+
+        for interaction, product, cond_layer in zip(self.interactions, self.products, self.conditional_layers):
             node_feats, sc = interaction(
                 node_attrs=augmented_node_attributes,
                 node_feats=node_feats,
@@ -284,6 +309,9 @@ def forward(self, data: Dict[str, torch.Tensor]) -> torch.Tensor:
                 sc=sc,
                 node_attrs=augmented_node_attributes,
             )
+            if conditional:  # modify the node features to account for the conditional features i.e. forces
+                force_embed = cond_layer(forces_embedding)
+                node_feats += force_embed
 
         # Outputs
         vectors_output = self.vector_readout(node_feats)
diff --git a/crystal_diffusion/models/score_networks/diffusion_mace_score_network.py b/crystal_diffusion/models/score_networks/diffusion_mace_score_network.py
index d7ac688c..c8fc2330 100644
--- a/crystal_diffusion/models/score_networks/diffusion_mace_score_network.py
+++ b/crystal_diffusion/models/score_networks/diffusion_mace_score_network.py
@@ -37,6 +37,7 @@ class DiffusionMACEScoreNetworkParameters(ScoreNetworkParameters):
     gate: str = "silu"  # non linearity for last readout - choices: ["silu", "tanh", "abs", "None"]
     radial_MLP: List[int] = field(default_factory=lambda: [64, 64, 64])  # "width of the radial MLP"
     radial_type: str = "bessel"  # type of radial basis functions - choices=["bessel", "gaussian", "chebyshev"]
+    condition_embedding_size: int = 64  # dimension of the conditional variable embedding - assumed to be l=1 (odd)
 
 
 class DiffusionMACEScoreNetwork(ScoreNetwork):
@@ -71,7 +72,8 @@ def __init__(self, hyper_params: DiffusionMACEScoreNetworkParameters):
             correlation=hyper_params.correlation,
             gate=gate_dict[hyper_params.gate],
             radial_MLP=hyper_params.radial_MLP,
-            radial_type=hyper_params.radial_type
+            radial_type=hyper_params.radial_type,
+            condition_embedding_size=hyper_params.condition_embedding_size,
         )
 
         self._natoms = hyper_params.number_of_atoms
@@ -100,7 +102,6 @@ def _forward_unchecked(self, batch: Dict[AnyStr, torch.Tensor], conditional: boo
         Returns:
             output : the scores computed by the model as a [batch_size, n_atom, spatial_dimension] tensor.
         """
-        del conditional  # TODO do something with forces when conditional
         relative_coordinates = batch[NOISY_RELATIVE_COORDINATES]
         batch_size, number_of_atoms, spatial_dimension = relative_coordinates.shape
 
@@ -108,7 +109,7 @@ def _forward_unchecked(self, batch: Dict[AnyStr, torch.Tensor], conditional: boo
         batch[NOISY_CARTESIAN_POSITIONS] = get_positions_from_coordinates(relative_coordinates, basis_vectors)
         graph_input = input_to_diffusion_mace(batch, radial_cutoff=self.r_max)
 
-        flat_cartesian_scores = self.diffusion_mace_network(graph_input)
+        flat_cartesian_scores = self.diffusion_mace_network(graph_input, conditional)
         cartesian_scores = flat_cartesian_scores.reshape(batch_size, number_of_atoms, spatial_dimension)
 
         reciprocal_basis_vectors_as_columns = get_reciprocal_basis_vectors(basis_vectors)
diff --git a/examples/local/diffusion/config_diffusion_mace.yaml b/examples/config_files/diffusion/config_diffusion_mace.yaml
similarity index 95%
rename from examples/local/diffusion/config_diffusion_mace.yaml
rename to examples/config_files/diffusion/config_diffusion_mace.yaml
index bfc67af5..6fc34caf 100644
--- a/examples/local/diffusion/config_diffusion_mace.yaml
+++ b/examples/config_files/diffusion/config_diffusion_mace.yaml
@@ -37,6 +37,9 @@ model:
     gate: silu
     radial_MLP: [8, 8, 8]
     radial_type: bessel
+    conditional_prob: 0.0
+    conditional_gamma: 2
+    condition_embedding_size: 64
   noise:
     total_time_steps: 100
     sigma_min: 0.001  # default value
diff --git a/examples/local/diffusion/config_diffusion_mlp.yaml b/examples/config_files/diffusion/config_diffusion_mlp.yaml
similarity index 97%
rename from examples/local/diffusion/config_diffusion_mlp.yaml
rename to examples/config_files/diffusion/config_diffusion_mlp.yaml
index 6815e1b5..d27d27e9 100644
--- a/examples/local/diffusion/config_diffusion_mlp.yaml
+++ b/examples/config_files/diffusion/config_diffusion_mlp.yaml
@@ -21,11 +21,12 @@ spatial_dimension: 3
 model:
   score_network:
     architecture: mlp
-    conditional_prob: 0.0
-    conditional_gamma: 2
     number_of_atoms: 8
     n_hidden_dimensions: 2
     hidden_dimensions_size: 64
+    conditional_prob: 0.0
+    conditional_gamma: 2
+    condition_embedding_size: 64
   noise:
     total_time_steps: 100
     sigma_min: 0.005  # default value
diff --git a/examples/local/diffusion/config_mace_equivariant_head.yaml b/examples/config_files/diffusion/config_mace_equivariant_head.yaml
similarity index 100%
rename from examples/local/diffusion/config_mace_equivariant_head.yaml
rename to examples/config_files/diffusion/config_mace_equivariant_head.yaml
diff --git a/examples/local/diffusion/config_mace_mlp_head.yaml b/examples/config_files/diffusion/config_mace_mlp_head.yaml
similarity index 100%
rename from examples/local/diffusion/config_mace_mlp_head.yaml
rename to examples/config_files/diffusion/config_mace_mlp_head.yaml
diff --git a/examples/mila_cluster/diffusion/config_diffusion_mace.yaml b/examples/mila_cluster/diffusion/config_diffusion_mace.yaml
deleted file mode 100644
index 145a0133..00000000
--- a/examples/mila_cluster/diffusion/config_diffusion_mace.yaml
+++ /dev/null
@@ -1,82 +0,0 @@
-# general
-exp_name: diffusion_mace_example
-run_name: run_debug_delete_me
-max_epoch: 10
-log_every_n_steps: 1
-gradient_clipping: 0.1
-
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-
-# data
-data:
-  batch_size: 512
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    architecture: diffusion_mace
-    number_of_atoms: 8
-    r_max: 5.0
-    num_bessel: 8
-    num_polynomial_cutoff: 5
-    max_ell: 2
-    interaction_cls: RealAgnosticResidualInteractionBlock
-    interaction_cls_first: RealAgnosticInteractionBlock
-    num_interactions: 2
-    hidden_irreps: 8x0e + 8x1o
-    mlp_irreps: 8x0e
-    number_of_mlp_layers: 0
-    avg_num_neighbors: 1
-    correlation: 3
-    gate: silu
-    radial_MLP: [8, 8, 8]
-    radial_type: bessel
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.001  # default value
-    sigma_max: 0.5  # default value'
-
-# optimizer and scheduler
-optimizer:
-  name: adamw
-  learning_rate: 0.001
-  weight_decay: 1.0e-6
-
-scheduler:
-  name: ReduceLROnPlateau
-  factor: 0.1
-  patience: 3
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 10
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-# Sampling from the generative model
-diffusion_sampling:
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.001  # default value
-    sigma_max: 0.5  # default value
-  sampling:
-    spatial_dimension: 3
-    number_of_corrector_steps: 1
-    number_of_atoms: 8
-    number_of_samples: 16
-    sample_every_n_epochs: 1
-    cell_dimensions: [5.43, 5.43, 5.43]
-
-logging:
-  - csv
-  - tensorboard
-  - comet
diff --git a/examples/mila_cluster/diffusion/config_diffusion_mlp.yaml b/examples/mila_cluster/diffusion/config_diffusion_mlp.yaml
deleted file mode 100644
index 0f55a9d4..00000000
--- a/examples/mila_cluster/diffusion/config_diffusion_mlp.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-# general
-exp_name: mlp_example
-run_name: run_debug_delete_me
-max_epoch: 10
-log_every_n_steps: 1
-gradient_clipping: 0
-
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-
-# data
-data:
-  batch_size: 1024
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    architecture: mlp
-    conditional_prob: 0.0
-    conditional_gamma: 2
-    number_of_atoms: 8
-    n_hidden_dimensions: 2
-    hidden_dimensions_size: 64
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value'
-
-# optimizer and scheduler
-optimizer:
-  name: adamw
-  learning_rate: 0.001
-  weight_decay: 1.0e-6
-
-scheduler:
-  name: ReduceLROnPlateau
-  factor: 0.1
-  patience: 3
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 10
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-# A callback to check the loss vs. sigma
-loss_monitoring: 
-  number_of_bins: 50
-  sample_every_n_epochs: 2
-
-# Sampling from the generative model
-diffusion_sampling:
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.001  # default value
-    sigma_max: 0.5  # default value
-  sampling:
-    spatial_dimension: 3
-    number_of_corrector_steps: 1
-    number_of_atoms: 8
-    number_of_samples: 16
-    sample_batchsize: None
-    sample_every_n_epochs: 2
-    cell_dimensions: [5.43, 5.43, 5.43]
-
-logging:
-  - comet
-#- tensorboard
-#- csv
diff --git a/examples/mila_cluster/diffusion/config_mace_equivariant_head.yaml b/examples/mila_cluster/diffusion/config_mace_equivariant_head.yaml
deleted file mode 100644
index ebb99d36..00000000
--- a/examples/mila_cluster/diffusion/config_mace_equivariant_head.yaml
+++ /dev/null
@@ -1,86 +0,0 @@
-# general
-exp_name: mace_equivariant_head_example
-run_name: run_debug_delete_me
-max_epoch: 10
-log_every_n_steps: 1
-
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-
-# data
-data:
-  batch_size: 1024
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    architecture: mace
-    number_of_atoms: 8  
-    r_max: 5.0
-    num_bessel: 8
-    num_polynomial_cutoff: 5
-    max_ell: 2
-    interaction_cls: RealAgnosticResidualInteractionBlock
-    interaction_cls_first: RealAgnosticInteractionBlock
-    num_interactions: 2
-    hidden_irreps: 8x0e + 8x1o
-    MLP_irreps: 8x0e
-    avg_num_neighbors: 1
-    correlation: 3
-    gate: silu
-    radial_MLP: [8, 8, 8]
-    radial_type: bessel
-    prediction_head_parameters:
-      name: equivariant
-      time_embedding_irreps: "8x0e"
-      gate: "silu"
-      number_of_layers: 3
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value'
-
-# optimizer and scheduler
-optimizer:
-  name: adamw
-  learning_rate: 0.001
-  weight_decay: 1.0e-6
-
-scheduler:
-  name: ReduceLROnPlateau
-  factor: 0.1
-  patience: 3
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 10
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-# Sampling from the generative model
-diffusion_sampling:
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value
-  sampling:
-    spatial_dimension: 3
-    number_of_corrector_steps: 1
-    number_of_atoms: 8
-    number_of_samples: 16
-    sample_batchsize: None
-    sample_every_n_epochs: 1
-    cell_dimensions: [5.43, 5.43, 5.43]
-
-logging:
-  - csv
-  - tensorboard
-  - comet
diff --git a/examples/mila_cluster/diffusion/config_mace_mlp_head.yaml b/examples/mila_cluster/diffusion/config_mace_mlp_head.yaml
deleted file mode 100644
index 94700c8c..00000000
--- a/examples/mila_cluster/diffusion/config_mace_mlp_head.yaml
+++ /dev/null
@@ -1,87 +0,0 @@
-# general
-exp_name: mace_mlp_head_example
-run_name: run_debug_delete_me
-max_epoch: 10
-log_every_n_steps: 1
-
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-
-# data
-data:
-  batch_size: 512
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    architecture: mace
-    use_pretrained: None
-    pretrained_weights_path: ./
-    number_of_atoms: 8
-    r_max: 5.0
-    num_bessel: 8
-    num_polynomial_cutoff: 5
-    max_ell: 2
-    interaction_cls: RealAgnosticResidualInteractionBlock
-    interaction_cls_first: RealAgnosticInteractionBlock
-    num_interactions: 2
-    hidden_irreps: 8x0e + 8x1o
-    MLP_irreps: 8x0e
-    avg_num_neighbors: 1
-    correlation: 3
-    gate: silu
-    radial_MLP: [8, 8, 8]
-    radial_type: bessel
-    prediction_head_parameters:
-      name: mlp
-      hidden_dimensions_size: 8
-      n_hidden_dimensions: 3
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value'
-
-# optimizer and scheduler
-optimizer:
-  name: adamw
-  learning_rate: 0.001
-  weight_decay: 1.0e-6
-
-scheduler:
-  name: ReduceLROnPlateau
-  factor: 0.1
-  patience: 3
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 10
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-# Sampling from the generative model
-diffusion_sampling:
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value
-  sampling:
-    spatial_dimension: 3
-    number_of_corrector_steps: 1
-    number_of_atoms: 8
-    number_of_samples: 16
-    sample_batchsize: None
-    sample_every_n_epochs: 1
-    cell_dimensions: [5.43, 5.43, 5.43]
-
-logging:
-  - csv
-  - tensorboard
-  - comet
diff --git a/examples/narval/diffusion/config_diffusion.yaml b/examples/narval/diffusion/config_diffusion.yaml
deleted file mode 100644
index ae4c2963..00000000
--- a/examples/narval/diffusion/config_diffusion.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-# general
-exp_name: example_narval
-run_name: run1
-max_epoch: 3
-log_every_n_steps: 1
-# fast_dev_run: True
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-gradient_clipping: 0
-
-# data
-data:
-  batch_size: 128
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    n_hidden_dimensions: 2
-    hidden_dimensions_size: 64
-  noise:
-    total_time_steps: 10
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value
-
-# optimizer
-optimizer:
-  name: adam
-  learning_rate: 0.001
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 100
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-logging:
-  - csv
-  - comet

From 37ccedd80d6cd3fb3f834dd1dbc023a0485df613 Mon Sep 17 00:00:00 2001
From: Simon Blackburn <simon.blackburn@mila.quebec>
Date: Mon, 10 Jun 2024 14:14:33 -0400
Subject: [PATCH 05/13] more example clean-up

---
 .../config_diffusion_mace_orion.yaml          | 86 +++++++++++++++++++
 .../diffusion/config_diffusion_mlp_orion.yaml | 79 +++++++++++++++++
 examples/local/diffusion/run_diffusion.sh     |  2 +-
 .../diffusion/config_diffusion.yaml           | 45 ----------
 examples/local_orion/diffusion/run_orion.sh   |  2 +-
 .../diffusion/config_diffusion.yaml           | 45 ----------
 6 files changed, 167 insertions(+), 92 deletions(-)
 create mode 100644 examples/config_files/diffusion/config_diffusion_mace_orion.yaml
 create mode 100644 examples/config_files/diffusion/config_diffusion_mlp_orion.yaml
 delete mode 100644 examples/local_orion/diffusion/config_diffusion.yaml
 delete mode 100644 examples/narval_orion/diffusion/config_diffusion.yaml

diff --git a/examples/config_files/diffusion/config_diffusion_mace_orion.yaml b/examples/config_files/diffusion/config_diffusion_mace_orion.yaml
new file mode 100644
index 00000000..1daf775b
--- /dev/null
+++ b/examples/config_files/diffusion/config_diffusion_mace_orion.yaml
@@ -0,0 +1,86 @@
+# general
+exp_name: diffusion_mace_example
+run_name: run_debug_delete_me
+max_epoch: 10
+log_every_n_steps: 1
+gradient_clipping: 0.1
+accumulate_grad_batches: 1  # make this number of forward passes before doing a backprop step
+
+# set to null to avoid setting a seed (can speed up GPU computation, but
+# results will not be reproducible)
+seed: 1234
+
+# data
+data:
+  batch_size: 512
+  num_workers: 0
+  max_atom: 8
+
+# architecture
+spatial_dimension: 3
+model:
+  score_network:
+    architecture: diffusion_mace
+    number_of_atoms: 8
+    r_max: 5.0
+    num_bessel: 8
+    num_polynomial_cutoff: 5
+    max_ell: 2
+    interaction_cls: RealAgnosticResidualInteractionBlock
+    interaction_cls_first: RealAgnosticInteractionBlock
+    num_interactions: 2
+    hidden_irreps: 'orion~choices(["8x0e + 8x1o", "16x0e + 16x1o + 16x2e", "32x0e + 32x1o + 32x2e + 32x3o"])'
+    mlp_irreps: 'orion~choices(["8x0e", "32x0e"])'
+    number_of_mlp_layers: 0
+    avg_num_neighbors: 1
+    correlation: 3
+    gate: silu
+    radial_MLP: 'orion~choices([[8, 8, 8], [32, 32, 32], [64, 64]])'
+    radial_type: bessel
+    conditional_prob: 'orion~choices([0.0, 0.25, 0.5, 0.75])'
+    conditional_gamma: 2
+    condition_embedding_size: 'orion~choices([32, 64])'
+  noise:
+    total_time_steps: 100
+    sigma_min: 0.001  # default value
+    sigma_max: 0.5  # default value
+
+# optimizer and scheduler
+optimizer:
+  name: adamw
+  learning_rate: 'orion~loguniform(1e-6, 1e-3)'
+  weight_decay: 1.0e-6
+
+scheduler:
+  name: ReduceLROnPlateau
+  factor: 0.1
+  patience: 3
+
+# early stopping
+early_stopping:
+  metric: validation_epoch_loss
+  mode: min
+  patience: 10
+
+model_checkpoint:
+  monitor: validation_epoch_loss
+  mode: min
+
+# Sampling from the generative model
+diffusion_sampling:
+  noise:
+    total_time_steps: 100
+    sigma_min: 0.001  # default value
+    sigma_max: 0.5  # default value
+  sampling:
+    spatial_dimension: 3
+    number_of_corrector_steps: 1
+    number_of_atoms: 8
+    number_of_samples: 16
+    sample_every_n_epochs: 1
+    cell_dimensions: [5.43, 5.43, 5.43]
+
+logging:
+  #  - csv
+  #  - tensorboard
+  - comet
diff --git a/examples/config_files/diffusion/config_diffusion_mlp_orion.yaml b/examples/config_files/diffusion/config_diffusion_mlp_orion.yaml
new file mode 100644
index 00000000..91a2bd19
--- /dev/null
+++ b/examples/config_files/diffusion/config_diffusion_mlp_orion.yaml
@@ -0,0 +1,79 @@
+# general
+exp_name: mlp_example
+run_name: run_debug_delete_me
+max_epoch: 10
+log_every_n_steps: 1
+gradient_clipping: 0
+accumulate_grad_batches: 1  # make this number of forward passes before doing a backprop step
+
+# set to null to avoid setting a seed (can speed up GPU computation, but
+# results will not be reproducible)
+seed: 1234
+
+# data
+data:
+  batch_size: 1024
+  num_workers: 0
+  max_atom: 8
+
+# architecture
+spatial_dimension: 3
+model:
+  score_network:
+    architecture: mlp
+    number_of_atoms: 8
+    n_hidden_dimensions: 'orion~choices([1, 2, 3, 4])'
+    hidden_dimensions_size: 'orion~choices([16, 32, 64])'
+    conditional_prob: 'orion~choices([0.0, 0.25, 0.5])'
+    conditional_gamma: 2
+    condition_embedding_size: 'orion~choices([32, 64])'
+  noise:
+    total_time_steps: 100
+    sigma_min: 0.005  # default value
+    sigma_max: 0.5  # default value
+
+# optimizer and scheduler
+optimizer:
+  name: adamw
+  learning_rate: 'orion~loguniform(1e-6, 1e-3)'
+  weight_decay: 1.0e-6
+
+scheduler:
+  name: ReduceLROnPlateau
+  factor: 0.1
+  patience: 3
+
+# early stopping
+early_stopping:
+  metric: validation_epoch_loss
+  mode: min
+  patience: 10
+
+model_checkpoint:
+  monitor: validation_epoch_loss
+  mode: min
+
+# A callback to check the loss vs. sigma
+loss_monitoring: 
+  number_of_bins: 50
+  sample_every_n_epochs: 2
+
+# Sampling from the generative model
+diffusion_sampling:
+  noise:
+    total_time_steps: 100
+    sigma_min: 0.001  # default value
+    sigma_max: 0.5  # default value
+  sampling:
+    spatial_dimension: 3
+    number_of_corrector_steps: 1
+    number_of_atoms: 8
+    number_of_samples: 16
+    sample_batchsize: None
+    sample_every_n_epochs: 2
+    cell_dimensions: [5.43, 5.43, 5.43]
+
+logging:
+  - comet
+#- tensorboard
+#- csv
diff --git a/examples/local/diffusion/run_diffusion.sh b/examples/local/diffusion/run_diffusion.sh
index 6860d6ca..ca54f033 100755
--- a/examples/local/diffusion/run_diffusion.sh
+++ b/examples/local/diffusion/run_diffusion.sh
@@ -3,7 +3,7 @@
 # This example assumes that the dataset 'si_diffusion_small' is present locally in the DATA folder.
 # It is also assumed that the user has a Comet account for logging experiments.
 
-CONFIG=config_diffusion_mace.yaml
+CONFIG=../../config_files/diffusion/config_diffusion_mace.yaml
 DATA_DIR=../../../data/si_diffusion_1x1x1
 PROCESSED_DATA=${DATA_DIR}/processed
 DATA_WORK_DIR=./tmp_work_dir/
diff --git a/examples/local_orion/diffusion/config_diffusion.yaml b/examples/local_orion/diffusion/config_diffusion.yaml
deleted file mode 100644
index e2103835..00000000
--- a/examples/local_orion/diffusion/config_diffusion.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-# general
-exp_name: example_experiment
-max_epoch: 2
-log_every_n_steps: 1
-# fast_dev_run: True
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-gradient_clipping: 0
-
-# data
-data:
-  batch_size: 128
-  num_workers: 4
-  max_atom: 64
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    n_hidden_dimensions: 'orion~choices([1, 2, 3, 4])'
-    hidden_dimensions_size: 'orion~choices([256, 512, 1024, 2048])'
-  noise:
-    total_time_steps: 'orion~uniform(2, 20, discrete=True)'
-    sigma_min: 'orion~choices([0.001, 0.005, 0.01])'
-    sigma_max: 'orion~choices([0.1, 0.5, 0.75])'
-
-# optimizer
-optimizer:
-  name: adam
-  learning_rate: 'orion~loguniform(1e-6, 1e-3)'
-
-# early stopping
-early_stopping:
-  metric: val_loss
-  mode: min
-  patience: 100
-
-model_checkpoint:
-  monitor: val_loss
-  mode: min
-
-logging:
-  - tensorboard
-  - comet
diff --git a/examples/local_orion/diffusion/run_orion.sh b/examples/local_orion/diffusion/run_orion.sh
index 254b0e15..927a1753 100755
--- a/examples/local_orion/diffusion/run_orion.sh
+++ b/examples/local_orion/diffusion/run_orion.sh
@@ -2,7 +2,7 @@ export ORION_DB_ADDRESS='orion_db.pkl'
 export ORION_DB_TYPE='pickleddb'
 
 ROOT_DIR=../../../
-CONFIG=config_diffusion.yaml
+CONFIG=../../config_files/diffusion/config_diffusion_mlp_orion.yaml
 DATA_DIR=${ROOT_DIR}/data/si_diffusion_small
 PROCESSED_DATA=${DATA_DIR}/processed
 DATA_WORK_DIR=./tmp_work_dir/
diff --git a/examples/narval_orion/diffusion/config_diffusion.yaml b/examples/narval_orion/diffusion/config_diffusion.yaml
deleted file mode 100644
index 65c433d9..00000000
--- a/examples/narval_orion/diffusion/config_diffusion.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-# general
-exp_name: example_experiment
-max_epoch: 2
-log_every_n_steps: 1
-# fast_dev_run: True
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-gradient_clipping: 0
-
-# data
-data:
-  batch_size: 128
-  num_workers: 4
-  max_atom: 64
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    n_hidden_dimensions: 'orion~choices([1, 2, 3, 4])'
-    hidden_dimensions_size: 'orion~choices([256, 512, 1024, 2048])'
-  noise:
-    total_time_steps: 'orion~uniform(2, 20, discrete=True)'
-    sigma_min: 'orion~choices([0.001, 0.005, 0.01])'
-    sigma_max: 'orion~choices([0.1, 0.5, 0.75])'
-
-# optimizer
-optimizer:
-  name: adam
-  learning_rate: 'orion~loguniform(1e-6, 1e-3)'
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 100
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-logging:
-  - tensorboard
-  - comet

From 225ea0d7c78d97f0f54982d957b9b923e1874413 Mon Sep 17 00:00:00 2001
From: Simon Blackburn <simon.blackburn@mila.quebec>
Date: Mon, 10 Jun 2024 15:13:59 -0400
Subject: [PATCH 06/13] fixing a bug with biases in condition_embedding_layer

---
 crystal_diffusion/models/diffusion_mace.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/crystal_diffusion/models/diffusion_mace.py b/crystal_diffusion/models/diffusion_mace.py
index 91b25772..840f9f59 100644
--- a/crystal_diffusion/models/diffusion_mace.py
+++ b/crystal_diffusion/models/diffusion_mace.py
@@ -259,11 +259,10 @@ def __init__(
 
         # Apply a MLP with a bias on the forces as a conditional feature. This would be a 1o irrep
         forces_irreps_in = o3.Irreps("1x1o")
-        # the l=0 irreps is there to allow a bias in the embedding
-        forces_irreps_embedding = o3.Irreps(f"{condition_embedding_size}x0e + {condition_embedding_size}x1o")
+        forces_irreps_embedding = o3.Irreps(f"{condition_embedding_size}x1o")
         self.condition_embedding_layer = o3.Linear(irreps_in=forces_irreps_in,
                                                    irreps_out=forces_irreps_embedding,
-                                                   biases=True)
+                                                   biases=False)  # can't have biases with 1o irreps
 
         # conditional layers for the forces as a conditional feature to guide the diffusion
         self.conditional_layers = torch.nn.ModuleList([])

From 18cb6a284082c4d828f5e7753084035c3eaa978b Mon Sep 17 00:00:00 2001
From: Simon Blackburn <simon.blackburn@mila.quebec>
Date: Mon, 10 Jun 2024 15:58:22 -0400
Subject: [PATCH 07/13] removing bias in conditional_layers

---
 crystal_diffusion/models/diffusion_mace.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crystal_diffusion/models/diffusion_mace.py b/crystal_diffusion/models/diffusion_mace.py
index 840f9f59..a7a691fe 100644
--- a/crystal_diffusion/models/diffusion_mace.py
+++ b/crystal_diffusion/models/diffusion_mace.py
@@ -270,7 +270,7 @@ def __init__(
             cond_layer = o3.Linear(
                 irreps_in=forces_irreps_embedding,
                 irreps_out=hidden_irreps_out,
-                biases=True
+                biases=False
             )
             self.conditional_layers.append(cond_layer)
 

From e74cbf25c67aca38cfe980783b910fc1c97f0d39 Mon Sep 17 00:00:00 2001
From: Simon Blackburn <simon.blackburn@mila.quebec>
Date: Mon, 10 Jun 2024 16:11:22 -0400
Subject: [PATCH 08/13] fixing unit test

---
 tests/models/test_diffusion_mace.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/models/test_diffusion_mace.py b/tests/models/test_diffusion_mace.py
index e03113ae..f6e7ef46 100644
--- a/tests/models/test_diffusion_mace.py
+++ b/tests/models/test_diffusion_mace.py
@@ -6,7 +6,8 @@
 from crystal_diffusion.models.diffusion_mace import (DiffusionMACE,
                                                      LinearVectorReadoutBlock,
                                                      input_to_diffusion_mace)
-from crystal_diffusion.namespace import (NOISE, NOISY_CARTESIAN_POSITIONS,
+from crystal_diffusion.namespace import (CARTESIAN_FORCES, NOISE,
+                                         NOISY_CARTESIAN_POSITIONS,
                                          NOISY_RELATIVE_COORDINATES, TIME,
                                          UNIT_CELL)
 from crystal_diffusion.utils.basis_transformations import (
@@ -88,12 +89,17 @@ def noises(self, batch_size):
         return 0.5 * torch.rand(batch_size, 1)
 
     @pytest.fixture(scope='class')
-    def batch(self, relative_coordinates, cartesian_positions, basis_vectors, times, noises):
+    def forces(self, batch_size, spatial_dimension):
+        return 0.5 * torch.rand(batch_size, spatial_dimension)
+
+    @pytest.fixture(scope='class')
+    def batch(self, relative_coordinates, cartesian_positions, basis_vectors, times, noises, forces):
         batch = {NOISY_RELATIVE_COORDINATES: relative_coordinates,
                  NOISY_CARTESIAN_POSITIONS: cartesian_positions,
                  TIME: times,
                  NOISE: noises,
-                 UNIT_CELL: basis_vectors}
+                 UNIT_CELL: basis_vectors,
+                 CARTESIAN_FORCES: forces}
         return batch
 
     @pytest.fixture(scope='class')

From cbc587158a978ebe4ae004c13e3e05897d3f3cbb Mon Sep 17 00:00:00 2001
From: Simon Blackburn <simon.blackburn@mila.quebec>
Date: Tue, 11 Jun 2024 15:21:37 -0400
Subject: [PATCH 09/13] rm mila cluster config_diffusion_mace

---
 crystal_diffusion/models/diffusion_mace.py    | 40 +++++++--
 .../diffusion_mace_score_network.py           |  7 +-
 .../diffusion/config_diffusion_mace.yaml      |  3 +
 .../diffusion/config_diffusion_mlp.yaml       |  5 +-
 .../config_mace_equivariant_head.yaml         |  0
 .../diffusion/config_mace_mlp_head.yaml       |  0
 .../diffusion/config_diffusion_mace.yaml      | 82 -----------------
 .../diffusion/config_diffusion_mlp.yaml       | 77 ----------------
 .../config_mace_equivariant_head.yaml         | 86 ------------------
 .../diffusion/config_mace_mlp_head.yaml       | 87 -------------------
 .../narval/diffusion/config_diffusion.yaml    | 46 ----------
 11 files changed, 44 insertions(+), 389 deletions(-)
 rename examples/{local => config_files}/diffusion/config_diffusion_mace.yaml (95%)
 rename examples/{local => config_files}/diffusion/config_diffusion_mlp.yaml (97%)
 rename examples/{local => config_files}/diffusion/config_mace_equivariant_head.yaml (100%)
 rename examples/{local => config_files}/diffusion/config_mace_mlp_head.yaml (100%)
 delete mode 100644 examples/mila_cluster/diffusion/config_diffusion_mace.yaml
 delete mode 100644 examples/mila_cluster/diffusion/config_diffusion_mlp.yaml
 delete mode 100644 examples/mila_cluster/diffusion/config_mace_equivariant_head.yaml
 delete mode 100644 examples/mila_cluster/diffusion/config_mace_mlp_head.yaml
 delete mode 100644 examples/narval/diffusion/config_diffusion.yaml

diff --git a/crystal_diffusion/models/diffusion_mace.py b/crystal_diffusion/models/diffusion_mace.py
index 80b79f1c..91b25772 100644
--- a/crystal_diffusion/models/diffusion_mace.py
+++ b/crystal_diffusion/models/diffusion_mace.py
@@ -9,8 +9,8 @@
 from torch_geometric.data import Data
 
 from crystal_diffusion.models.mace_utils import get_adj_matrix
-from crystal_diffusion.namespace import (NOISE, NOISY_CARTESIAN_POSITIONS,
-                                         UNIT_CELL)
+from crystal_diffusion.namespace import (CARTESIAN_FORCES, NOISE,
+                                         NOISY_CARTESIAN_POSITIONS, UNIT_CELL)
 
 
 class LinearVectorReadoutBlock(torch.nn.Module):
@@ -64,6 +64,9 @@ def input_to_diffusion_mace(batch: Dict[AnyStr, torch.Tensor], radial_cutoff: fl
 
     flat_basis_vectors = basis_vectors.view(-1, spatial_dimension)  # batch * spatial_dimension, spatial_dimension
     # create the pytorch-geometric graph
+
+    forces = batch[CARTESIAN_FORCES].view(-1, spatial_dimension)  # batch * n_atom_per_graph, spatial dimension
+
     graph_data = Data(edge_index=adj_matrix,
                       node_attrs=node_attrs.to(device),
                       node_diffusion_scalars=node_diffusion_scalars.to(device),
@@ -71,7 +74,8 @@ def input_to_diffusion_mace(batch: Dict[AnyStr, torch.Tensor], radial_cutoff: fl
                       ptr=ptr.to(device),
                       batch=batch_tensor.to(device),
                       shifts=shift_matrix,
-                      cell=flat_basis_vectors
+                      cell=flat_basis_vectors,
+                      forces=forces,
                       )
     return graph_data
 
@@ -102,6 +106,7 @@ def __init__(
         gate: Optional[Callable],
         radial_MLP: List[int],
         radial_type: Optional[str] = "bessel",
+        condition_embedding_size: int = 64  # dimension of the conditional variable embedding - assumed to be l=1 (odd)
     ):
         """Init method."""
         assert num_elements == 1, "only a single element can be used at this time. Set 'num_elements' to 1."
@@ -142,8 +147,8 @@ def __init__(
                            irreps_out=diffusion_scalar_irreps_out,
                            biases=True)
         self.diffusion_scalar_embedding.append(linear)
+        non_linearity = Activation(irreps_in=diffusion_scalar_irreps_out, acts=[gate])
         for _ in range(number_of_mlp_layers):
-            non_linearity = Activation(irreps_in=diffusion_scalar_irreps_out, acts=[gate])
             self.diffusion_scalar_embedding.append(non_linearity)
 
             linear = o3.Linear(irreps_in=diffusion_scalar_irreps_out,
@@ -252,7 +257,25 @@ def __init__(
         # the output is a single vector.
         self.vector_readout = LinearVectorReadoutBlock(irreps_in=hidden_irreps_out)
 
-    def forward(self, data: Dict[str, torch.Tensor]) -> torch.Tensor:
+        # Apply a MLP with a bias on the forces as a conditional feature. This would be a 1o irrep
+        forces_irreps_in = o3.Irreps("1x1o")
+        # the l=0 irreps is there to allow a bias in the embedding
+        forces_irreps_embedding = o3.Irreps(f"{condition_embedding_size}x0e + {condition_embedding_size}x1o")
+        self.condition_embedding_layer = o3.Linear(irreps_in=forces_irreps_in,
+                                                   irreps_out=forces_irreps_embedding,
+                                                   biases=True)
+
+        # conditional layers for the forces as a conditional feature to guide the diffusion
+        self.conditional_layers = torch.nn.ModuleList([])
+        for _ in range(num_interactions):
+            cond_layer = o3.Linear(
+                irreps_in=forces_irreps_embedding,
+                irreps_out=hidden_irreps_out,
+                biases=True
+            )
+            self.conditional_layers.append(cond_layer)
+
+    def forward(self, data: Dict[str, torch.Tensor], conditional: bool = False) -> torch.Tensor:
         """Forward method."""
         # Setup
 
@@ -271,7 +294,9 @@ def forward(self, data: Dict[str, torch.Tensor]) -> torch.Tensor:
         edge_attrs = self.spherical_harmonics(vectors)
         edge_feats = self.radial_embedding(lengths)
 
-        for interaction, product in zip(self.interactions, self.products):
+        forces_embedding = self.condition_embedding_layer(data["forces"])  # 0e + 1o embedding
+
+        for interaction, product, cond_layer in zip(self.interactions, self.products, self.conditional_layers):
             node_feats, sc = interaction(
                 node_attrs=augmented_node_attributes,
                 node_feats=node_feats,
@@ -284,6 +309,9 @@ def forward(self, data: Dict[str, torch.Tensor]) -> torch.Tensor:
                 sc=sc,
                 node_attrs=augmented_node_attributes,
             )
+            if conditional:  # modify the node features to account for the conditional features i.e. forces
+                force_embed = cond_layer(forces_embedding)
+                node_feats += force_embed
 
         # Outputs
         vectors_output = self.vector_readout(node_feats)
diff --git a/crystal_diffusion/models/score_networks/diffusion_mace_score_network.py b/crystal_diffusion/models/score_networks/diffusion_mace_score_network.py
index d7ac688c..c8fc2330 100644
--- a/crystal_diffusion/models/score_networks/diffusion_mace_score_network.py
+++ b/crystal_diffusion/models/score_networks/diffusion_mace_score_network.py
@@ -37,6 +37,7 @@ class DiffusionMACEScoreNetworkParameters(ScoreNetworkParameters):
     gate: str = "silu"  # non linearity for last readout - choices: ["silu", "tanh", "abs", "None"]
     radial_MLP: List[int] = field(default_factory=lambda: [64, 64, 64])  # "width of the radial MLP"
     radial_type: str = "bessel"  # type of radial basis functions - choices=["bessel", "gaussian", "chebyshev"]
+    condition_embedding_size: int = 64  # dimension of the conditional variable embedding - assumed to be l=1 (odd)
 
 
 class DiffusionMACEScoreNetwork(ScoreNetwork):
@@ -71,7 +72,8 @@ def __init__(self, hyper_params: DiffusionMACEScoreNetworkParameters):
             correlation=hyper_params.correlation,
             gate=gate_dict[hyper_params.gate],
             radial_MLP=hyper_params.radial_MLP,
-            radial_type=hyper_params.radial_type
+            radial_type=hyper_params.radial_type,
+            condition_embedding_size=hyper_params.condition_embedding_size,
         )
 
         self._natoms = hyper_params.number_of_atoms
@@ -100,7 +102,6 @@ def _forward_unchecked(self, batch: Dict[AnyStr, torch.Tensor], conditional: boo
         Returns:
             output : the scores computed by the model as a [batch_size, n_atom, spatial_dimension] tensor.
         """
-        del conditional  # TODO do something with forces when conditional
         relative_coordinates = batch[NOISY_RELATIVE_COORDINATES]
         batch_size, number_of_atoms, spatial_dimension = relative_coordinates.shape
 
@@ -108,7 +109,7 @@ def _forward_unchecked(self, batch: Dict[AnyStr, torch.Tensor], conditional: boo
         batch[NOISY_CARTESIAN_POSITIONS] = get_positions_from_coordinates(relative_coordinates, basis_vectors)
         graph_input = input_to_diffusion_mace(batch, radial_cutoff=self.r_max)
 
-        flat_cartesian_scores = self.diffusion_mace_network(graph_input)
+        flat_cartesian_scores = self.diffusion_mace_network(graph_input, conditional)
         cartesian_scores = flat_cartesian_scores.reshape(batch_size, number_of_atoms, spatial_dimension)
 
         reciprocal_basis_vectors_as_columns = get_reciprocal_basis_vectors(basis_vectors)
diff --git a/examples/local/diffusion/config_diffusion_mace.yaml b/examples/config_files/diffusion/config_diffusion_mace.yaml
similarity index 95%
rename from examples/local/diffusion/config_diffusion_mace.yaml
rename to examples/config_files/diffusion/config_diffusion_mace.yaml
index a9c4df79..6d2fc1b2 100644
--- a/examples/local/diffusion/config_diffusion_mace.yaml
+++ b/examples/config_files/diffusion/config_diffusion_mace.yaml
@@ -37,6 +37,9 @@ model:
     gate: silu
     radial_MLP: [8, 8, 8]
     radial_type: bessel
+    conditional_prob: 0.0
+    conditional_gamma: 2
+    condition_embedding_size: 64
   noise:
     total_time_steps: 100
     sigma_min: 0.001  # default value
diff --git a/examples/local/diffusion/config_diffusion_mlp.yaml b/examples/config_files/diffusion/config_diffusion_mlp.yaml
similarity index 97%
rename from examples/local/diffusion/config_diffusion_mlp.yaml
rename to examples/config_files/diffusion/config_diffusion_mlp.yaml
index 983c9be3..a27b789f 100644
--- a/examples/local/diffusion/config_diffusion_mlp.yaml
+++ b/examples/config_files/diffusion/config_diffusion_mlp.yaml
@@ -21,11 +21,12 @@ spatial_dimension: 3
 model:
   score_network:
     architecture: mlp
-    conditional_prob: 0.0
-    conditional_gamma: 2
     number_of_atoms: 8
     n_hidden_dimensions: 2
     hidden_dimensions_size: 64
+    conditional_prob: 0.0
+    conditional_gamma: 2
+    condition_embedding_size: 64
   noise:
     total_time_steps: 100
     sigma_min: 0.005  # default value
diff --git a/examples/local/diffusion/config_mace_equivariant_head.yaml b/examples/config_files/diffusion/config_mace_equivariant_head.yaml
similarity index 100%
rename from examples/local/diffusion/config_mace_equivariant_head.yaml
rename to examples/config_files/diffusion/config_mace_equivariant_head.yaml
diff --git a/examples/local/diffusion/config_mace_mlp_head.yaml b/examples/config_files/diffusion/config_mace_mlp_head.yaml
similarity index 100%
rename from examples/local/diffusion/config_mace_mlp_head.yaml
rename to examples/config_files/diffusion/config_mace_mlp_head.yaml
diff --git a/examples/mila_cluster/diffusion/config_diffusion_mace.yaml b/examples/mila_cluster/diffusion/config_diffusion_mace.yaml
deleted file mode 100644
index 145a0133..00000000
--- a/examples/mila_cluster/diffusion/config_diffusion_mace.yaml
+++ /dev/null
@@ -1,82 +0,0 @@
-# general
-exp_name: diffusion_mace_example
-run_name: run_debug_delete_me
-max_epoch: 10
-log_every_n_steps: 1
-gradient_clipping: 0.1
-
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-
-# data
-data:
-  batch_size: 512
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    architecture: diffusion_mace
-    number_of_atoms: 8
-    r_max: 5.0
-    num_bessel: 8
-    num_polynomial_cutoff: 5
-    max_ell: 2
-    interaction_cls: RealAgnosticResidualInteractionBlock
-    interaction_cls_first: RealAgnosticInteractionBlock
-    num_interactions: 2
-    hidden_irreps: 8x0e + 8x1o
-    mlp_irreps: 8x0e
-    number_of_mlp_layers: 0
-    avg_num_neighbors: 1
-    correlation: 3
-    gate: silu
-    radial_MLP: [8, 8, 8]
-    radial_type: bessel
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.001  # default value
-    sigma_max: 0.5  # default value'
-
-# optimizer and scheduler
-optimizer:
-  name: adamw
-  learning_rate: 0.001
-  weight_decay: 1.0e-6
-
-scheduler:
-  name: ReduceLROnPlateau
-  factor: 0.1
-  patience: 3
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 10
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-# Sampling from the generative model
-diffusion_sampling:
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.001  # default value
-    sigma_max: 0.5  # default value
-  sampling:
-    spatial_dimension: 3
-    number_of_corrector_steps: 1
-    number_of_atoms: 8
-    number_of_samples: 16
-    sample_every_n_epochs: 1
-    cell_dimensions: [5.43, 5.43, 5.43]
-
-logging:
-  - csv
-  - tensorboard
-  - comet
diff --git a/examples/mila_cluster/diffusion/config_diffusion_mlp.yaml b/examples/mila_cluster/diffusion/config_diffusion_mlp.yaml
deleted file mode 100644
index 0f55a9d4..00000000
--- a/examples/mila_cluster/diffusion/config_diffusion_mlp.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-# general
-exp_name: mlp_example
-run_name: run_debug_delete_me
-max_epoch: 10
-log_every_n_steps: 1
-gradient_clipping: 0
-
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-
-# data
-data:
-  batch_size: 1024
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    architecture: mlp
-    conditional_prob: 0.0
-    conditional_gamma: 2
-    number_of_atoms: 8
-    n_hidden_dimensions: 2
-    hidden_dimensions_size: 64
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value'
-
-# optimizer and scheduler
-optimizer:
-  name: adamw
-  learning_rate: 0.001
-  weight_decay: 1.0e-6
-
-scheduler:
-  name: ReduceLROnPlateau
-  factor: 0.1
-  patience: 3
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 10
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-# A callback to check the loss vs. sigma
-loss_monitoring: 
-  number_of_bins: 50
-  sample_every_n_epochs: 2
-
-# Sampling from the generative model
-diffusion_sampling:
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.001  # default value
-    sigma_max: 0.5  # default value
-  sampling:
-    spatial_dimension: 3
-    number_of_corrector_steps: 1
-    number_of_atoms: 8
-    number_of_samples: 16
-    sample_batchsize: None
-    sample_every_n_epochs: 2
-    cell_dimensions: [5.43, 5.43, 5.43]
-
-logging:
-  - comet
-#- tensorboard
-#- csv
diff --git a/examples/mila_cluster/diffusion/config_mace_equivariant_head.yaml b/examples/mila_cluster/diffusion/config_mace_equivariant_head.yaml
deleted file mode 100644
index ebb99d36..00000000
--- a/examples/mila_cluster/diffusion/config_mace_equivariant_head.yaml
+++ /dev/null
@@ -1,86 +0,0 @@
-# general
-exp_name: mace_equivariant_head_example
-run_name: run_debug_delete_me
-max_epoch: 10
-log_every_n_steps: 1
-
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-
-# data
-data:
-  batch_size: 1024
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    architecture: mace
-    number_of_atoms: 8  
-    r_max: 5.0
-    num_bessel: 8
-    num_polynomial_cutoff: 5
-    max_ell: 2
-    interaction_cls: RealAgnosticResidualInteractionBlock
-    interaction_cls_first: RealAgnosticInteractionBlock
-    num_interactions: 2
-    hidden_irreps: 8x0e + 8x1o
-    MLP_irreps: 8x0e
-    avg_num_neighbors: 1
-    correlation: 3
-    gate: silu
-    radial_MLP: [8, 8, 8]
-    radial_type: bessel
-    prediction_head_parameters:
-      name: equivariant
-      time_embedding_irreps: "8x0e"
-      gate: "silu"
-      number_of_layers: 3
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value'
-
-# optimizer and scheduler
-optimizer:
-  name: adamw
-  learning_rate: 0.001
-  weight_decay: 1.0e-6
-
-scheduler:
-  name: ReduceLROnPlateau
-  factor: 0.1
-  patience: 3
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 10
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-# Sampling from the generative model
-diffusion_sampling:
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value
-  sampling:
-    spatial_dimension: 3
-    number_of_corrector_steps: 1
-    number_of_atoms: 8
-    number_of_samples: 16
-    sample_batchsize: None
-    sample_every_n_epochs: 1
-    cell_dimensions: [5.43, 5.43, 5.43]
-
-logging:
-  - csv
-  - tensorboard
-  - comet
diff --git a/examples/mila_cluster/diffusion/config_mace_mlp_head.yaml b/examples/mila_cluster/diffusion/config_mace_mlp_head.yaml
deleted file mode 100644
index 94700c8c..00000000
--- a/examples/mila_cluster/diffusion/config_mace_mlp_head.yaml
+++ /dev/null
@@ -1,87 +0,0 @@
-# general
-exp_name: mace_mlp_head_example
-run_name: run_debug_delete_me
-max_epoch: 10
-log_every_n_steps: 1
-
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-
-# data
-data:
-  batch_size: 512
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    architecture: mace
-    use_pretrained: None
-    pretrained_weights_path: ./
-    number_of_atoms: 8
-    r_max: 5.0
-    num_bessel: 8
-    num_polynomial_cutoff: 5
-    max_ell: 2
-    interaction_cls: RealAgnosticResidualInteractionBlock
-    interaction_cls_first: RealAgnosticInteractionBlock
-    num_interactions: 2
-    hidden_irreps: 8x0e + 8x1o
-    MLP_irreps: 8x0e
-    avg_num_neighbors: 1
-    correlation: 3
-    gate: silu
-    radial_MLP: [8, 8, 8]
-    radial_type: bessel
-    prediction_head_parameters:
-      name: mlp
-      hidden_dimensions_size: 8
-      n_hidden_dimensions: 3
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value'
-
-# optimizer and scheduler
-optimizer:
-  name: adamw
-  learning_rate: 0.001
-  weight_decay: 1.0e-6
-
-scheduler:
-  name: ReduceLROnPlateau
-  factor: 0.1
-  patience: 3
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 10
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-# Sampling from the generative model
-diffusion_sampling:
-  noise:
-    total_time_steps: 100
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value
-  sampling:
-    spatial_dimension: 3
-    number_of_corrector_steps: 1
-    number_of_atoms: 8
-    number_of_samples: 16
-    sample_batchsize: None
-    sample_every_n_epochs: 1
-    cell_dimensions: [5.43, 5.43, 5.43]
-
-logging:
-  - csv
-  - tensorboard
-  - comet
diff --git a/examples/narval/diffusion/config_diffusion.yaml b/examples/narval/diffusion/config_diffusion.yaml
deleted file mode 100644
index ae4c2963..00000000
--- a/examples/narval/diffusion/config_diffusion.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-# general
-exp_name: example_narval
-run_name: run1
-max_epoch: 3
-log_every_n_steps: 1
-# fast_dev_run: True
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-gradient_clipping: 0
-
-# data
-data:
-  batch_size: 128
-  num_workers: 0
-  max_atom: 8
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    n_hidden_dimensions: 2
-    hidden_dimensions_size: 64
-  noise:
-    total_time_steps: 10
-    sigma_min: 0.005  # default value
-    sigma_max: 0.5  # default value
-
-# optimizer
-optimizer:
-  name: adam
-  learning_rate: 0.001
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 100
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-logging:
-  - csv
-  - comet

From 3ddc3f59422f3205a3bce0a1045e7c492936b747 Mon Sep 17 00:00:00 2001
From: Simon Blackburn <simon.blackburn@mila.quebec>
Date: Mon, 10 Jun 2024 14:14:33 -0400
Subject: [PATCH 10/13] more example clean-up

---
 .../config_diffusion_mace_orion.yaml          | 86 +++++++++++++++++++
 .../diffusion/config_diffusion_mlp_orion.yaml | 79 +++++++++++++++++
 examples/local/diffusion/run_diffusion.sh     |  2 +-
 .../diffusion/config_diffusion.yaml           | 45 ----------
 examples/local_orion/diffusion/run_orion.sh   |  2 +-
 .../diffusion/config_diffusion.yaml           | 45 ----------
 6 files changed, 167 insertions(+), 92 deletions(-)
 create mode 100644 examples/config_files/diffusion/config_diffusion_mace_orion.yaml
 create mode 100644 examples/config_files/diffusion/config_diffusion_mlp_orion.yaml
 delete mode 100644 examples/local_orion/diffusion/config_diffusion.yaml
 delete mode 100644 examples/narval_orion/diffusion/config_diffusion.yaml

diff --git a/examples/config_files/diffusion/config_diffusion_mace_orion.yaml b/examples/config_files/diffusion/config_diffusion_mace_orion.yaml
new file mode 100644
index 00000000..1daf775b
--- /dev/null
+++ b/examples/config_files/diffusion/config_diffusion_mace_orion.yaml
@@ -0,0 +1,86 @@
+# general
+exp_name: diffusion_mace_example
+run_name: run_debug_delete_me
+max_epoch: 10
+log_every_n_steps: 1
+gradient_clipping: 0.1
+accumulate_grad_batches: 1  # make this number of forward passes before doing a backprop step
+
+# set to null to avoid setting a seed (can speed up GPU computation, but
+# results will not be reproducible)
+seed: 1234
+
+# data
+data:
+  batch_size: 512
+  num_workers: 0
+  max_atom: 8
+
+# architecture
+spatial_dimension: 3
+model:
+  score_network:
+    architecture: diffusion_mace
+    number_of_atoms: 8
+    r_max: 5.0
+    num_bessel: 8
+    num_polynomial_cutoff: 5
+    max_ell: 2
+    interaction_cls: RealAgnosticResidualInteractionBlock
+    interaction_cls_first: RealAgnosticInteractionBlock
+    num_interactions: 2
+    hidden_irreps: 'orion~choices(["8x0e + 8x1o", "16x0e + 16x1o + 16x2e", "32x0e + 32x1o + 32x2e + 32x3o"])'
+    mlp_irreps: 'orion~choices(["8x0e", "32x0e"])'
+    number_of_mlp_layers: 0
+    avg_num_neighbors: 1
+    correlation: 3
+    gate: silu
+    radial_MLP: 'orion~choices([[8, 8, 8], [32, 32, 32], [64, 64]])'
+    radial_type: bessel
+    conditional_prob: 'orion~choices([0.0, 0.25, 0.5, 0.75])'
+    conditional_gamma: 2
+    condition_embedding_size: 'orion~choices([32, 64])'
+  noise:
+    total_time_steps: 100
+    sigma_min: 0.001  # default value
+    sigma_max: 0.5  # default value
+
+# optimizer and scheduler
+optimizer:
+  name: adamw
+  learning_rate: 'orion~loguniform(1e-6, 1e-3)'
+  weight_decay: 1.0e-6
+
+scheduler:
+  name: ReduceLROnPlateau
+  factor: 0.1
+  patience: 3
+
+# early stopping
+early_stopping:
+  metric: validation_epoch_loss
+  mode: min
+  patience: 10
+
+model_checkpoint:
+  monitor: validation_epoch_loss
+  mode: min
+
+# Sampling from the generative model
+diffusion_sampling:
+  noise:
+    total_time_steps: 100
+    sigma_min: 0.001  # default value
+    sigma_max: 0.5  # default value
+  sampling:
+    spatial_dimension: 3
+    number_of_corrector_steps: 1
+    number_of_atoms: 8
+    number_of_samples: 16
+    sample_every_n_epochs: 1
+    cell_dimensions: [5.43, 5.43, 5.43]
+
+logging:
+  #  - csv
+  #  - tensorboard
+  - comet
diff --git a/examples/config_files/diffusion/config_diffusion_mlp_orion.yaml b/examples/config_files/diffusion/config_diffusion_mlp_orion.yaml
new file mode 100644
index 00000000..91a2bd19
--- /dev/null
+++ b/examples/config_files/diffusion/config_diffusion_mlp_orion.yaml
@@ -0,0 +1,79 @@
+# general
+exp_name: mlp_example
+run_name: run_debug_delete_me
+max_epoch: 10
+log_every_n_steps: 1
+gradient_clipping: 0
+accumulate_grad_batches: 1  # make this number of forward passes before doing a backprop step
+
+# set to null to avoid setting a seed (can speed up GPU computation, but
+# results will not be reproducible)
+seed: 1234
+
+# data
+data:
+  batch_size: 1024
+  num_workers: 0
+  max_atom: 8
+
+# architecture
+spatial_dimension: 3
+model:
+  score_network:
+    architecture: mlp
+    number_of_atoms: 8
+    n_hidden_dimensions: 'orion~choices([1, 2, 3, 4])'
+    hidden_dimensions_size: 'orion~choices([16, 32, 64])'
+    conditional_prob: 'orion~choices([0.0, 0.25, 0.5])'
+    conditional_gamma: 2
+    condition_embedding_size: 'orion~choices([32, 64])'
+  noise:
+    total_time_steps: 100
+    sigma_min: 0.005  # default value
+    sigma_max: 0.5  # default value
+
+# optimizer and scheduler
+optimizer:
+  name: adamw
+  learning_rate: 'orion~loguniform(1e-6, 1e-3)'
+  weight_decay: 1.0e-6
+
+scheduler:
+  name: ReduceLROnPlateau
+  factor: 0.1
+  patience: 3
+
+# early stopping
+early_stopping:
+  metric: validation_epoch_loss
+  mode: min
+  patience: 10
+
+model_checkpoint:
+  monitor: validation_epoch_loss
+  mode: min
+
+# A callback to check the loss vs. sigma
+loss_monitoring: 
+  number_of_bins: 50
+  sample_every_n_epochs: 2
+
+# Sampling from the generative model
+diffusion_sampling:
+  noise:
+    total_time_steps: 100
+    sigma_min: 0.001  # default value
+    sigma_max: 0.5  # default value
+  sampling:
+    spatial_dimension: 3
+    number_of_corrector_steps: 1
+    number_of_atoms: 8
+    number_of_samples: 16
+    sample_batchsize: None
+    sample_every_n_epochs: 2
+    cell_dimensions: [5.43, 5.43, 5.43]
+
+logging:
+  - comet
+#- tensorboard
+#- csv
diff --git a/examples/local/diffusion/run_diffusion.sh b/examples/local/diffusion/run_diffusion.sh
index 6860d6ca..ca54f033 100755
--- a/examples/local/diffusion/run_diffusion.sh
+++ b/examples/local/diffusion/run_diffusion.sh
@@ -3,7 +3,7 @@
 # This example assumes that the dataset 'si_diffusion_small' is present locally in the DATA folder.
 # It is also assumed that the user has a Comet account for logging experiments.
 
-CONFIG=config_diffusion_mace.yaml
+CONFIG=../../config_files/diffusion/config_diffusion_mace.yaml
 DATA_DIR=../../../data/si_diffusion_1x1x1
 PROCESSED_DATA=${DATA_DIR}/processed
 DATA_WORK_DIR=./tmp_work_dir/
diff --git a/examples/local_orion/diffusion/config_diffusion.yaml b/examples/local_orion/diffusion/config_diffusion.yaml
deleted file mode 100644
index e2103835..00000000
--- a/examples/local_orion/diffusion/config_diffusion.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-# general
-exp_name: example_experiment
-max_epoch: 2
-log_every_n_steps: 1
-# fast_dev_run: True
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-gradient_clipping: 0
-
-# data
-data:
-  batch_size: 128
-  num_workers: 4
-  max_atom: 64
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    n_hidden_dimensions: 'orion~choices([1, 2, 3, 4])'
-    hidden_dimensions_size: 'orion~choices([256, 512, 1024, 2048])'
-  noise:
-    total_time_steps: 'orion~uniform(2, 20, discrete=True)'
-    sigma_min: 'orion~choices([0.001, 0.005, 0.01])'
-    sigma_max: 'orion~choices([0.1, 0.5, 0.75])'
-
-# optimizer
-optimizer:
-  name: adam
-  learning_rate: 'orion~loguniform(1e-6, 1e-3)'
-
-# early stopping
-early_stopping:
-  metric: val_loss
-  mode: min
-  patience: 100
-
-model_checkpoint:
-  monitor: val_loss
-  mode: min
-
-logging:
-  - tensorboard
-  - comet
diff --git a/examples/local_orion/diffusion/run_orion.sh b/examples/local_orion/diffusion/run_orion.sh
index 254b0e15..927a1753 100755
--- a/examples/local_orion/diffusion/run_orion.sh
+++ b/examples/local_orion/diffusion/run_orion.sh
@@ -2,7 +2,7 @@ export ORION_DB_ADDRESS='orion_db.pkl'
 export ORION_DB_TYPE='pickleddb'
 
 ROOT_DIR=../../../
-CONFIG=config_diffusion.yaml
+CONFIG=../../config_files/diffusion/config_diffusion_mlp_orion.yaml
 DATA_DIR=${ROOT_DIR}/data/si_diffusion_small
 PROCESSED_DATA=${DATA_DIR}/processed
 DATA_WORK_DIR=./tmp_work_dir/
diff --git a/examples/narval_orion/diffusion/config_diffusion.yaml b/examples/narval_orion/diffusion/config_diffusion.yaml
deleted file mode 100644
index 65c433d9..00000000
--- a/examples/narval_orion/diffusion/config_diffusion.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-# general
-exp_name: example_experiment
-max_epoch: 2
-log_every_n_steps: 1
-# fast_dev_run: True
-# set to null to avoid setting a seed (can speed up GPU computation, but
-# results will not be reproducible)
-seed: 1234
-gradient_clipping: 0
-
-# data
-data:
-  batch_size: 128
-  num_workers: 4
-  max_atom: 64
-
-# architecture
-spatial_dimension: 3
-model:
-  score_network:
-    n_hidden_dimensions: 'orion~choices([1, 2, 3, 4])'
-    hidden_dimensions_size: 'orion~choices([256, 512, 1024, 2048])'
-  noise:
-    total_time_steps: 'orion~uniform(2, 20, discrete=True)'
-    sigma_min: 'orion~choices([0.001, 0.005, 0.01])'
-    sigma_max: 'orion~choices([0.1, 0.5, 0.75])'
-
-# optimizer
-optimizer:
-  name: adam
-  learning_rate: 'orion~loguniform(1e-6, 1e-3)'
-
-# early stopping
-early_stopping:
-  metric: validation_epoch_loss
-  mode: min
-  patience: 100
-
-model_checkpoint:
-  monitor: validation_epoch_loss
-  mode: min
-
-logging:
-  - tensorboard
-  - comet

From d887cd107799b0fda4e724d26b88f1f688b392d3 Mon Sep 17 00:00:00 2001
From: Simon Blackburn <simon.blackburn@mila.quebec>
Date: Mon, 10 Jun 2024 15:13:59 -0400
Subject: [PATCH 11/13] fixing a bug with biases in condition_embedding_layer

---
 crystal_diffusion/models/diffusion_mace.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/crystal_diffusion/models/diffusion_mace.py b/crystal_diffusion/models/diffusion_mace.py
index 91b25772..840f9f59 100644
--- a/crystal_diffusion/models/diffusion_mace.py
+++ b/crystal_diffusion/models/diffusion_mace.py
@@ -259,11 +259,10 @@ def __init__(
 
         # Apply a MLP with a bias on the forces as a conditional feature. This would be a 1o irrep
         forces_irreps_in = o3.Irreps("1x1o")
-        # the l=0 irreps is there to allow a bias in the embedding
-        forces_irreps_embedding = o3.Irreps(f"{condition_embedding_size}x0e + {condition_embedding_size}x1o")
+        forces_irreps_embedding = o3.Irreps(f"{condition_embedding_size}x1o")
         self.condition_embedding_layer = o3.Linear(irreps_in=forces_irreps_in,
                                                    irreps_out=forces_irreps_embedding,
-                                                   biases=True)
+                                                   biases=False)  # can't have biases with 1o irreps
 
         # conditional layers for the forces as a conditional feature to guide the diffusion
         self.conditional_layers = torch.nn.ModuleList([])

From f77d70c25d9547ebc84c21acc0f7ccd4acafa5e2 Mon Sep 17 00:00:00 2001
From: Simon Blackburn <simon.blackburn@mila.quebec>
Date: Tue, 11 Jun 2024 15:25:02 -0400
Subject: [PATCH 12/13] removing biases from the conditional layers

---
 crystal_diffusion/models/diffusion_mace.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crystal_diffusion/models/diffusion_mace.py b/crystal_diffusion/models/diffusion_mace.py
index 840f9f59..a7a691fe 100644
--- a/crystal_diffusion/models/diffusion_mace.py
+++ b/crystal_diffusion/models/diffusion_mace.py
@@ -270,7 +270,7 @@ def __init__(
             cond_layer = o3.Linear(
                 irreps_in=forces_irreps_embedding,
                 irreps_out=hidden_irreps_out,
-                biases=True
+                biases=False
             )
             self.conditional_layers.append(cond_layer)
 

From 3421ebb3644f3af3afd8c6d792a14032cdacb917 Mon Sep 17 00:00:00 2001
From: Simon Blackburn <simon.blackburn@mila.quebec>
Date: Mon, 10 Jun 2024 16:11:22 -0400
Subject: [PATCH 13/13] fixing unit test

---
 tests/models/test_diffusion_mace.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/models/test_diffusion_mace.py b/tests/models/test_diffusion_mace.py
index e03113ae..f6e7ef46 100644
--- a/tests/models/test_diffusion_mace.py
+++ b/tests/models/test_diffusion_mace.py
@@ -6,7 +6,8 @@
 from crystal_diffusion.models.diffusion_mace import (DiffusionMACE,
                                                      LinearVectorReadoutBlock,
                                                      input_to_diffusion_mace)
-from crystal_diffusion.namespace import (NOISE, NOISY_CARTESIAN_POSITIONS,
+from crystal_diffusion.namespace import (CARTESIAN_FORCES, NOISE,
+                                         NOISY_CARTESIAN_POSITIONS,
                                          NOISY_RELATIVE_COORDINATES, TIME,
                                          UNIT_CELL)
 from crystal_diffusion.utils.basis_transformations import (
@@ -88,12 +89,17 @@ def noises(self, batch_size):
         return 0.5 * torch.rand(batch_size, 1)
 
     @pytest.fixture(scope='class')
-    def batch(self, relative_coordinates, cartesian_positions, basis_vectors, times, noises):
+    def forces(self, batch_size, spatial_dimension):
+        return 0.5 * torch.rand(batch_size, spatial_dimension)
+
+    @pytest.fixture(scope='class')
+    def batch(self, relative_coordinates, cartesian_positions, basis_vectors, times, noises, forces):
         batch = {NOISY_RELATIVE_COORDINATES: relative_coordinates,
                  NOISY_CARTESIAN_POSITIONS: cartesian_positions,
                  TIME: times,
                  NOISE: noises,
-                 UNIT_CELL: basis_vectors}
+                 UNIT_CELL: basis_vectors,
+                 CARTESIAN_FORCES: forces}
         return batch
 
     @pytest.fixture(scope='class')