From fd25a21641265347a7f10566c87be7c88b32143c Mon Sep 17 00:00:00 2001
From: Bruno Rousseau <rousseau.bruno@gmail.com>
Date: Thu, 26 Dec 2024 19:33:38 -0500
Subject: [PATCH] an example using the on-the-fly data module

---
 .../config_diffusion_egnn_2_atoms_in_1D.yaml  | 117 ++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 examples/config_files/diffusion/config_diffusion_egnn_2_atoms_in_1D.yaml

diff --git a/examples/config_files/diffusion/config_diffusion_egnn_2_atoms_in_1D.yaml b/examples/config_files/diffusion/config_diffusion_egnn_2_atoms_in_1D.yaml
new file mode 100644
index 00000000..a234112b
--- /dev/null
+++ b/examples/config_files/diffusion/config_diffusion_egnn_2_atoms_in_1D.yaml
@@ -0,0 +1,117 @@
+#================================================================================
+# Configuration file for a diffusion experiment for 2 pseudo-atoms in 1D.
+#
+# An 'on-the-fly' Gaussian dataset is created and used for training.
+#================================================================================
+exp_name: egnn_2_atoms_in_1D
+run_name: run1
+max_epoch: 1000  # same value as scheduler.T_max and early_stopping.patience
+log_every_n_steps: 1
+gradient_clipping: 0.0  # NOTE(review): presumably 0.0 disables clipping; confirm with the trainer
+accumulate_grad_batches: 1  # make this number of forward passes before doing a backprop step
+
+elements: [A]  # a single entry; model.score_network.num_atom_types is 1
+
+# set to null to avoid setting a seed (can speed up GPU computation, but
+# results will not be reproducible)
+seed: 1234  # NOTE(review): data.random_seed (42) is a separate key; presumably it only seeds dataset generation
+
+# On-the-fly Data Module that creates a Gaussian dataset.
+data:
+  data_source: gaussian
+  random_seed: 42
+  number_of_atoms: 2
+  sigma_d: 0.01  # same value as score_viewer.analytical_score_network.sigma_d
+  equilibrium_relative_coordinates:  # two entries, matching number_of_atoms: 2
+    - [0.25]
+    - [0.75]
+
+  train_dataset_size: 8192  # 8192 / batch_size 64 = 128 batches
+  valid_dataset_size: 1024  # 1024 / batch_size 64 = 16 batches
+
+  batch_size: 64
+  num_workers: 0
+  max_atom: 2  # same value as number_of_atoms
+  spatial_dimension: 1
+
+
+spatial_dimension: 1  # also set to 1 under data, model.score_network and score_viewer.analytical_score_network
+
+model:
+  loss:
+    coordinates_algorithm: mse
+    atom_types_ce_weight: 0.0
+    atom_types_lambda_weight: 0.0
+    relative_coordinates_lambda_weight: 1.0  # the only non-zero weight in this section
+    lattice_lambda_weight: 0.0
+  score_network:
+    architecture: egnn
+    spatial_dimension: 1
+    num_atom_types: 1  # `elements` lists a single entry
+    n_layers: 4
+    coordinate_hidden_dimensions_size: 128
+    coordinate_n_hidden_dimensions: 4
+    coords_agg: "mean"
+    message_hidden_dimensions_size: 128
+    message_n_hidden_dimensions: 4
+    node_hidden_dimensions_size: 128
+    node_n_hidden_dimensions: 4
+    attention: false
+    normalize: true
+    residual: true
+    tanh: false
+    edges: fully_connected
+  noise:
+    total_time_steps: 100
+    sigma_min: 0.001  # same sigma range as score_viewer.score_viewer_parameters
+    sigma_max: 0.2
+
+# optimizer and scheduler
+optimizer:
+  name: adamw
+  learning_rate: 0.001  # NOTE(review): presumably the initial rate that the scheduler anneals toward eta_min; confirm
+  weight_decay: 5.0e-8  # keep the dot and the signed exponent: YAML 1.1 loaders need both to resolve a float
+
+
+scheduler:
+  name: CosineAnnealingLR  # NOTE(review): presumably maps to torch.optim.lr_scheduler.CosineAnnealingLR; confirm
+  T_max: 1000  # equals max_epoch
+  eta_min: 0.0
+
+# early stopping
+early_stopping:
+  metric: validation_epoch_loss
+  mode: min
+  patience: 1000  # equals max_epoch; NOTE(review): presumably this keeps early stopping from ever triggering - confirm
+
+model_checkpoint:
+  monitor: validation_epoch_loss  # same metric name that early_stopping uses
+  mode: min
+
+score_viewer:
+  record_every_n_epochs: 1
+
+  score_viewer_parameters:
+    sigma_min: 0.001  # same sigma range as model.noise
+    sigma_max: 0.2
+    number_of_space_steps: 100
+    starting_relative_coordinates:  # two entries, matching number_of_atoms: 2
+      - [0.0]
+      - [1.0]
+    ending_relative_coordinates:  # the starting entries in swapped order
+      - [1.0]
+      - [0.0]
+  analytical_score_network:
+    architecture: "analytical"
+    spatial_dimension: 1
+    number_of_atoms: 2
+    num_atom_types: 1
+    kmax: 5
+    equilibrium_relative_coordinates:  # same values as data.equilibrium_relative_coordinates
+      - [0.25]
+      - [0.75]
+    sigma_d: 0.01  # same value as data.sigma_d
+    use_permutation_invariance: true
+
+logging:
+  - tensorboard  # the only entry in the list