From fd25a21641265347a7f10566c87be7c88b32143c Mon Sep 17 00:00:00 2001 From: Bruno Rousseau Date: Thu, 26 Dec 2024 19:33:38 -0500 Subject: [PATCH] an example using the on-the-fly data module --- .../config_diffusion_egnn_2_atoms_in_1D.yaml | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 examples/config_files/diffusion/config_diffusion_egnn_2_atoms_in_1D.yaml diff --git a/examples/config_files/diffusion/config_diffusion_egnn_2_atoms_in_1D.yaml b/examples/config_files/diffusion/config_diffusion_egnn_2_atoms_in_1D.yaml new file mode 100644 index 00000000..a234112b --- /dev/null +++ b/examples/config_files/diffusion/config_diffusion_egnn_2_atoms_in_1D.yaml @@ -0,0 +1,117 @@ +#================================================================================ +# Configuration file for a diffusion experiment for 2 pseudo-atoms in 1D. +# +# An 'on-the-fly' Gaussian dataset is created and used for training. +#================================================================================ +exp_name: egnn_2_atoms_in_1D +run_name: run1 +max_epoch: 1000 +log_every_n_steps: 1 +gradient_clipping: 0.0 +accumulate_grad_batches: 1 # make this number of forward passes before doing a backprop step + +elements: [A] + +# set to null to avoid setting a seed (can speed up GPU computation, but +# results will not be reproducible) +seed: 1234 + +# On-the-fly Data Module that creates a Gaussian dataset. +data: + data_source: gaussian + random_seed: 42 + number_of_atoms: 2 + sigma_d: 0.01 + equilibrium_relative_coordinates: + - [0.25] + - [0.75] + + train_dataset_size: 8_192 + valid_dataset_size: 1_024 + + batch_size: 64 + num_workers: 0 + max_atom: 2 + spatial_dimension: 1 + + +spatial_dimension: 1 + +model: + loss: + coordinates_algorithm: mse + atom_types_ce_weight: 0.0 + atom_types_lambda_weight: 0.0 + relative_coordinates_lambda_weight: 1.0 + lattice_lambda_weight: 0.0 + score_network: + architecture: egnn + spatial_dimension: 1 + num_atom_types: 1 + n_layers: 4 + coordinate_hidden_dimensions_size: 128 + coordinate_n_hidden_dimensions: 4 + coords_agg: "mean" + message_hidden_dimensions_size: 128 + message_n_hidden_dimensions: 4 + node_hidden_dimensions_size: 128 + node_n_hidden_dimensions: 4 + attention: False + normalize: True + residual: True + tanh: False + edges: fully_connected + noise: + total_time_steps: 100 + sigma_min: 0.001 + sigma_max: 0.2 + +# optimizer and scheduler +optimizer: + name: adamw + learning_rate: 0.001 + weight_decay: 5.0e-8 + + +scheduler: + name: CosineAnnealingLR + T_max: 1000 + eta_min: 0.0 + +# early stopping +early_stopping: + metric: validation_epoch_loss + mode: min + patience: 1000 + +model_checkpoint: + monitor: validation_epoch_loss + mode: min + +score_viewer: + record_every_n_epochs: 1 + + score_viewer_parameters: + sigma_min: 0.001 + sigma_max: 0.2 + number_of_space_steps: 100 + starting_relative_coordinates: + - [0.0] + - [1.0] + ending_relative_coordinates: + - [1.0] + - [0.0] + analytical_score_network: + architecture: "analytical" + spatial_dimension: 1 + number_of_atoms: 2 + num_atom_types: 1 + kmax: 5 + equilibrium_relative_coordinates: + - [0.25] + - [0.75] + sigma_d: 0.01 + use_permutation_invariance: True + +logging: + - tensorboard