# example.yaml (from mir-group/nequip)
# an example yaml file
# for a full yaml file containing all possible features, check out full.yaml
# Two folders will be used during training: 'root'/process and 'root'/'run_name'
# run_name contains logfiles and saved models
# process contains processed data sets
# if 'root'/'run_name' exists, 'root'/'run_name'_'year'-'month'-'day'-'hour'-'min'-'s' will be used instead.
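# a sketch of the resulting layout for the settings below (illustrative, assuming a fresh run):
#   results/aspirin/
#     process/     <- processed data sets
#     example-run/ <- logfiles and saved models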
root: results/aspirin
run_name: example-run
seed: 0 # random number seed for numpy and torch
restart: false # set True for a restarted run
append: false # set True if a restarted run should append to the previous log file
default_dtype: float32 # type of float to use, e.g. float32 and float64
# network
r_max: 4.0 # cutoff radius in length units
num_layers: 6 # number of interaction blocks, we found 5-6 to work best
chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species
feature_irreps_hidden: 32x0o + 32x0e + 16x1o + 16x1e + 8x2o + 8x2e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
irreps_edge_sh: 0e + 1o + 2e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
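# for instance, assuming the integer shorthand described above, the following line
# should be equivalent to the setting above:
# irreps_edge_sh: 2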
conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block
nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended
resnet: false # set true to make interaction block a resnet-style update
num_basis: 8 # number of basis functions used in the radial basis
# radial network
invariant_layers: 2 # number of radial layers, we found it important to keep this small, 1 or 2
invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster
avg_num_neighbors: null # number of neighbors to divide by, None => no normalization.
use_sc: true # use self-connection or not, usually gives big improvement
# to specify different parameters for each convolutional layer, try examples below
# layer1_use_sc: true # use the "layer{i}_" prefix to specify parameters for only one of the layers,
# priority for different definition:
# invariant_neurons < InteractionBlock_invariant_neurons < layer{i}_invariant_neurons
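# a sketch of such overrides (the values are illustrative, not recommendations):
# layer2_invariant_neurons: 32 # hypothetical: only layer 2's radial network uses 32 hidden neurons
# layer3_use_sc: false # hypothetical: disable the self-connection in layer 3 only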
# data set
# the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
# key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
# all arrays are expected to have shape (nframe, natom, ?) except the fixed fields
# note that if your data set uses pbc, you need to also pass an array that maps to the nequip "pbc" key
dataset: npz # type of data set, can be npz or ase
dataset_url: http://quantum-machine.org/gdml/data/npz/aspirin_ccsd.zip # url to download the npz. optional
dataset_file_name: ./benchmark_data/aspirin_ccsd-train.npz # path to data set file
key_mapping:
  z: atomic_numbers # atomic species, integers
  E: total_energy # total potential energies to train to
  F: forces # atomic forces to train to
  R: pos # raw atomic positions
npz_fixed_field_keys: # fields that are repeated across different examples
  - atomic_numbers
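# for a periodic data set, the mapping would also need to cover the cell and pbc keys;
# a sketch, where the npz key names "C" and "P" are hypothetical:
# key_mapping:
#   C: cell # lattice vectors per frame
#   P: pbc # array that maps to the nequip "pbc" key, as noted above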
# As an alternative option to npz, you can also pass data as ASE Atoms objects.
# This can often be easier to work with; simply make sure the ASE Atoms object
# has a calculator for which atoms.get_potential_energy() and atoms.get_forces() are defined
# dataset: ase
# dataset_file_name: xxx.xyz # needs to be a format accepted by ase.io.read
# ase_args: # any arguments needed by ase.io.read
#   format: extxyz
# logging
wandb: true # we recommend using wandb for logging; it's optional and can be disabled by setting this to false
wandb_project: aspirin # project name used in wandb
wandb_resume: true # if true and restart is true, the previous wandb run will be resumed and updated;
                   # if false, a new wandb run will be generated
verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive
log_batch_freq: 1 # batch frequency, how often to print training errors within the same epoch
log_epoch_freq: 1 # epoch frequency, how often to print and save the model
# training
n_train: 100 # number of training data
n_val: 50 # number of validation data
learning_rate: 0.01 # learning rate, we found values between 0.01 and 0.005 to work best - this is often one of the most important hyperparameters to tune
batch_size: 5 # batch size, we found it important to keep this small for most applications (1-5)
max_epochs: 1000000 # stop training after this many epochs
metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
use_ema: false # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
ema_decay: 0.999 # ema weight, commonly set to 0.999
# loss function
loss_coeffs: # different weights to use in a weighted loss function
  forces: 100 # for MD applications, we recommend a force weight of 100 and an energy weight of 1
  total_energy: 1 # alternatively, if energies are not of importance, a force weight of 1 and an energy weight of 0 also works
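# the alternative mentioned above, spelled out (train on forces only):
# loss_coeffs:
#   forces: 1
#   total_energy: 0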
# output metrics
metrics_components:
  - - forces # key
    - rmse # "rmse" or "mse"
    - PerSpecies: True # if true, per-species contributions are counted separately
      report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
  - - forces
    - mae
    - PerSpecies: True
      report_per_component: False
  - - total_energy
    - mae
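# further components follow the same pattern; e.g., to also report the energy RMSE
# (assuming "rmse" is accepted here just as it is for forces):
#   - - total_energy
#     - rmse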
# optimizer, may be any optimizer defined in torch.optim
# the name `optimizer_name` is case sensitive
optimizer_name: Adam
optimizer_amsgrad: true
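# judging by optimizer_amsgrad above, the optimizer_ prefix seems to pass keyword
# arguments through to the chosen torch.optim class; a sketch under that assumption:
# optimizer_name: SGD
# optimizer_momentum: 0.9 # torch.optim.SGD's 'momentum' argument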
# lr scheduler, on plateau
lr_scheduler_name: ReduceLROnPlateau
lr_scheduler_patience: 100
lr_scheduler_factor: 0.5
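# by the same prefix pattern, other torch ReduceLROnPlateau keyword arguments may be
# passed with lr_scheduler_; a hedged sketch:
# lr_scheduler_threshold: 1.0e-4 # ReduceLROnPlateau's 'threshold' argument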