From 3f8369f0c82a7cbdafb60977800a25d17ea00bfe Mon Sep 17 00:00:00 2001
From: Alexander Goscinski
Date: Fri, 29 Dec 2023 16:10:39 +0100
Subject: [PATCH 01/23] initialize clebsch_gordan submodule in rascaline.torch

test torch array backend and add required dispatches

add _classes.py

forgot to push

add torchscript test for correlate_density

start torchscript test, fixing TorchScript UnaryOp bug

checkpoint: all-deps tests pass and made progress on TorchScriptability

checkpoint2: all-deps tests pass and made progress on TorchScriptability

checkpoint3: all-deps tests pass and made progress on TorchScriptability

checkpoint4: all-deps fails, invalid key pair (l1=0,l2=1,lam=2) is accessed in
sparse_combine

checkpoint5: all-deps tests pass and made progress on TorchScriptability

checkpoint6: all-deps tests pass and made progress on TorchScriptability

- abstracted out Dict[Tuple[int,int,int], Array] and
  Dict[Tuple[int,int,int], Dict[Tuple[int,int,int], Array]] into custom
  classes with utilities that allow similar access

checkpoint7: all-deps tests pass and made progress on TorchScriptability

- the sparse property in ClebschGordanReal is determined by the type of the
  coeffs, because we need to do isinstance checks for TorchScript anyway to
  distinguish the two types in the different functions:
  Union[SparseCgDict, DenseCgDict] -> SparseCgDict
- replacing the input parameter `return_empty_array` of the `sparse_combine`
  function in _cg_cache.py by an `empty_combine` function, to distinguish the
  None type for TorchScript

checkpoint8: all-deps tests pass and made progress on TorchScriptability

- Labels.insert cannot broadcast in metatensor.torch, the correct shape has
  to be given
- torch.tensors of type int32 cannot be converted to lists using tolist()
  https://github.com/pytorch/pytorch/issues/76295 therefore added a tolist
  dispatch function that first converts the array to int64

checkpoint9: all-deps tests pass and made progress on TorchScriptability,
torch-test test_torch_script_correlate_density_angular_selection passes

- changed to complex type until the real conversion, otherwise we get a
  (complex, real) operation

checkpoint10: all-deps tests pass and made progress on TorchScriptability,
torch-test test_torch_script_correlate_density_angular_selection passes

- made `like` parameter in _parse_selected_keys

all-deps test and torch-tests pass

- all isinstance checks are moved to the false branch of jit.is_scripting

linting fixes and partial format

fix doctest format

remove _dispatch.array_like

during documentation building the jit scripting of functions needs to be
disabled

integrating labels_array_like into int_array_like

cleaned code, removed unnecessary dispatch operations

remove TODOs in cg_cache
---
 .../rascaline/torch/utils/__init__.py         |   3 +-
 .../rascaline/torch/utils/clebsch_gordan.py   |  70 +++
 .../tests/utils/correlate_density.py          |  87 +++
 .../tests/utils/data/h2o_isolated.xyz         |   5 +
 .../utils/clebsch_gordan/_cg_cache.py         | 539 ++++++++++++------
 .../utils/clebsch_gordan/_classes.py          |  40 ++
 .../utils/clebsch_gordan/_clebsch_gordan.py   | 422 +++++++++----
 .../utils/clebsch_gordan/_dispatch.py         | 290 +++++++++-
 .../utils/clebsch_gordan/correlate_density.py |  71 ++-
 .../tests/utils/correlate_density.py          |  79 ++-
 tox.ini                                       |  12 +
 11 files changed, 1245 insertions(+), 373 deletions(-)
 create mode 100644 python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py
 create mode 100644 python/rascaline-torch/tests/utils/correlate_density.py
 create mode 100644 python/rascaline-torch/tests/utils/data/h2o_isolated.xyz
 create mode 100644
python/rascaline/rascaline/utils/clebsch_gordan/_classes.py
diff --git a/python/rascaline-torch/rascaline/torch/utils/__init__.py b/python/rascaline-torch/rascaline/torch/utils/__init__.py
index 3a73c2d20..9bd890fa2 100644
--- a/python/rascaline-torch/rascaline/torch/utils/__init__.py
+++ b/python/rascaline-torch/rascaline/torch/utils/__init__.py
@@ -1,5 +1,6 @@
 import os
 
+from . import clebsch_gordan
 from .power_spectrum import PowerSpectrum
 
 
@@ -10,4 +11,4 @@
 Path containing the CMake configuration files for the underlying C library
 """
 
-__all__ = ["PowerSpectrum"]
+__all__ = ["PowerSpectrum", "clebsch_gordan"]
diff --git a/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py b/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py
new file mode 100644
index 000000000..94b7f82ea
--- /dev/null
+++ b/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py
@@ -0,0 +1,70 @@
+import importlib
+import os
+import sys
+from typing import Any
+
+import torch
+from metatensor.torch import Labels, LabelsEntry, TensorBlock, TensorMap
+
+import rascaline.utils.clebsch_gordan
+
+
+# For details on what is happening here, take a look at
+# `rascaline.torch.calculators`.
+
+# Step 1: create the `_classes` module as an empty module
+spec = importlib.util.spec_from_loader(
+    "rascaline.torch.utils.clebsch_gordan._classes",
+    loader=None,
+)
+module = importlib.util.module_from_spec(spec)
+# This module only exposes a handful of things, defined here. Any changes here
+# MUST also be made to the `metatensor/operations/_classes.py` file, which is
+# used in non-TorchScript mode.
+module.__dict__["Labels"] = Labels
+module.__dict__["TensorBlock"] = TensorBlock
+module.__dict__["TensorMap"] = TensorMap
+module.__dict__["LabelsEntry"] = LabelsEntry
+module.__dict__["torch_jit_is_scripting"] = torch.jit.is_scripting
+module.__dict__["torch_jit_annotate"] = torch.jit.annotate
+module.__dict__["TorchTensor"] = torch.Tensor
+module.__dict__["Array"] = torch.Tensor
+
+
+def is_labels(obj: Any):
+    return isinstance(obj, Labels)
+
+
+if os.environ.get("RASCALINE_IMPORT_FOR_SPHINX") is None:
+    is_labels = torch.jit.script(is_labels)
+
+module.__dict__["is_labels"] = is_labels
+
+
+def check_isinstance(obj, ty):
+    if isinstance(ty, torch.ScriptClass):
+        # This branch is taken when `ty` is a custom class (TensorMap, …).
+        # Since `ty` is an instance of `torch.ScriptClass` and not a class
+        # itself, there is no way to check if obj is an "instance" of this
+        # class, so we always return True and hope for the best. Most errors
+        # should be caught by the TorchScript compiler anyway.
+ return True + else: + assert isinstance(ty, type) + return isinstance(obj, ty) + + +module.__dict__["check_isinstance"] = check_isinstance + +# register the module in sys.modules, so future import find it directly +sys.modules[spec.name] = module + + +# Step 2: create a module named `rascaline.torch.utils.clebsch_gordan` using code from +# `rascaline.utils.clebsch_gordan` +spec = importlib.util.spec_from_file_location( + "rascaline.torch.utils.clebsch_gordan", + rascaline.utils.clebsch_gordan.__file__, +) + +module = importlib.util.module_from_spec(spec) +sys.modules[spec.name] = module +spec.loader.exec_module(module) diff --git a/python/rascaline-torch/tests/utils/correlate_density.py b/python/rascaline-torch/tests/utils/correlate_density.py new file mode 100644 index 000000000..700db244c --- /dev/null +++ b/python/rascaline-torch/tests/utils/correlate_density.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +import io +import os +from typing import List + +import ase.io +import metatensor.torch +import pytest +import torch +from metatensor.torch import Labels, TensorBlock, TensorMap # noqa + +import rascaline.torch +from rascaline.torch.utils.clebsch_gordan.correlate_density import correlate_density + + +DATA_ROOT = os.path.join(os.path.dirname(__file__), "data") + +SPHEX_HYPERS = { + "cutoff": 2.5, # Angstrom + "max_radial": 3, # Exclusive + "max_angular": 3, # Inclusive + "atomic_gaussian_width": 0.2, + "radial_basis": {"Gto": {}}, + "cutoff_function": {"ShiftedCosine": {"width": 0.5}}, + "center_atom_weight": 1.0, +} + + +def h2o_isolated(): + return ase.io.read(os.path.join(DATA_ROOT, "h2o_isolated.xyz"), ":") + + +def spherical_expansion(frames: List[ase.Atoms]): + """Returns a rascaline SphericalExpansion""" + calculator = rascaline.torch.SphericalExpansion(**SPHEX_HYPERS) + return calculator.compute(rascaline.torch.systems_to_torch(frames)) + + +# copy of def test_correlate_density_angular_selection( +@pytest.mark.parametrize( + "selected_keys", + [ + None, + Labels( + names=["spherical_harmonics_l"], values=torch.tensor([1, 3]).reshape(-1, 1) + ), + ], +) +@pytest.mark.parametrize("skip_redundant", [True, False]) +def test_torch_script_correlate_density_angular_selection( + selected_keys: Labels, + skip_redundant: bool, +): + """ + Tests that the correct angular channels are output based on the specified + ``selected_keys``. + """ + frames = h2o_isolated() + nu_1 = spherical_expansion(frames) + scripted_correlate_density = torch.jit.script(correlate_density) + scripted_nu_2 = scripted_correlate_density( + density=nu_1, + correlation_order=2, + angular_cutoff=None, + selected_keys=selected_keys, + skip_redundant=skip_redundant, + ) + nu_2 = correlate_density( + density=nu_1, + correlation_order=2, + angular_cutoff=None, + selected_keys=selected_keys, + skip_redundant=skip_redundant, + ) + assert metatensor.torch.equal_metadata(scripted_nu_2, nu_2) + # The test below cannot pass for the moment until we can script wigners or extract + # cg_cache out of the scripting. 
For the moment the output is only zeros + # assert metatensor.torch.allclose(scripted_nu_2, nu_2) + + +def test_save_load(): + scripted_correlate_density = torch.jit.script(correlate_density) + buffer = io.BytesIO() + torch.jit.save(scripted_correlate_density, buffer) + buffer.seek(0) + torch.jit.load(buffer) + buffer.close() diff --git a/python/rascaline-torch/tests/utils/data/h2o_isolated.xyz b/python/rascaline-torch/tests/utils/data/h2o_isolated.xyz new file mode 100644 index 000000000..fc876d2ba --- /dev/null +++ b/python/rascaline-torch/tests/utils/data/h2o_isolated.xyz @@ -0,0 +1,5 @@ +3 +pbc="F F F" +O 2.56633400 2.50000000 2.50370100 +H 1.97361700 1.73067300 2.47063400 +H 1.97361700 3.26932700 2.47063400 diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py index 4a6118bd3..1ec80a159 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py @@ -3,28 +3,58 @@ Gordan coefficients for use in CG combinations. """ -from typing import Union +import math +from typing import Dict, List, Optional, Union import numpy as np import wigners from . import _dispatch +from ._classes import Array, torch_jit_annotate, torch_jit_is_scripting try: from mops import sparse_accumulation_of_products as sap # noqa F401 - HAS_MOPS = True + # We need to define a variable that is globally accessible in this way to be + # compatible with torch script + class MOPS_CONFIG: + def __init__(self): + return + + def is_installed(self) -> bool: + return True + except ImportError: - HAS_MOPS = False + + class MOPS_CONFIG: + def __init__(self): + return + + def is_installed(self) -> bool: + return False + try: + import torch from torch import Tensor as TorchTensor + + torch_dtype = torch.dtype + torch_device = torch.device + + HAS_TORCH = True except ImportError: + HAS_TORCH = False class TorchTensor: pass + class torch_dtype: + pass + + class torch_device: + pass + UNKNOWN_ARRAY_TYPE = ( "unknown array type, only numpy arrays and torch tensors are supported" @@ -129,14 +159,24 @@ class ClebschGordanReal: :param use_mops: whether to store the CG coefficients in MOPS sparse format. This is recommended as the default for sparse accumulation, but can only be used if Mops is installed. + :param use_torch: whether torch tensor or numpy arrays should be used for the cg + coeffs """ - def __init__(self, lambda_max: int, sparse: bool = True, use_mops: bool = HAS_MOPS): + def __init__( + self, + lambda_max: int, + sparse: bool = True, + use_mops: Optional[bool] = None, + use_torch: bool = False, + ): self._lambda_max = lambda_max - self._sparse = sparse + # For TorchScript we declare type + self._use_mops: bool = False if sparse: - if not HAS_MOPS: + if use_mops is None: + self._use_mops = MOPS_CONFIG().is_installed() # TODO: provide a warning once Mops is fully ready # import warnings # warnings.warn( @@ -145,11 +185,17 @@ def __init__(self, lambda_max: int, sparse: bool = True, use_mops: bool = HAS_MO # " git+https://github.com/lab-cosmo/mops`." # " Falling back to numpy for now." 
# ) - self._use_mops = False else: - self._use_mops = True + if use_mops and not MOPS_CONFIG().is_installed(): + raise ImportError("Specified to use MOPS, but it is not installed.") + else: + self._use_mops = use_mops else: + # The logic is a bit complicated so TorchScript can understand that it is + # not None + if use_mops is None: + self._use_mops = False # TODO: provide a warning once Mops is fully ready # if HAS_MOPS: # import warnings @@ -157,12 +203,26 @@ def __init__(self, lambda_max: int, sparse: bool = True, use_mops: bool = HAS_MO # "Mops is installed, but not being used" # " as dense operations chosen." # ) - self._use_mops = False + elif use_mops: + raise ImportError("MOPS is not available for non sparse operations.") + else: + self._use_mops = False - self._coeffs = ClebschGordanReal.build_coeff_dict( + if torch_jit_is_scripting(): + if not use_torch: + raise ValueError( + "use_torch is False, but this option is not supported when torch" + " scripted." + ) + self._use_torch = True + else: + self._use_torch = use_torch + + self._coeffs = _build_cg_coeff_dict( self._lambda_max, - self._sparse, + sparse, self._use_mops, + self._use_torch, ) @property @@ -171,7 +231,7 @@ def lambda_max(self): @property def sparse(self): - return self._sparse + return isinstance(self._coeffs, SparseCgDict) @property def use_mops(self): @@ -181,77 +241,199 @@ def use_mops(self): def coeffs(self): return self._coeffs - @staticmethod - def build_coeff_dict(lambda_max: int, sparse: bool, use_mops: bool): - """ - Builds a dictionary of Clebsch-Gordan coefficients for all possible - combination of l1 and l2, up to lambda_max. - """ - # real-to-complex and complex-to-real transformations as matrices - r2c = {} - c2r = {} - coeff_dict = {} - for lambda_ in range(0, lambda_max + 1): - c2r[lambda_] = _complex2real(lambda_) - r2c[lambda_] = _real2complex(lambda_) - - for l1 in range(lambda_max + 1): - for l2 in range(lambda_max + 1): - for lambda_ in range( - max(l1, l2) - min(l1, l2), min(lambda_max, (l1 + l2)) + 1 - ): - complex_cg = _complex_clebsch_gordan_matrix(l1, l2, lambda_) - - real_cg = (r2c[l1].T @ complex_cg.reshape(2 * l1 + 1, -1)).reshape( - complex_cg.shape - ) - real_cg = real_cg.swapaxes(0, 1) - real_cg = (r2c[l2].T @ real_cg.reshape(2 * l2 + 1, -1)).reshape( - real_cg.shape - ) - real_cg = real_cg.swapaxes(0, 1) +class DenseCgDict: + """ + This is a class imtates the access of a Dict[Tuple[int, int, int], Array] object. + We cannot directly use a dict of this type because we support TorchScript + and TorchScript only supports dicts of type Dict[int], Dict[float], Dict[str]. 
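+
+    A minimal usage sketch (the stored value is a placeholder array, not a
+    real block of CG coefficients):
+
+    >>> d = DenseCgDict()
+    >>> d.set(0, 1, 1, np.zeros((1, 3, 3)))
+    >>> d.get(0, 1, 1).shape
+    (1, 3, 3)
+    >>> d.keys()
+    [[0, 1, 1]]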
+ Internally we represent data structure as Dict[int, Dict[int, Dict[int, Array]]] + + Reference + --------- + https://pytorch.org/docs/stable/jit_language_reference.html + """ - real_cg = real_cg @ c2r[lambda_].T + def __init__(self): + self._dict: Dict[int, Dict[int, Dict[int, Array]]] = {} + + def get(self, i: int, j: int, k: int): + # __getitem__ is not supported by TorchScript + return self._dict[i][j][k] + + def set(self, i: int, j: int, k: int, value: Array): + # __setitem__ is not supported by TorchScript + if i not in self._dict: + self._dict[i] = torch_jit_annotate(Dict[int, Dict[int, Array]], {}) + if j not in self._dict[i]: + self._dict[i][j] = torch_jit_annotate(Dict[int, Array], {}) + self._dict[i][j][k] = value + + def delete(self, i: int, j: int, k: int): + # __delitem__ is not supported by TorchScript + del self._dict[i][j][k] + if len(self._dict[i][j]) == 0: + del self._dict[i][j] + if len(self._dict[i]) == 0: + del self._dict[i] + + def keys(self): + keys: List[List[int]] = [] + for i in self._dict.keys(): + for j in self._dict[i].keys(): + for k in self._dict[i][j].keys(): + keys.append([i, j, k]) + return keys + + +class SparseCgDict: + """ + This is a class imtates the access of a Dict[Tuple[int, int, int], Array] object. + We cannot directly use a dict of this type because we support TorchScript + and TorchScript only supports dicts of type Dict[int], Dict[float], Dict[str]. + Internally we represent data structure as Dict[int, Dict[int, Dict[int, Array]]] + + Reference + --------- + https://pytorch.org/docs/stable/jit_language_reference.html + """ - if (l1 + l2 + lambda_) % 2 == 0: - cg_l1l2lam = np.real(real_cg) - else: - cg_l1l2lam = np.imag(real_cg) - - if sparse: - # Find the m1, m2, mu idxs of the nonzero CG coeffs - nonzeros_cg_coeffs_idx = np.where(np.abs(cg_l1l2lam) > 1e-15) - if use_mops: - # Store CG coeffs in a specific format for use in - # MOPS. Here we need the m1, m2, mu, and CG coeffs - # to be stored as separate 1D arrays. - m1_arr, m2_arr, mu_arr, C_arr = [], [], [], [] - for m1, m2, mu in zip(*nonzeros_cg_coeffs_idx): - m1_arr.append(m1) - m2_arr.append(m2) - mu_arr.append(mu) - C_arr.append(cg_l1l2lam[m1, m2, mu]) - - # Reorder the arrays based on sorted mu values - mu_idxs = np.argsort(mu_arr) - m1_arr = np.array(m1_arr)[mu_idxs] - m2_arr = np.array(m2_arr)[mu_idxs] - mu_arr = np.array(mu_arr)[mu_idxs] - C_arr = np.array(C_arr)[mu_idxs] - cg_l1l2lam = (C_arr, m1_arr, m2_arr, mu_arr) - else: - # Otherwise fall back to torch/numpy and store as - # sparse dicts. 
- cg_l1l2lam = { - (m1, m2, mu): cg_l1l2lam[m1, m2, mu] - for m1, m2, mu in zip(*nonzeros_cg_coeffs_idx) - } + def __init__(self): + self._dict: Dict[int, Dict[int, Dict[int, DenseCgDict]]] = {} + + def get(self, l1: int, l2: int, lambda_: int): + # __getitem__ is not supported by TorchScript + return self._dict[l1][l2][lambda_] + + def set(self, l1: int, l2: int, lambda_: int, value: DenseCgDict): + # __setitem__ is not supported by TorchScript + if l1 not in self._dict: + self._dict[l1] = torch_jit_annotate(Dict[int, Dict[int, DenseCgDict]], {}) + if l2 not in self._dict[l1]: + self._dict[l1][l2] = torch_jit_annotate(Dict[int, DenseCgDict], {}) + self._dict[l1][l2][lambda_] = value + + def delete(self, l1: int, l2: int, lambda_: int): + # __delitem__ is not supported by TorchScript + del self._dict[l1][l2][lambda_] + if len(self._dict[l1][l2]) == 0: + del self._dict[l1][l2] + if len(self._dict[l1]) == 0: + del self._dict[l1] + + def keys(self): + keys: List[List[int]] = [] + for l1 in self._dict.keys(): + for l2 in self._dict[l1].keys(): + for lambda_ in self._dict[l1][l2].keys(): + keys.append([l1, l2, lambda_]) + return keys + + +def _build_cg_coeff_dict( + lambda_max: int, sparse: bool, use_mops: bool, use_torch: bool +): + """ + Builds a dictionary of Clebsch-Gordan coefficients for all possible + combination of l1 and l2, up to lambda_max. + """ + # real-to-complex and complex-to-real transformations as matrices + r2c: Dict[int, Array] = {} + c2r: Dict[int, Array] = {} + + if sparse: + coeff_dict: Union[SparseCgDict, DenseCgDict] = SparseCgDict() + else: + coeff_dict: Union[SparseCgDict, DenseCgDict] = DenseCgDict() + if use_torch or torch_jit_is_scripting(): + complex_like = torch.empty(0, dtype=torch.complex128) + double_like = torch.empty(0, dtype=torch.double) + else: + complex_like = np.empty(0, dtype=np.complex128) + double_like = np.empty(0, dtype=np.double) + + for lambda_ in range(0, lambda_max + 1): + c2r[lambda_] = _complex2real(lambda_, like=complex_like) + r2c[lambda_] = _real2complex(lambda_, like=complex_like) + + for l1 in range(lambda_max + 1): + for l2 in range(lambda_max + 1): + for lambda_ in range( + max(l1, l2) - min(l1, l2), min(lambda_max, (l1 + l2)) + 1 + ): + complex_cg = _complex_clebsch_gordan_matrix( + l1, l2, lambda_, complex_like + ) + + real_cg = (r2c[l1].T @ complex_cg.reshape(2 * l1 + 1, -1)).reshape( + complex_cg.shape + ) + + real_cg = real_cg.swapaxes(0, 1) + real_cg = (r2c[l2].T @ real_cg.reshape(2 * l2 + 1, -1)).reshape( + real_cg.shape + ) + real_cg = real_cg.swapaxes(0, 1) + + real_cg = real_cg @ c2r[lambda_].T + + if (l1 + l2 + lambda_) % 2 == 0: + cg_l1l2lam_dense = _dispatch.real(real_cg) + else: + cg_l1l2lam_dense = _dispatch.imag(real_cg) + + if isinstance(coeff_dict, SparseCgDict): + # Find the m1, m2, mu idxs of the nonzero CG coeffs + nonzeros_cg_coeffs_idx = _dispatch.where( + _dispatch.abs(cg_l1l2lam_dense) > 1e-15 + ) + # Till MOPS does not TorchScript support we disable the scripting + # of this part here. + if not torch_jit_is_scripting() and use_mops: + # Store CG coeffs in a specific format for use in + # MOPS. Here we need the m1, m2, mu, and CG coeffs + # to be stored as separate 1D arrays. 
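+                        # A sketch of the layout built below: for dense
+                        # coefficients C[m1, m2, mu], the four flat arrays
+                        # satisfy C_arr[i] == C[m1_arr[i], m2_arr[i], mu_arr[i]]
+                        # for every i, with all entries sorted by mu.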
+ m1_arr: List[int] = [] + m2_arr: List[int] = [] + mu_arr: List[int] = [] + C_arr: List[float] = [] + for i in range(len(nonzeros_cg_coeffs_idx[0])): + m1 = int(nonzeros_cg_coeffs_idx[0][i]) + m2 = int(nonzeros_cg_coeffs_idx[1][i]) + mu = int(nonzeros_cg_coeffs_idx[2][i]) + m1_arr.append(m1) + m2_arr.append(m2) + mu_arr.append(mu) + C_arr.append(float(cg_l1l2lam_dense[m1, m2, mu])) + + # Reorder the arrays based on sorted mu values + mu_idxs = _dispatch.argsort( + _dispatch.int_array_like(mu_arr, double_like) + ) + m1_arr = _dispatch.int_array_like(m1_arr, double_like)[mu_idxs] + m2_arr = _dispatch.int_array_like(m2_arr, double_like)[mu_idxs] + mu_arr = _dispatch.int_array_like(mu_arr, double_like)[mu_idxs] + C_arr = _dispatch.double_array_like(C_arr, double_like)[mu_idxs] + cg_l1l2lam_sparse = (C_arr, m1_arr, m2_arr, mu_arr) + coeff_dict.set(l1, l2, lambda_, cg_l1l2lam_sparse) + else: + # Otherwise fall back to torch/numpy and store as + # sparse dicts. + cg_l1l2lam_sparse = DenseCgDict() + for i in range(len(nonzeros_cg_coeffs_idx[0])): + m1 = nonzeros_cg_coeffs_idx[0][i] + m2 = nonzeros_cg_coeffs_idx[1][i] + mu = nonzeros_cg_coeffs_idx[2][i] + cg_l1l2lam_sparse.set( + m1, m2, mu, cg_l1l2lam_dense[m1, m2, mu] + ) + coeff_dict.set(l1, l2, lambda_, cg_l1l2lam_sparse) + else: # Store - coeff_dict[(l1, l2, lambda_)] = cg_l1l2lam + coeff_dict.set(l1, l2, lambda_, cg_l1l2lam_dense) - return coeff_dict + return coeff_dict # ============================ @@ -259,7 +441,7 @@ def build_coeff_dict(lambda_max: int, sparse: bool, use_mops: bool): # ============================ -def _real2complex(lambda_: int) -> np.ndarray: +def _real2complex(lambda_: int, like: Array) -> Array: """ Computes a matrix that can be used to convert from real to complex-valued spherical harmonics(coefficients) of order ``lambda_``. @@ -269,38 +451,43 @@ def _real2complex(lambda_: int) -> np.ndarray: See https://en.wikipedia.org/wiki/Spherical_harmonics#Real_form for details on the convention for how these tranformations are defined. + + Operations are dispatched to the corresponding array type given by ``like`` """ - result = np.zeros((2 * lambda_ + 1, 2 * lambda_ + 1), dtype=np.complex128) - inv_sqrt_2 = 1.0 / np.sqrt(2) - i_sqrt_2 = 1j / np.sqrt(2) + result = _dispatch.zeros_like(like, (2 * lambda_ + 1, 2 * lambda_ + 1)) + inv_sqrt_2 = 1.0 / math.sqrt(2.0) + i_sqrt_2 = 1.0j / complex(math.sqrt(2.0)) + for m in range(-lambda_, lambda_ + 1): if m < 0: # Positve part - result[lambda_ + m, lambda_ + m] = +i_sqrt_2 + result[lambda_ + m, lambda_ + m] = i_sqrt_2 # Negative part result[lambda_ - m, lambda_ + m] = -i_sqrt_2 * ((-1) ** m) if m == 0: - result[lambda_, lambda_] = +1.0 + result[lambda_, lambda_] = 1.0 if m > 0: # Negative part - result[lambda_ - m, lambda_ + m] = +inv_sqrt_2 + result[lambda_ - m, lambda_ + m] = inv_sqrt_2 # Positive part - result[lambda_ + m, lambda_ + m] = +inv_sqrt_2 * ((-1) ** m) + result[lambda_ + m, lambda_ + m] = inv_sqrt_2 * ((-1) ** m) return result -def _complex2real(lambda_: int) -> np.ndarray: +def _complex2real(lambda_: int, like) -> Array: """ Converts from complex to real spherical harmonics. This is just given by the conjugate tranpose of the real->complex transformation matrices. 
+ + Operations are dispatched to the corresponding array type given by ``like`` """ - return np.conjugate(_real2complex(lambda_)).T + return _dispatch.conjugate(_real2complex(lambda_, like)).T -def _complex_clebsch_gordan_matrix(l1, l2, lambda_): +def _complex_clebsch_gordan_matrix(l1: int, l2: int, lambda_: int, like: Array): r"""clebsch-gordan matrix Computes the Clebsch-Gordan (CG) matrix for transforming complex-valued spherical harmonics. @@ -321,12 +508,13 @@ def _complex_clebsch_gordan_matrix(l1, l2, lambda_): l1: l number for the first set of spherical harmonics l2: l number for the second set of spherical harmonics lambda_: l number For the third set of spherical harmonics + like: Operations are dispatched to the corresponding this arguments array type Returns: cg: CG matrix for transforming complex-valued spherical harmonics >>> from scipy.special import sph_harm >>> import numpy as np >>> import wigners - >>> C_112 = _complex_clebsch_gordan_matrix(1, 1, 2) + >>> C_112 = _complex_clebsch_gordan_matrix(1, 1, 2, np.empty(0)) >>> comp_sph_1 = np.array([sph_harm(m, 1, 0.2, 0.2) for m in range(-1, 1 + 1)]) >>> comp_sph_2 = np.array([sph_harm(m, 1, 0.2, 0.2) for m in range(-1, 1 + 1)]) >>> # obtain the (unnormalized) spherical harmonics @@ -339,10 +527,15 @@ def _complex_clebsch_gordan_matrix(l1, l2, lambda_): >>> np.allclose(ratio[0], ratio) True """ - if np.abs(l1 - l2) > lambda_ or np.abs(l1 + l2) < lambda_: - return np.zeros((2 * l1 + 1, 2 * l2 + 1, 2 * lambda_ + 1), dtype=np.double) + if abs(l1 - l2) > lambda_ or abs(l1 + l2) < lambda_: + return _dispatch.zeros_like(like, (2 * l1 + 1, 2 * l2 + 1, 2 * lambda_ + 1)) else: - return wigners.clebsch_gordan_array(l1, l2, lambda_) + # TODO temporary disable wigners package till refactor of cg correlate_density + # API + if torch_jit_is_scripting(): + return _dispatch.zeros_like(like, (2 * l1 + 1, 2 * l2 + 1, 2 * lambda_ + 1)) + else: + return wigners.clebsch_gordan_array(l1, l2, lambda_) # ================================================= @@ -351,12 +544,11 @@ def _complex_clebsch_gordan_matrix(l1, l2, lambda_): def combine_arrays( - arr_1: Union[np.ndarray, TorchTensor], - arr_2: Union[np.ndarray, TorchTensor], + arr_1: Array, + arr_2: Array, lambda_: int, - cg_cache, - return_empty_array: bool = False, -) -> Union[np.ndarray, TorchTensor]: + cg_cache: Union[ClebschGordanReal, None], +) -> Array: """ Couples arrays `arr_1` and `arr_2` corresponding to the irreducible spherical components of 2 angular channels l1 and l2 using the appropriate @@ -394,29 +586,52 @@ def combine_arrays( :param cg_cache: either a sparse dictionary with keys (m1, m2, mu) and array values being sparse blocks of shape , or a dense array of shape [(2 * l1 +1) * (2 * l2 +1), (2 * lambda_ + 1)]. 
+        If it is None, only an empty array of the output shape is returned.
 
     :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties]
     """
     # If just precomputing metadata, return an empty array
-    if return_empty_array:
-        return sparse_combine(arr_1, arr_2, lambda_, cg_cache, return_empty_array=True)
+    if cg_cache is None:
+        return empty_combine(arr_1, arr_2, lambda_)
+
+    # We have to temporarily store it so TorchScript can infer the correct type
+    cg_cache_coeffs = cg_cache.coeffs
+    if isinstance(cg_cache_coeffs, SparseCgDict):
+        return sparse_combine(arr_1, arr_2, lambda_, cg_cache_coeffs)
+    elif isinstance(cg_cache_coeffs, DenseCgDict):
+        return dense_combine(arr_1, arr_2, lambda_, cg_cache_coeffs)
+    else:
+        raise ValueError(
+            f"Wrong type of cg coeffs, found type {type(cg_cache_coeffs)},"
+            " but only SparseCgDict and DenseCgDict are supported"
+        )
 
-    # Otherwise, perform the CG combination
-    # Spare CG cache
-    if cg_cache.sparse:
-        return sparse_combine(arr_1, arr_2, lambda_, cg_cache, return_empty_array=False)
-    # Dense CG cache
-    return dense_combine(arr_1, arr_2, lambda_, cg_cache)
 
+def empty_combine(
+    arr_1: Array,
+    arr_2: Array,
+    lambda_: int,
+) -> Array:
+    """
+    Returns an empty array with the shape that a Clebsch-Gordan combination
+    of the two arrays would produce, used when only precomputing metadata.
+    """
+    # Samples dimensions must be the same
+    assert arr_1.shape[0] == arr_2.shape[0]
+
+    # Define other useful dimensions
+    n_i = arr_1.shape[0]  # number of samples
+    n_p = arr_1.shape[2]  # number of properties in arr_1
+    n_q = arr_2.shape[2]  # number of properties in arr_2
+
+    return _dispatch.empty_like(arr_1, (n_i, 2 * lambda_ + 1, n_p * n_q))
 
 
 def sparse_combine(
-    arr_1: Union[np.ndarray, TorchTensor],
-    arr_2: Union[np.ndarray, TorchTensor],
+    arr_1: Array,
+    arr_2: Array,
     lambda_: int,
-    cg_cache,
-    return_empty_array: bool = False,
-) -> Union[np.ndarray, TorchTensor]:
+    cg_cache_coeffs: SparseCgDict,
+) -> Array:
     """
     Performs a Clebsch-Gordan combination step on 2 arrays using sparse
     operations.
The angular channel of each block is inferred from the size of @@ -445,10 +660,27 @@ def sparse_combine( n_p = arr_1.shape[2] # number of properties in arr_1 n_q = arr_2.shape[2] # number of properties in arr_2 - if return_empty_array: # used when only computing metadata - return _dispatch.zeros_like((n_i, 2 * lambda_ + 1, n_p * n_q), like=arr_1) + if isinstance(arr_1, TorchTensor) or not MOPS_CONFIG().is_installed(): + # Initialise output array + arr_out = _dispatch.zeros_like(arr_1, (n_i, 2 * lambda_ + 1, n_p * n_q)) - if isinstance(arr_1, np.ndarray) and HAS_MOPS: + # Get the corresponding Clebsch-Gordan coefficients + cg_coeffs = cg_cache_coeffs.get(l1, l2, lambda_) + + # Fill in each mu component of the output array in turn + for item in cg_coeffs.keys(): + m1 = item[0] + m2 = item[1] + mu = item[2] + # Broadcast arrays, multiply together and with CG coeff + arr_out[:, mu, :] += ( + arr_1[:, m1, :, None] + * arr_2[:, m2, None, :] + * cg_coeffs.get(m1, m2, mu) + ).reshape(n_i, n_p * n_q) + + return arr_out + elif isinstance(arr_1, np.ndarray) and MOPS_CONFIG().is_installed(): # Reshape arr_1 = np.repeat(arr_1[:, :, :, None], n_q, axis=3).reshape( n_i, 2 * l1 + 1, n_p * n_q @@ -464,7 +696,7 @@ def sparse_combine( arr_out = sap( arr_1, arr_2, - *cg_cache._coeffs[(l1, l2, lambda_)], + *cg_cache_coeffs[(l1, l2, lambda_)], output_size=2 * lambda_ + 1, ) assert arr_out.shape == (n_i * n_p * n_q, 2 * lambda_ + 1) @@ -474,33 +706,16 @@ def sparse_combine( arr_out = _dispatch.swapaxes(arr_out, 1, 2) return arr_out - - if isinstance(arr_1, np.ndarray) or isinstance(arr_1, TorchTensor): - # Initialise output array - arr_out = _dispatch.zeros_like((n_i, 2 * lambda_ + 1, n_p * n_q), like=arr_1) - - # Get the corresponding Clebsch-Gordan coefficients - cg_coeffs = cg_cache.coeffs[(l1, l2, lambda_)] - - # Fill in each mu component of the output array in turn - for m1, m2, mu in cg_coeffs.keys(): - # Broadcast arrays, multiply together and with CG coeff - arr_out[:, mu, :] += ( - arr_1[:, m1, :, None] * arr_2[:, m2, None, :] * cg_coeffs[(m1, m2, mu)] - ).reshape(n_i, n_p * n_q) - - return arr_out - else: raise TypeError(UNKNOWN_ARRAY_TYPE) def dense_combine( - arr_1: Union[np.ndarray, TorchTensor], - arr_2: Union[np.ndarray, TorchTensor], + arr_1: Array, + arr_2: Array, lambda_: int, - cg_cache, -) -> Union[np.ndarray, TorchTensor]: + cg_cache_coeffs: DenseCgDict, +) -> Array: """ Performs a Clebsch-Gordan combination step on 2 arrays using a dense operation. 
The angular channel of each block is inferred from the size of @@ -517,36 +732,32 @@ def dense_combine( :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties] """ - if isinstance(arr_1, np.ndarray) or isinstance(arr_1, TorchTensor): - # Infer l1 and l2 from the len of the length of axis 1 of each tensor - l1 = (arr_1.shape[1] - 1) // 2 - l2 = (arr_2.shape[1] - 1) // 2 - cg_coeffs = cg_cache.coeffs[(l1, l2, lambda_)] - - # (samples None None l1_mu q) * (samples l2_mu p None None) - # -> (samples l2_mu p l1_mu q) we broadcast it in this way - # so we only need to do one swapaxes in the next step - arr_out = arr_1[:, None, None, :, :] * arr_2[:, :, :, None, None] - - # (samples l2_mu p l1_mu q) -> (samples q p l1_mu l2_mu) - arr_out = _dispatch.swapaxes(arr_out, 1, 4) - - # samples (q p l1_mu l2_mu) -> (samples (q p) (l1_mu l2_mu)) - arr_out = arr_out.reshape( - -1, - arr_1.shape[2] * arr_2.shape[2], - arr_1.shape[1] * arr_2.shape[1], - ) + # Infer l1 and l2 from the len of the length of axis 1 of each tensor + l1 = (arr_1.shape[1] - 1) // 2 + l2 = (arr_2.shape[1] - 1) // 2 + cg_coeffs = cg_cache_coeffs.get(l1, l2, lambda_) - # (l1_mu l2_mu lam_mu) -> ((l1_mu l2_mu) lam_mu) - cg_coeffs = cg_coeffs.reshape(-1, 2 * lambda_ + 1) + # (samples None None l1_mu q) * (samples l2_mu p None None) + # -> (samples l2_mu p l1_mu q) we broadcast it in this way + # so we only need to do one swapaxes in the next step + arr_out = arr_1[:, None, None, :, :] * arr_2[:, :, :, None, None] - # (samples (q p) (l1_mu l2_mu)) @ ((l1_mu l2_mu) lam_mu) - # -> samples (q p) lam_mu - arr_out = arr_out @ cg_coeffs + # (samples l2_mu p l1_mu q) -> (samples q p l1_mu l2_mu) + arr_out = _dispatch.swapaxes(arr_out, 1, 4) - # (samples (q p) lam_mu) -> (samples lam_mu (q p)) - return _dispatch.swapaxes(arr_out, 1, 2) + # samples (q p l1_mu l2_mu) -> (samples (q p) (l1_mu l2_mu)) + arr_out = arr_out.reshape( + -1, + arr_1.shape[2] * arr_2.shape[2], + arr_1.shape[1] * arr_2.shape[1], + ) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) + # (l1_mu l2_mu lam_mu) -> ((l1_mu l2_mu) lam_mu) + cg_coeffs = cg_coeffs.reshape(-1, 2 * lambda_ + 1) + + # (samples (q p) (l1_mu l2_mu)) @ ((l1_mu l2_mu) lam_mu) + # -> samples (q p) lam_mu + arr_out = arr_out @ cg_coeffs + + # (samples (q p) lam_mu) -> (samples lam_mu (q p)) + return _dispatch.swapaxes(arr_out, 1, 2) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py b/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py new file mode 100644 index 000000000..8ed5bdce5 --- /dev/null +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py @@ -0,0 +1,40 @@ +from typing import Any, Union + +import numpy as np +from metatensor import Labels, LabelsEntry, TensorBlock, TensorMap + + +def torch_jit_is_scripting(): + return False + + +def torch_jit_annotate(annotation, obj): + return obj + + +def is_labels(obj: Any): + return isinstance(obj, Labels) + + +check_isinstance = isinstance + +try: + from torch import Tensor as TorchTensor +except ImportError: + + class TorchTensor: + pass + + +Array = Union[np.ndarray, TorchTensor] + +__all__ = [ + "Labels", + "TensorBlock", + "TensorMap", + "LabelsEntry", + "torch_jit_is_scripting", + "torch_jit_annotate", + "check_isinstance", + "is_labels", +] diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py index 3bb575cc0..a6359c810 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py 
+++ b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py @@ -4,12 +4,19 @@ metatensor :py:class:`TensorMap` objects. """ -import itertools from typing import List, Optional, Tuple, Union -from metatensor import Labels, TensorBlock, TensorMap - from . import _cg_cache, _dispatch +from ._classes import ( + Array, + Labels, + LabelsEntry, + TensorBlock, + TensorMap, + is_labels, + torch_jit_annotate, + torch_jit_is_scripting, +) # ================================================================== @@ -34,23 +41,27 @@ def _standardize_keys(tensor: TensorMap) -> TensorMap: if "species_neighbor" in tensor.keys.names: tensor = tensor.keys_to_properties(keys_to_move="species_neighbor") keys = tensor.keys.insert( - name="order_nu", - values=_dispatch.int_array_like([1], like=tensor.keys.values), index=0, + name="order_nu", + values=_dispatch.int_array_like( + len(tensor.keys.values) * [1], like=tensor.keys.values + ), ) keys = keys.insert( - name="inversion_sigma", - values=_dispatch.int_array_like([1], like=tensor.keys.values), index=1, + name="inversion_sigma", + values=_dispatch.int_array_like( + len(tensor.keys.values) * [1], like=tensor.keys.values + ), ) return TensorMap(keys=keys, blocks=[b.copy() for b in tensor.blocks()]) def _parse_selected_keys( n_iterations: int, + like: Array, angular_cutoff: Optional[int] = None, - selected_keys: Optional[Union[Labels, List[Labels]]] = None, - like=None, + selected_keys: Optional[Union[Labels, List[Union[Labels, None]]]] = None, ) -> List[Union[None, Labels]]: """ Parses the `selected_keys` argument passed to public functions. Checks the @@ -60,6 +71,39 @@ def _parse_selected_keys( `like` is required if a new :py:class:`Labels` object is to be created by :py:mod:`_dispatch`. """ + # Check the selected_keys + if ( + (selected_keys is not None) + and (not isinstance(selected_keys, list)) + and (not is_labels(selected_keys)) + ): + raise TypeError( + "`selected_keys` must be a `Labels` or List[Union[None, `Labels`]]" + ) + + if isinstance(selected_keys, list): + # Both if conditions check the same thing, the second is for metetensor-core and + # metatensor-torch, the first one for torch-scripted metatensor-torch + if torch_jit_is_scripting(): + if not all( + [ + isinstance(selected_keys[i], Labels) or (selected_keys[i] is None) + for i in range(len(selected_keys)) + ] + ): + raise TypeError( + "`selected_keys` must be a Labels or List[Union[None, Labels]]" + ) + elif not all( + [ + is_labels(selected_keys[i]) or (selected_keys[i] is None) + for i in range(len(selected_keys)) + ] + ): + raise TypeError( + "`selected_keys` must be a Labels or List[Union[None, Labels]]" + ) + # Check angular_cutoff arg if angular_cutoff is not None: if not isinstance(angular_cutoff, int): @@ -67,46 +111,66 @@ def _parse_selected_keys( if angular_cutoff < 1: raise ValueError("`angular_cutoff` must be >= 1") + # we use a new variable for selected_keys so TorchScript can infer correct type + selected_keys_: List[Union[None, Labels]] = [] + if selected_keys is None: if angular_cutoff is None: # no selections at all - selected_keys = [None] * n_iterations + selected_keys_ = [ + torch_jit_annotate(Union[None, Labels], None) + ] * n_iterations else: # Create a key selection with all angular channels <= the specified # angular cutoff - selected_keys = [ + label: Union[None, Labels] = torch_jit_annotate( + Union[None, Labels], Labels( names=["spherical_harmonics_l"], - values=_dispatch.int_range_like( - 0, angular_cutoff, like=like + values=_dispatch.int_array_like( + 
list(range(0, angular_cutoff)), like=like ).reshape(-1, 1), - ) - ] * n_iterations - - if isinstance(selected_keys, Labels): + ), + ) + selected_keys_ = [label] * n_iterations + + # Both if conditions check the same thing, we cannot write them out into one + # condition, because otherwise the TorchScript compiler cannot infer that + # selected_keys is Labels. We need both because isinstance(selected, Labels) works + # with metatensor-torch only when scripted + if torch_jit_is_scripting(): + if isinstance(selected_keys, Labels): + # Create a list, but only apply a key selection at the final iteration + selected_keys_ = [torch_jit_annotate(Union[None, Labels], None)] * ( + n_iterations - 1 + ) + selected_keys_.append(torch_jit_annotate(Labels, selected_keys)) + elif is_labels(selected_keys): # Create a list, but only apply a key selection at the final iteration - selected_keys = [None] * (n_iterations - 1) + [selected_keys] - - # Check the selected_keys - if not isinstance(selected_keys, List): - raise TypeError( - "`selected_keys` must be a `Labels` or List[Union[None, `Labels`]]" + selected_keys_ = [torch_jit_annotate(Union[None, Labels], None)] * ( + n_iterations - 1 ) - if not len(selected_keys) == n_iterations: + selected_keys_.append(torch_jit_annotate(Labels, selected_keys)) + elif isinstance(selected_keys, list): + selected_keys_ = selected_keys + + if not len(selected_keys_) == n_iterations: raise ValueError( "`selected_keys` must be a List[Union[None, Labels]] of length" " `correlation_order` - 1" ) - if not _dispatch.all( - [isinstance(val, (Labels, type(None))) for val in selected_keys] - ): - raise TypeError("`selected_keys` must be a Labels or List[Union[None, Labels]]") # Now iterate over each of the Labels (or None) in the list and check - for slct in selected_keys: + for slct in selected_keys_: if slct is None: continue - assert isinstance(slct, Labels) - if not _dispatch.all( + if torch_jit_is_scripting(): + if not (isinstance(slct, Labels)): + raise ValueError("Asserted that elements in `slct` are Labels") + else: + if not (is_labels(slct)): + raise ValueError("Asserted that elements in `slct` are Labels") + + if not all( [ name in ["spherical_harmonics_l", "inversion_sigma"] for name in slct.names @@ -118,33 +182,45 @@ def _parse_selected_keys( ) if "spherical_harmonics_l" in slct.names: if angular_cutoff is not None: - if not _dispatch.all( + below_cutoff: Array = ( slct.column("spherical_harmonics_l") <= angular_cutoff - ): + ) + if not _dispatch.all(below_cutoff): raise ValueError( "specified angular channels in `selected_keys` must be <= the" " specified `angular_cutoff`" ) - if not _dispatch.all( - [angular_l >= 0 for angular_l in slct.column("spherical_harmonics_l")] - ): + above_zero = _dispatch.bool_array_like( + [ + bool(angular_l >= 0) + for angular_l in slct.column("spherical_harmonics_l") + ], + like=like, + ) + if not _dispatch.all(above_zero): raise ValueError( "specified angular channels in `selected_keys` must be >= 0" ) if "inversion_sigma" in slct.names: if not _dispatch.all( - [parity_s in [-1, +1] for parity_s in slct.column("inversion_sigma")] + _dispatch.bool_array_like( + [ + bool(parity_s in [-1, 1]) + for parity_s in slct.column("inversion_sigma") + ], + like, + ) ): raise ValueError( "specified parities in `selected_keys` must be -1 or +1" ) - return selected_keys + return selected_keys_ def _parse_bool_iteration_filters( n_iterations: int, - skip_redundant: Optional[Union[bool, List[bool]]] = False, + skip_redundant: Union[bool, List[bool]] = 
False, output_selection: Optional[Union[bool, List[bool]]] = None, ) -> List[List[bool]]: """ @@ -152,10 +228,13 @@ def _parse_bool_iteration_filters( public functions. """ if isinstance(skip_redundant, bool): - skip_redundant = [skip_redundant] * n_iterations - if not _dispatch.all([isinstance(val, bool) for val in skip_redundant]): + skip_redundant_ = [skip_redundant] * n_iterations + else: + skip_redundant_ = skip_redundant + + if not all([isinstance(val, bool) for val in skip_redundant_]): raise TypeError("`skip_redundant` must be a `bool` or `list` of `bool`") - if not len(skip_redundant) == n_iterations: + if not len(skip_redundant_) == n_iterations: raise ValueError( "`skip_redundant` must be a bool or `list` of `bool` of length" " `correlation_order` - 1" @@ -165,7 +244,7 @@ def _parse_bool_iteration_filters( else: if isinstance(output_selection, bool): output_selection = [output_selection] * n_iterations - if not isinstance(output_selection, List): + if not isinstance(output_selection, list): raise TypeError("`output_selection` must be passed as `list` of `bool`") if not len(output_selection) == n_iterations: @@ -173,12 +252,12 @@ def _parse_bool_iteration_filters( "`output_selection` must be a ``list`` of ``bool`` of length" " corresponding to the number of CG iterations" ) - if not _dispatch.all([isinstance(v, bool) for v in output_selection]): + if not all([isinstance(v, bool) for v in output_selection]): raise TypeError("`output_selection` must be passed as a `list` of `bool`") - if not _dispatch.all([isinstance(v, bool) for v in output_selection]): + if not all([isinstance(v, bool) for v in output_selection]): raise TypeError("`output_selection` must be passed as a `list` of `bool`") - return skip_redundant, output_selection + return skip_redundant_, output_selection def _precompute_keys( @@ -187,7 +266,7 @@ def _precompute_keys( n_iterations: int, selected_keys: List[Union[None, Labels]], skip_redundant: List[bool], -) -> List[Tuple[Labels, List[List[int]]]]: +) -> List[Tuple[List[LabelsEntry], List[LabelsEntry], Labels]]: """ Computes all the keys metadata needed to perform `n_iterations` of CG combination steps. @@ -201,7 +280,7 @@ def _precompute_keys( If `skip_redundant` is True, then keys that represent redundant CG operations are not included in the output keys at each step. """ - keys_metadata = [] + keys_metadata: List[Tuple[List[LabelsEntry], List[LabelsEntry], Labels]] = [] keys_out = keys_1 for iteration in range(n_iterations): # Get the keys metadata for the combination of the 2 tensors @@ -209,12 +288,13 @@ def _precompute_keys( keys_1=keys_out, keys_2=keys_2, ) - if selected_keys[iteration] is not None: + selected_keys_i = selected_keys[iteration] + if selected_keys_i is not None: keys_1_entries, keys_2_entries, keys_out = _apply_key_selection( keys_1_entries, keys_2_entries, keys_out, - selected_keys=selected_keys[iteration], + selected_keys=selected_keys_i, ) if skip_redundant[iteration]: @@ -237,7 +317,8 @@ def _precompute_keys( def _precompute_keys_full_product( keys_1: Labels, keys_2: Labels -) -> Tuple[List, List, Labels]: +) -> Tuple[List[LabelsEntry], List[LabelsEntry], Labels]: + # Due to TorchScript we cannot use List[LabelsEntry] """ Given the keys of 2 TensorMaps, returns the keys that would be present after a full CG product of these TensorMaps. 
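A minimal sketch of the coupling rule that the rewritten loop below applies to
each compatible key pair (the helper name is illustrative, not part of the
patch):

    def allowed_output_channels(lam1: int, lam2: int, sig1: int, sig2: int):
        # CG coupling of angular orders lam1 and lam2 produces every lambda in
        # the inclusive range |lam1 - lam2| ... lam1 + lam2; the parity of each
        # output channel is sig1 * sig2 * (-1) ** (lam1 + lam2 + lambda_)
        channels = []
        for lambda_ in range(abs(lam1 - lam2), lam1 + lam2 + 1):
            sig = sig1 * sig2 * (-1) ** (lam1 + lam2 + lambda_)
            channels.append((lambda_, sig))
        return channels

    # For example, combining two lambda = 1, sigma = +1 blocks gives:
    # allowed_output_channels(1, 1, 1, 1) == [(0, 1), (1, -1), (2, 1)]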
@@ -272,11 +353,11 @@
     `keys_2` must follow the key name convention: ["order_nu",
     "inversion_sigma", "spherical_harmonics_l", "species_center"]
 
-    Returned is Tuple[List, List, Labels]. The first two lists correspond to the
-    LabelsEntry objects of the keys being combined. The third element is a
-    Labels object corresponding to the keys of the output TensorMap. Each entry
-    in this Labels object corresponds to the keys is formed by combination of
-    the pair of blocks indexed by correspoding key pairs in the first two lists.
+    The first two lists of the returned value correspond to the LabelsEntry
+    objects of the keys being combined. The third element is a Labels object
+    corresponding to the keys of the output TensorMap. Each entry in this
+    Labels object corresponds to a key formed by the combination of the pair
+    of blocks indexed by the corresponding key pairs in the first two lists.
     """
     # Get the correlation order of the first TensorMap.
     unique_nu = _dispatch.unique(keys_1.column("order_nu"))
     if len(unique_nu) > 1:
         raise ValueError(
             "keys_1 must correspond to a tensor of a single correlation order."
             f" Found {len(unique_nu)} body orders: {unique_nu}"
         )
-    nu1 = unique_nu[0]
+    nu1 = int(unique_nu[0])
 
     # Define new correlation order of output TensorMap
     nu = nu1 + 1
@@ -295,23 +376,25 @@
 
     # If nu1 = 1, the key names don't yet have any "lx" columns
     if nu1 == 1:
-        l_list_names = []
+        l_list_names: List[str] = []
         new_l_list_names = ["l1", "l2"]
     else:
         l_list_names = [f"l{angular_l}" for angular_l in range(1, nu1 + 1)]
         new_l_list_names = l_list_names + [f"l{nu}"]
 
     # Check key names
-    assert _dispatch.all(
-        keys_1.names
-        == ["order_nu", "inversion_sigma", "spherical_harmonics_l", "species_center"]
-        + l_list_names
-        + [f"k{k}" for k in range(2, nu1)]
-    )
-    assert _dispatch.all(
-        keys_2.names
-        == ["order_nu", "inversion_sigma", "spherical_harmonics_l", "species_center"]
-    )
+    assert keys_1.names == [
+        "order_nu",
+        "inversion_sigma",
+        "spherical_harmonics_l",
+        "species_center",
+    ] + l_list_names + [f"k{k}" for k in range(2, nu1)]
+    assert keys_2.names == [
+        "order_nu",
+        "inversion_sigma",
+        "spherical_harmonics_l",
+        "species_center",
+    ]
 
     # Define key names of output Labels (i.e. for combined TensorMap)
     new_names = (
@@ -320,43 +403,66 @@
         + [f"k{k}" for k in range(2, nu)]
     )
 
-    new_key_values = []
-    keys_1_entries = []
-    keys_2_entries = []
-    for key_1, key_2 in itertools.product(keys_1, keys_2):
-        # Unpack relevant key values
-        sig1, lam1, a = key_1.values[1:4]
-        sig2, lam2, a2 = key_2.values[1:4]
-
-        # Only combine blocks of the same chemical species
-        if a != a2:
-            continue
-
-        # First calculate the possible non-zero angular channels that can be
-        # formed from combination of blocks of order `lam1` and `lam2`. This
-        # corresponds to values in the inclusive range { |lam1 - lam2|, ...,
-        # |lam1 + lam2| }
-        nonzero_lams = _dispatch.int_range_like(
-            abs(lam1 - lam2), abs(lam1 + lam2) + 1, like=key_1.values
-        )
-
-        # Now iterate over the non-zero angular channels and apply the custom
-        # selections
-        for lambda_ in nonzero_lams:
-            # Calculate new sigma
-            sig = sig1 * sig2 * (-1) ** (lam1 + lam2 + lambda_)
-
-            # Extract the l and k lists from keys_1
-            l_list = key_1.values[4 : 4 + nu1].tolist()
-            k_list = key_1.values[4 + nu1 :].tolist()
-
-            # Build the new keys values. l{nu} is `lam2`` (i.e.
-            # "spherical_harmonics_l" of the key from `keys_2`. k{nu-1} is
-            # `lam1` (i.e. "spherical_harmonics_l" of the key from `keys_1`).
-            new_vals = [nu, sig, lambda_, a] + l_list + [lam2] + k_list + [lam1]
-            new_key_values.append(new_vals)
-            keys_1_entries.append(key_1)
-            keys_2_entries.append(key_2)
+    new_key_values: List[List[int]] = []
+    # These lists hold LabelsEntry objects; TorchScript needs the annotations
+    # written out explicitly.
+    keys_1_entries: List[LabelsEntry] = []
+    keys_2_entries: List[LabelsEntry] = []
+    for i in range(len(keys_1)):
+        for j in range(len(keys_2)):
+            key_1 = keys_1.entry(i)
+            key_2 = keys_2.entry(j)
+            # Unpack relevant key values
+            sig1 = int(keys_1.values[i, 1])
+            lam1 = int(keys_1.values[i, 2])
+            a = int(keys_1.values[i, 3])
+            sig2 = int(keys_2.values[j, 1])
+            lam2 = int(keys_2.values[j, 2])
+            a2 = int(keys_2.values[j, 3])
+
+            # Only combine blocks of the same chemical species
+            if a != a2:
+                continue
+
+            # First calculate the possible non-zero angular channels that can
+            # be formed from combination of blocks of order `lam1` and `lam2`.
+            # This corresponds to values in the inclusive range
+            # { |lam1 - lam2|, ..., |lam1 + lam2| }
+            min_lam: int = abs(lam1 - lam2)
+            max_lam: int = abs(lam1 + lam2) + 1
+            nonzero_lams = list(range(min_lam, max_lam))
+
+            # Now iterate over the non-zero angular channels and apply the
+            # custom selections
+            for lambda_ in nonzero_lams:
+                # Calculate new sigma
+                sig = int(sig1 * sig2 * (-1) ** (lam1 + lam2 + lambda_))
+
+                # Extract the l and k lists from keys_1
+                # We have to convert to int64 because of
+                # https://github.com/pytorch/pytorch/issues/76295
+                l_list: List[int] = _dispatch.to_int_list(keys_1.values[i, 4 : 4 + nu1])
+                k_list: List[int] = _dispatch.to_int_list(keys_1.values[i, 4 + nu1 :])
+
+                # Build the new keys values. l{nu} is `lam2` (i.e.
+                # "spherical_harmonics_l" of the key from `keys_2`). k{nu-1} is
+                # `lam1` (i.e. "spherical_harmonics_l" of the key from `keys_1`).
+                new_vals: List[int] = (
+                    torch_jit_annotate(List[int], [nu, sig, lambda_, a])
+                    + l_list
+                    + [lam2]
+                    + k_list
+                    + [lam1]
+                )
+                new_key_values.append(new_vals)
+                keys_1_entries.append(key_1)
+                keys_2_entries.append(key_2)
 
     # Define new keys as the full product of keys_1 and keys_2
     keys_out = Labels(
@@ -368,8 +474,11 @@
 def _apply_key_selection(
-    keys_1_entries: List, keys_2_entries: List, keys_out: Labels, selected_keys: Labels
-) -> Tuple[List, List, Labels]:
+    keys_1_entries: List[LabelsEntry],
+    keys_2_entries: List[LabelsEntry],
+    keys_out: Labels,
+    selected_keys: Labels,
+) -> Tuple[List[LabelsEntry], List[LabelsEntry], Labels]:
     """
     Applies a selection according to `selected_keys` to the keys of an output
     TensorMap `keys_out` produced by combination of blocks indexed by keys
 
     If a selection in `selected_keys` is not valid based on the keys in
     `keys_out`, an error is raised.
""" - # Extract the relevant columns from `selected_keys` that the selection will - # be performed on - keys_out_vals = [[k[name] for name in selected_keys.names] for k in keys_out] + # Extract the relevant columns from `selected_keys` that the selection will be + # performed on + col_idx = _dispatch.int_array_like( + [keys_out.names.index(name) for name in selected_keys.names], keys_out.values + ) + keys_out_vals = keys_out.values[:, col_idx] # First check that all of the selected keys exist in the output keys for slct in selected_keys.values: - if not _dispatch.any([_dispatch.all(slct == k) for k in keys_out_vals]): + if not any( + [bool(all(slct == keys_out_vals[i])) for i in range(len(keys_out_vals))] + ): raise ValueError( f"selected key {selected_keys.names} = {slct} not found" " in the output keys. Check the `selected_keys` argument." ) # Build a mask of the selected keys - mask = [ - _dispatch.any([_dispatch.all(i == j) for j in selected_keys.values]) - for i in keys_out_vals - ] + mask = _dispatch.bool_array_like( + [any([bool(all(i == j)) for j in selected_keys.values]) for i in keys_out_vals], + like=selected_keys.values, + ) + mask_indices = _dispatch.int_array_like( + list(range(len(keys_1_entries))), like=selected_keys.values + )[mask] # Apply the mask to key entries and keys and return - keys_1_entries = [k for k, isin in zip(keys_1_entries, mask) if isin] - keys_2_entries = [k for k, isin in zip(keys_2_entries, mask) if isin] + keys_1_entries = [keys_1_entries[i] for i in mask_indices] + keys_2_entries = [keys_2_entries[i] for i in mask_indices] keys_out = Labels(names=keys_out.names, values=keys_out.values[mask]) return keys_1_entries, keys_2_entries, keys_out def _remove_redundant_keys( - keys_1_entries: List, keys_2_entries: List, keys_out: Labels -) -> Tuple[List, List, Labels]: + keys_1_entries: List[LabelsEntry], + keys_2_entries: List[LabelsEntry], + keys_out: Labels, +) -> Tuple[List[LabelsEntry], List[LabelsEntry], Labels]: """ For a Labels object `keys_out` that corresponds to the keys of a TensorMap formed by combined of the blocks described by the entries in the lists @@ -428,11 +547,13 @@ def _remove_redundant_keys( nu = nu1 + 1 # Identify keys of redundant blocks and remove them - key_idxs_to_keep = [] - for key_idx, key in enumerate(keys_out): + key_idxs_to_keep: List[int] = [] + for key_idx in range(len(keys_out)): + key = keys_out.entry(key_idx) # Get the important key values. This is all of the keys, excpet the k - # list - key_vals_slice = key.values[: 4 + (nu + 1)].tolist() + # list. We have to convert to int64 because of + # https://github.com/pytorch/pytorch/issues/76295 + key_vals_slice: List[int] = _dispatch.to_int_list(key.values[: 4 + (nu + 1)]) first_part, l_list = key_vals_slice[:4], key_vals_slice[4:] # Sort the l list @@ -441,18 +562,19 @@ def _remove_redundant_keys( # Compare the sliced key with the one recreated when the l list is # sorted. If they are identical, this is the key of the block that we # want to compute a CG combination for. 
- key_slice_tuple = tuple(first_part + l_list) - key_slice_sorted_tuple = tuple(first_part + l_list_sorted) - if _dispatch.all(key_slice_tuple == key_slice_sorted_tuple): + key_slice_tuple = _dispatch.int_array_like(first_part + l_list, like=key.values) + key_slice_sorted_tuple = _dispatch.int_array_like( + first_part + l_list_sorted, like=key.values + ) + if all(key_slice_tuple == key_slice_sorted_tuple): key_idxs_to_keep.append(key_idx) # Build a reduced Labels object for the combined keys, with redundancies removed keys_out_red = Labels( names=keys_out.names, - values=_dispatch.int_array_like( - [keys_out[idx].values for idx in key_idxs_to_keep], - like=keys_1_entries[0].values, - ), + values=keys_out.values[ + _dispatch.int_array_like(key_idxs_to_keep, like=keys_out.values) + ], ) # Store the list of reduced entries that combine to form the reduced output keys @@ -471,8 +593,7 @@ def _combine_blocks_same_samples( block_1: TensorBlock, block_2: TensorBlock, lambda_: int, - cg_cache, - compute_metadata_only: bool = False, + cg_cache: Union[_cg_cache.ClebschGordanReal, None], ) -> TensorBlock: """ For a given pair of TensorBlocks and desired angular channel, combines the @@ -480,14 +601,9 @@ def _combine_blocks_same_samples( """ # Do the CG combination - single center so no shape pre-processing required - if compute_metadata_only: - combined_values = _cg_cache.combine_arrays( - block_1.values, block_2.values, lambda_, cg_cache, return_empty_array=True - ) - else: - combined_values = _cg_cache.combine_arrays( - block_1.values, block_2.values, lambda_, cg_cache, return_empty_array=False - ) + combined_values = _cg_cache.combine_arrays( + block_1.values, block_2.values, lambda_, cg_cache + ) # Infer the new nu value: block 1's properties are nu pairs of # "species_neighbor_x" and "nx". 
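The next hunk assembles interleaved property names; a short sketch of the
result for a hypothetical combined_nu = 2, written out by hand to match the
list comprehensions below:

    # n_names        == ["n1", "n2"]
    # neighbor_names == ["species_neighbor_1", "species_neighbor_2"]
    # prop_names     == ["species_neighbor_1", "n1", "species_neighbor_2", "n2"]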
@@ -496,7 +612,25 @@ def _combine_blocks_same_samples( # Define the new property names for "nx" and "species_neighbor_x" n_names = [f"n{i}" for i in range(1, combined_nu + 1)] neighbor_names = [f"species_neighbor_{i}" for i in range(1, combined_nu + 1)] - prop_names = [item for i in zip(neighbor_names, n_names) for item in i] + prop_names_zip = [ + [neighbor_names[i], n_names[i]] for i in range(len(neighbor_names)) + ] + prop_names: List[str] = [] + for i in range(len(prop_names_zip)): + prop_names.extend(prop_names_zip[i]) + + # create cross product list of indices in a torch-scriptable way of + # [[i, j] for i in range(len(block_1.properties.values)) for j in + # range(len(block_2.properties.values))] + # [0, 1, 2], [0, 1] -> [[0, 1], [0, 2], [1, 0], [1, 1], [2, 0], [2, 1]] + block_1_block_2_product_idx = _dispatch.cartesian_prod( + _dispatch.int_range_like( + 0, len(block_2.properties.values), like=block_2.values + ), + _dispatch.int_range_like( + 0, len(block_1.properties.values), like=block_1.values + ), + ) # Create a TensorBlock combined_block = TensorBlock( @@ -514,11 +648,11 @@ def _combine_blocks_same_samples( names=prop_names, values=_dispatch.int_array_like( [ - _dispatch.concatenate((b2, b1)) - for b2 in block_2.properties.values - for b1 in block_1.properties.values + _dispatch.to_int_list(block_2.properties.values[indices[0]]) + + _dispatch.to_int_list(block_1.properties.values[indices[1]]) + for indices in block_1_block_2_product_idx ], - like=block_1.values, + block_1.properties.values, ), ), ) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py b/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py index 839ea7f3e..417e1e756 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py @@ -2,18 +2,18 @@ Module containing dispatch functions for numpy/torch CG combination operations. """ -from typing import List, Optional +import itertools +from typing import List, Optional, Union import numpy as np +from ._classes import TorchTensor + try: import torch - from torch import Tensor as TorchTensor except ImportError: - - class TorchTensor: - pass + pass UNKNOWN_ARRAY_TYPE = ( @@ -21,6 +21,63 @@ class TorchTensor: ) +def _check_all_torch_tensor(arrays: List[TorchTensor]): + for array in arrays: + if not isinstance(array, TorchTensor): + raise TypeError( + f"expected argument to be a torch.Tensor, but got {type(array)}" + ) + + +def _check_all_np_ndarray(arrays): + for array in arrays: + if not isinstance(array, np.ndarray): + raise TypeError( + f"expected argument to be a np.ndarray, but got {type(array)}" + ) + + +def where(array): + """Return the indices where `array` is True. + + This function has the same behavior as ``np.where(array)``. + """ + if isinstance(array, TorchTensor): + return torch.where(array) + elif isinstance(array, np.ndarray): + return np.where(array) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def abs(array): + """ + Returns the absolute value of the elements in the array. + + It is equivalent of np.abs(array) and torch.abs(tensor) + """ + if isinstance(array, TorchTensor): + return torch.abs(array) + elif isinstance(array, np.ndarray): + return np.abs(array).astype(array.dtype) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def argsort(array): + """ + Returns the sorted arguments of the elements in the array. 
+
+    It is equivalent to np.argsort(array) and torch.argsort(tensor)
+    """
+    if isinstance(array, TorchTensor):
+        return torch.argsort(array)
+    elif isinstance(array, np.ndarray):
+        return np.argsort(array)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
 def unique(array, axis: Optional[int] = None):
     """Find the unique elements of an array."""
     if isinstance(array, TorchTensor):
@@ -29,9 +86,25 @@ def unique(array, axis: Optional[int] = None):
         return np.unique(array, axis=axis)


-def int_range_like(min_val, max_val, like):
-    """Returns an array of integers from min to max, non-inclusive, based on the
-    type of `like`"""
+def to_int_list(array) -> List[int]:
+    if isinstance(array, TorchTensor):
+        # we need to do it this way because of
+        # https://github.com/pytorch/pytorch/issues/76295
+        return array.to(dtype=torch.int64).tolist()
+    elif isinstance(array, np.ndarray):
+        return array.tolist()
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def int_range_like(min_val: int, max_val: int, like):
+    """
+    Returns an array of integers from min to max, non-inclusive, based on the type of
+    `like`
+
+    It is equivalent to np.arange(start, end) and torch.arange(start, end) for the
+    given array dtype and device.
+    """
     if isinstance(like, TorchTensor):
         return torch.arange(min_val, max_val, dtype=torch.int64, device=like.device)
     elif isinstance(like, np.ndarray):
@@ -40,19 +113,62 @@
-def int_array_like(int_list: List[int], like):
+def int_array_like(int_list: Union[List[int], List[List[int]]], like):
     """
     Converts the input list of int to a numpy array or torch tensor
     based on the type of `like`.
     """
     if isinstance(like, TorchTensor):
-        return torch.tensor(int_list, dtype=torch.int64, device=like.device)
+        # both branches build the same tensor: the explicit check is only there
+        # so TorchScript can refine the Union type of `int_list`
+        if torch.jit.isinstance(int_list, List[int]):
+            return torch.tensor(int_list, dtype=torch.int64, device=like.device)
+        else:
+            return torch.tensor(int_list, dtype=torch.int64, device=like.device)
     elif isinstance(like, np.ndarray):
         return np.array(int_list).astype(np.int64)
     else:
         raise TypeError(UNKNOWN_ARRAY_TYPE)


+def double_array_like(float_list: List[float], like):
+    """
+    Converts the input list of float to a numpy array or torch tensor
+    based on the array type of `like`.
+    """
+    if isinstance(like, TorchTensor):
+        return torch.tensor(float_list, dtype=torch.float64, device=like.device)
+    elif isinstance(like, np.ndarray):
+        return np.array(float_list).astype(np.float64)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def bool_array_like(bool_list: List[bool], like):
+    """
+    Converts the input list of bool to a numpy array or torch tensor
+    based on the type of `like`.
+ """ + if isinstance(like, TorchTensor): + return torch.tensor(bool_list, dtype=torch.bool, device=like.device) + elif isinstance(like, np.ndarray): + return np.array(bool_list).astype(bool) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def cartesian_prod(array1, array2): + """ + Imitates like itertools.product(array1, array2) + """ + if isinstance(array1, TorchTensor) and isinstance(array2, TorchTensor): + return torch.cartesian_prod(array1, array2) + elif isinstance(array1, np.ndarray) and isinstance(array2, np.ndarray): + # using itertools should be fastest way according to + # https://stackoverflow.com/a/28684982 + return np.array(list(itertools.product(array1, array2))) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + def concatenate(arrays, axis: Optional[int] = 0): """Concatenate arrays along an axis.""" if isinstance(arrays[0], TorchTensor): @@ -87,6 +203,36 @@ def all(array, axis: Optional[int] = None): raise TypeError(UNKNOWN_ARRAY_TYPE) +def max(array): + """ + Takes the maximun value of the array. + + This function has the same behavior as + ``np.max(array)`` or ``torch.max(array)``. + """ + if isinstance(array, TorchTensor): + return torch.max(input=array) + elif isinstance(array, np.ndarray): + return np.max(a=array) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def max_axis(array, axis: int = 0): + """ + Takes the maximun values of the array along the axis. + + This function has the same behavior as + ``np.max(array, axis=axis)`` or ``torch.max(array, dim=axis)``. + """ + if isinstance(array, TorchTensor): + return torch.max(input=array, dim=axis) + elif isinstance(array, np.ndarray): + return np.max(a=array, axis=axis) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + def any(array): """Test whether any array elements along a given axis evaluate to True. @@ -105,21 +251,80 @@ def any(array): raise TypeError(UNKNOWN_ARRAY_TYPE) -def zeros_like(shape, like): - """Return an array of zeros with the same shape and type as a given array. +def zeros_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): + """ + Create an array filled with zeros, with the given ``shape``, and similar + dtype, device and other options as ``array``. - This function has the same behavior as - ``np.zeros_like(array)``. + If ``shape`` is :py:obj:`None`, the array shape is used instead. + ``requires_grad`` is only used for torch tensors, and set the corresponding + value on the returned array. + + This is the equivalent to ``np.zeros_like(array, shape=shape)``. """ - if isinstance(like, TorchTensor): + if isinstance(array, TorchTensor): + if shape is None: + shape = array.size() + return torch.zeros( shape, - requires_grad=like.requires_grad, - dtype=like.dtype, - device=like.device, + dtype=array.dtype, + layout=array.layout, + device=array.device, + ).requires_grad_(requires_grad) + elif isinstance(array, np.ndarray): + return np.zeros_like(array, shape=shape, subok=False) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def empty_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): + """ + Create an empty array, with the given ``shape``, and similar + dtype, device and other options as ``array``. + + If ``shape`` is :py:obj:`None`, the array shape is used instead. + ``requires_grad`` is only used for torch tensors, and set the corresponding + value on the returned array. + + This is the equivalent to ``np.empty_like(array, shape=shape)``. 
+ """ + if isinstance(array, TorchTensor): + if shape is None: + shape = array.size() + + return torch.empty( + shape, + dtype=array.dtype, + layout=array.layout, + device=array.device, + ).requires_grad_(requires_grad) + elif isinstance(array, np.ndarray): + return np.empty_like(array, shape=shape, subok=False) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def allclose( + a: TorchTensor, + b: TorchTensor, + rtol: float, + atol: float, + equal_nan: bool = False, +): + """Compare two arrays using ``allclose`` + + This function has the same behavior as + ``np.allclose(array1, array2, rtol, atol, equal_nan)``. + """ + if isinstance(a, TorchTensor): + _check_all_torch_tensor([b]) + return torch.allclose( + input=a, other=b, rtol=rtol, atol=atol, equal_nan=equal_nan ) - elif isinstance(like, np.ndarray): - return np.zeros(shape, dtype=like.dtype) + elif isinstance(a, np.ndarray): + _check_all_np_ndarray([b]) + return np.allclose(a=a, b=b, rtol=rtol, atol=atol, equal_nan=equal_nan) else: raise TypeError(UNKNOWN_ARRAY_TYPE) @@ -132,3 +337,48 @@ def swapaxes(array, axis0: int, axis1: int): return np.swapaxes(array, axis0, axis1) else: raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def conjugate(array): + """ + Conjugate the array + + This function has the same behavior as + ``np.conjugate(array)`` or ``torch.conj(array)``. + """ + if isinstance(array, TorchTensor): + return torch.conj(array) + elif isinstance(array, np.ndarray): + return np.conjugate(array) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def real(array): + """ + Takes the real part of the array + + This function has the same behavior as + ``np.real(array)`` or ``torch.real(array)``. + """ + if isinstance(array, TorchTensor): + return torch.real(array) + elif isinstance(array, np.ndarray): + return np.real(array) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def imag(array): + """ + Takes the imag part of the array + + This function has the same behavior as + ``np.imag(array)`` or ``torch.imag(array)``. + """ + if isinstance(array, TorchTensor): + return torch.imag(array) + elif isinstance(array, np.ndarray): + return np.imag(array) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py index bf4c0c42f..9ba8f117f 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py @@ -6,9 +6,8 @@ from typing import List, Optional, Union -from metatensor import Labels, TensorMap - from . 
import _cg_cache, _clebsch_gordan, _dispatch +from ._classes import Labels, LabelsEntry, TensorBlock, TensorMap, TorchTensor # ====================================================================== @@ -20,8 +19,8 @@ def correlate_density( density: TensorMap, correlation_order: int, angular_cutoff: Optional[int] = None, - selected_keys: Optional[Union[Labels, List[Labels]]] = None, - skip_redundant: Optional[Union[bool, List[bool]]] = False, + selected_keys: Optional[Union[Labels, List[Union[Labels, None]]]] = None, + skip_redundant: Union[bool, List[bool]] = False, output_selection: Optional[Union[bool, List[bool]]] = None, ) -> Union[TensorMap, List[TensorMap]]: """ @@ -98,8 +97,8 @@ def correlate_density_metadata( density: TensorMap, correlation_order: int, angular_cutoff: Optional[int] = None, - selected_keys: Optional[Union[Labels, List[Labels]]] = None, - skip_redundant: Optional[Union[bool, List[bool]]] = False, + selected_keys: Optional[Union[Labels, List[Union[Labels, None]]]] = None, + skip_redundant: Union[bool, List[bool]] = False, output_selection: Optional[Union[bool, List[bool]]] = None, ) -> Union[TensorMap, List[TensorMap]]: """ @@ -129,8 +128,8 @@ def _correlate_density( density: TensorMap, correlation_order: int, angular_cutoff: Optional[int] = None, - selected_keys: Optional[Union[Labels, List[Labels]]] = None, - skip_redundant: Optional[Union[bool, List[bool]]] = False, + selected_keys: Optional[Union[Labels, List[Union[Labels, None]]]] = None, + skip_redundant: Union[bool, List[bool]] = False, output_selection: Optional[Union[bool, List[bool]]] = None, compute_metadata_only: bool = False, sparse: bool = True, @@ -143,25 +142,27 @@ def _correlate_density( if correlation_order <= 1: raise ValueError("`correlation_order` must be > 1") # TODO: implement combinations of gradients too - if _dispatch.any([len(list(block.gradients())) > 0 for block in density]): + # we have to create a bool array with dispatch to be TorchScript compatible + contains_gradients = all( + [len(list(block.gradients())) > 0 for _, block in density.items()] + ) + if contains_gradients: raise NotImplementedError( "Clebsch Gordan combinations with gradients not yet implemented." " Use metatensor.remove_gradients to remove gradients from the input." 
) # Check metadata if not ( - _dispatch.all(density.keys.names == ["spherical_harmonics_l", "species_center"]) - or _dispatch.all( - density.keys.names - == ["spherical_harmonics_l", "species_center", "species_neighbor"] - ) + density.keys.names == ["spherical_harmonics_l", "species_center"] + or density.keys.names + == ["spherical_harmonics_l", "species_center", "species_neighbor"] ): raise ValueError( "input `density` must have key names" ' ["spherical_harmonics_l", "species_center"] or' ' ["spherical_harmonics_l", "species_center", "species_neighbor"]' ) - if not _dispatch.all(density.component_names == ["spherical_harmonics_m"]): + if not density.component_names == ["spherical_harmonics_m"]: raise ValueError( "input `density` must have a single component" " axis with name `spherical_harmonics_m`" @@ -171,11 +172,11 @@ def _correlate_density( density_correlation = density # create a copy to combine with itself # Parse the selected keys - selected_keys = _clebsch_gordan._parse_selected_keys( + selected_keys_: List[Union[Labels, None]] = _clebsch_gordan._parse_selected_keys( n_iterations=n_iterations, + like=density.keys.values, angular_cutoff=angular_cutoff, selected_keys=selected_keys, - like=density.keys.values, ) # Parse the bool flags that control skipping of redundant CG combinations # and TensorMap output from each iteration @@ -190,7 +191,7 @@ def _correlate_density( density.keys, density.keys, n_iterations=n_iterations, - selected_keys=selected_keys, + selected_keys=selected_keys_, skip_redundant=skip_redundant, ) # Compute CG coefficient cache @@ -198,32 +199,42 @@ def _correlate_density( cg_cache = None else: angular_max = max( - _dispatch.concatenate( - [density.keys.column("spherical_harmonics_l")] - + [mdata[2].column("spherical_harmonics_l") for mdata in key_metadata] - ) + _dispatch.max(density.keys.column("spherical_harmonics_l")), + max( + [ + int(_dispatch.max(mdata[2].column("spherical_harmonics_l"))) + for mdata in key_metadata + ] + ), ) # TODO: keys have been precomputed, so perhaps we don't need to # compute all CG coefficients up to angular_max here. # TODO: use sparse cache by default until we understand under which # circumstances (and if) dense is faster. 
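[Editor's note] The nested `max(...)` above replaces a single reduction over a concatenated array, presumably because TorchScript cannot type a list that mixes arrays from different Labels columns; converting each per-iteration maximum to a plain `int` keeps the outer comparison in pure Python. A small sketch of the two equivalent styles on made-up data:

    import numpy as np

    columns = [np.array([0, 1, 2]), np.array([3, 1])]

    # numpy-only style: concatenate once, then reduce
    angular_max = int(np.max(np.concatenate(columns)))

    # TorchScript-friendly style: reduce each column to an int, then use builtin max
    angular_max_ts = max([int(np.max(column)) for column in columns])

    assert angular_max == angular_max_ts == 3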
- cg_cache = _cg_cache.ClebschGordanReal(angular_max, sparse=sparse) + cg_cache = _cg_cache.ClebschGordanReal( + angular_max, + sparse=sparse, + use_torch=isinstance(density[0].values, TorchTensor), + ) # Perform iterative CG tensor products - density_correlations = [] + density_correlations: List[TensorMap] = [] for iteration in range(n_iterations): # Define the correlation order of the current iteration correlation_order_it = iteration + 2 # Combine block pairs - blocks_out = [] - for key_1, key_2, key_out in zip(*key_metadata[iteration]): + blocks_out: List[TensorBlock] = [] + key_metadata_i = key_metadata[iteration] + for j in range(len(key_metadata_i[0])): + key_1: LabelsEntry = key_metadata_i[0][j] + key_2: LabelsEntry = key_metadata_i[1][j] + lambda_out: int = int(key_metadata_i[2].column("spherical_harmonics_l")[j]) block_out = _clebsch_gordan._combine_blocks_same_samples( - density_correlation[key_1], - density[key_2], - key_out["spherical_harmonics_l"], + density_correlation.block(key_1), + density.block(key_2), + lambda_out, cg_cache, - compute_metadata_only=compute_metadata_only, ) blocks_out.append(block_out) keys_out = key_metadata[iteration][2] diff --git a/python/rascaline/tests/utils/correlate_density.py b/python/rascaline/tests/utils/correlate_density.py index 7822d159e..d72dcbe99 100644 --- a/python/rascaline/tests/utils/correlate_density.py +++ b/python/rascaline/tests/utils/correlate_density.py @@ -9,6 +9,7 @@ import rascaline from rascaline.utils import PowerSpectrum +from rascaline.utils.clebsch_gordan import _dispatch from rascaline.utils.clebsch_gordan._cg_cache import ClebschGordanReal from rascaline.utils.clebsch_gordan._clebsch_gordan import _standardize_keys from rascaline.utils.clebsch_gordan.correlate_density import ( @@ -40,6 +41,28 @@ from .rotations import WignerDReal, transform_frame_o3, transform_frame_so3 +try: + import torch + from torch import Tensor as TorchTensor + + torch_dtype = torch.dtype + torch_device = torch.device + + HAS_TORCH = True +except ImportError: + HAS_TORCH = False + + # PR TODO below needed? 
+ class TorchTensor: + pass + + class torch_dtype: + pass + + class torch_device: + pass + + DATA_ROOT = os.path.join(os.path.dirname(__file__), "data") SPHEX_HYPERS = { @@ -68,12 +91,12 @@ @pytest.fixture() def cg_cache_sparse(): - return ClebschGordanReal(lambda_max=5, sparse=True) + return ClebschGordanReal(lambda_max=5, sparse=True, use_torch=HAS_TORCH) @pytest.fixture() def cg_cache_dense(): - return ClebschGordanReal(lambda_max=5, sparse=False) + return ClebschGordanReal(lambda_max=5, sparse=False, use_torch=HAS_TORCH) # ============ Helper functions ============ @@ -360,32 +383,60 @@ def test_clebsch_gordan_orthogonality(cg_cache_dense, l1, l2): lam_min = abs(l1 - l2) lam_max = l1 + l2 + if HAS_TORCH: + int64_like = torch.empty(0, dtype=torch.int64) + float64_like = torch.empty(0, dtype=torch.float64) + bool_like = torch.empty(0, dtype=torch.bool) + else: + int64_like = np.empty(0, dtype=np.int64) + float64_like = np.empty(0, dtype=np.float64) + bool_like = np.empty(0, dtype=np.bool_) # We test lam dimension # \sum_{-m1 \leq l1 \leq m1, -m2 \leq l2 \leq m2} # <λμ|l1m1,l2m2> = δ_μμ' for lam in range(lam_min, lam_max): - cg_mat = cg_cache_dense.coeffs[(l1, l2, lam)].reshape(-1, 2 * lam + 1) + cg_mat = cg_cache_dense.coeffs.get(l1, l2, lam).reshape(-1, 2 * lam + 1) dot_product = cg_mat.T @ cg_mat - diag_mask = np.zeros(dot_product.shape, dtype=np.bool_) - diag_mask[np.diag_indices(len(dot_product))] = True - assert np.allclose( - dot_product[~diag_mask], np.zeros(dot_product.shape)[~diag_mask] + diag_mask = _dispatch.zeros_like(bool_like, dot_product.shape) + diag_indices = ( + _dispatch.int_range_like(0, len(dot_product), int64_like), + _dispatch.int_range_like(0, len(dot_product), int64_like), + ) + diag_mask[diag_indices] = True + assert _dispatch.allclose( + dot_product[~diag_mask], + _dispatch.zeros_like(float64_like, dot_product.shape)[~diag_mask], + rtol=1e-05, + atol=1e-08, + ) + assert _dispatch.allclose( + dot_product[diag_mask], dot_product[diag_mask][0:1], rtol=1e-05, atol=1e-08 ) - assert np.allclose(dot_product[diag_mask], dot_product[diag_mask][0]) # We test l1 l2 dimension # \sum_{|l1-l2| \leq λ \leq l1+l2} \sum_{-μ \leq λ \leq μ} # <λμ|l1m1,l2m2> = δ_m1m1' δ_m2m2' l1l2_dim = (2 * l1 + 1) * (2 * l2 + 1) - dot_product = np.zeros((l1l2_dim, l1l2_dim)) + dot_product = _dispatch.zeros_like(float64_like, (l1l2_dim, l1l2_dim)) for lam in range(lam_min, lam_max + 1): - cg_mat = cg_cache_dense.coeffs[(l1, l2, lam)].reshape(-1, 2 * lam + 1) + cg_mat = cg_cache_dense.coeffs.get(l1, l2, lam).reshape(-1, 2 * lam + 1) dot_product += cg_mat @ cg_mat.T - diag_mask = np.zeros(dot_product.shape, dtype=np.bool_) - diag_mask[np.diag_indices(len(dot_product))] = True + diag_mask = _dispatch.zeros_like(bool_like, dot_product.shape) + diag_indices = ( + _dispatch.int_range_like(0, len(dot_product), int64_like), + _dispatch.int_range_like(0, len(dot_product), int64_like), + ) + diag_mask[diag_indices] = True - assert np.allclose(dot_product[~diag_mask], np.zeros(dot_product.shape)[~diag_mask]) - assert np.allclose(dot_product[diag_mask], dot_product[diag_mask][0]) + assert _dispatch.allclose( + dot_product[~diag_mask], + _dispatch.zeros_like(float64_like, dot_product.shape)[~diag_mask], + rtol=1e-05, + atol=1e-08, + ) + assert _dispatch.allclose( + dot_product[diag_mask], dot_product[diag_mask][0:1], rtol=1e-05, atol=1e-08 + ) @pytest.mark.skipif( diff --git a/tox.ini b/tox.ini index f366bb2ee..b0d4c6e60 100644 --- a/tox.ini +++ b/tox.ini @@ -6,6 +6,7 @@ envlist = lint min-deps all-deps + 
all-deps-torch docs-tests torch-tests @@ -67,6 +68,16 @@ deps = commands = pytest {[testenv]test_options} {posargs} +[testenv:all-deps-torch] +# note: platform_system can be "Linux","Darwin", or "Windows". +description = + Run Python unit tests with all dependencies installed (ase, pyscf, + and chemfiles are optional dependencies) and torch +deps = + {[testenv:all-deps]deps} + torch +commands = + pytest {[testenv]test_options} {posargs} [testenv:min-deps] description = Run Python unit tests with the minimal dependencies installed @@ -84,6 +95,7 @@ description = Run Python unit tests using torch deps = {[testenv]metatensor-torch-requirement} + metatensor-operations pytest pytest-cov numpy From e67d917ff95db788525f68dbc2465cc00a0069a3 Mon Sep 17 00:00:00 2001 From: Alexander Goscinski Date: Tue, 13 Feb 2024 21:05:06 +0100 Subject: [PATCH 02/23] checkpoint all-deps and all-deps-torch tests passing --- python/rascaline/rascaline/utils/__init__.py | 2 +- .../utils/clebsch_gordan/__init__.py | 5 +- .../utils/clebsch_gordan/_cg_cache.py | 46 +- .../utils/clebsch_gordan/_classes.py | 6 + .../utils/clebsch_gordan/_clebsch_gordan.py | 23 +- .../utils/clebsch_gordan/correlate_density.py | 503 +++++++++++------- .../tests/utils/correlate_density.py | 137 +++-- tox.ini | 6 +- 8 files changed, 417 insertions(+), 311 deletions(-) diff --git a/python/rascaline/rascaline/utils/__init__.py b/python/rascaline/rascaline/utils/__init__.py index e3e8adc1b..b6bfe14ff 100644 --- a/python/rascaline/rascaline/utils/__init__.py +++ b/python/rascaline/rascaline/utils/__init__.py @@ -1,6 +1,6 @@ import os -from .clebsch_gordan import correlate_density, correlate_density_metadata # noqa +from .clebsch_gordan import DensityCorrelations # noqa from .power_spectrum import PowerSpectrum # noqa from .splines import ( # noqa AtomicDensityBase, diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py index fca7f33a2..dbdc2e5eb 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py @@ -1,7 +1,6 @@ -from .correlate_density import correlate_density, correlate_density_metadata # noqa +from .correlate_density import DensityCorrelations # noqa __all__ = [ - "correlate_density", - "correlate_density_metadata", + "DensityCorrelations", ] diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py index 1ec80a159..cdb043800 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py @@ -10,29 +10,15 @@ import wigners from . 
import _dispatch -from ._classes import Array, torch_jit_annotate, torch_jit_is_scripting +from ._classes import Array, torch_jit_annotate, torch_jit_is_scripting, TorchModule try: from mops import sparse_accumulation_of_products as sap # noqa F401 - # We need to define a variable that is globally accessible in this way to be - # compatible with torch script - class MOPS_CONFIG: - def __init__(self): - return - - def is_installed(self) -> bool: - return True - + HAS_MOPS = True except ImportError: - - class MOPS_CONFIG: - def __init__(self): - return - - def is_installed(self) -> bool: - return False + HAS_MOPS = False try: @@ -66,7 +52,7 @@ class torch_device: # ================================= -class ClebschGordanReal: +class ClebschGordanReal(TorchModule): """ Class for computing Clebsch-Gordan coefficients for real spherical harmonics. @@ -170,13 +156,14 @@ def __init__( use_mops: Optional[bool] = None, use_torch: bool = False, ): + super().__init__() self._lambda_max = lambda_max # For TorchScript we declare type self._use_mops: bool = False if sparse: if use_mops is None: - self._use_mops = MOPS_CONFIG().is_installed() + self._use_mops = HAS_MOPS # TODO: provide a warning once Mops is fully ready # import warnings # warnings.warn( @@ -186,7 +173,7 @@ def __init__( # " Falling back to numpy for now." # ) else: - if use_mops and not MOPS_CONFIG().is_installed(): + if use_mops and not HAS_MOPS: raise ImportError("Specified to use MOPS, but it is not installed.") else: self._use_mops = use_mops @@ -390,7 +377,7 @@ def _build_cg_coeff_dict( ) # Till MOPS does not TorchScript support we disable the scripting # of this part here. - if not torch_jit_is_scripting() and use_mops: + if use_mops: # Store CG coeffs in a specific format for use in # MOPS. Here we need the m1, m2, mu, and CG coeffs # to be stored as separate 1D arrays. 
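[Editor's note] Replacing the `MOPS_CONFIG` class with a module-level `HAS_MOPS` flag is the usual optional-dependency probe. A self-contained sketch of the pattern as used above; `resolve_use_mops` is a hypothetical helper mirroring the constructor logic:

    try:
        from mops import sparse_accumulation_of_products as sap  # noqa F401

        HAS_MOPS = True
    except ImportError:
        HAS_MOPS = False

    def resolve_use_mops(use_mops=None) -> bool:
        # default to MOPS whenever it is installed, as in ClebschGordanReal.__init__
        if use_mops is None:
            return HAS_MOPS
        if use_mops and not HAS_MOPS:
            raise ImportError("Specified to use MOPS, but it is not installed.")
        return use_mops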
@@ -547,7 +534,7 @@ def combine_arrays( arr_1: Array, arr_2: Array, lambda_: int, - cg_cache: Union[ClebschGordanReal, None], + cg_coeffs: Union[SparseCgDict, DenseCgDict, None], ) -> Array: """ Couples arrays `arr_1` and `arr_2` corresponding to the irreducible @@ -591,15 +578,14 @@ def combine_arrays( :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties] """ # If just precomputing metadata, return an empty array - if cg_cache is None: + if cg_coeffs is None: return empty_combine(arr_1, arr_2, lambda_) # We have to temporary store it so TorchScript can infer the correct type - cg_cache_coeffs = cg_cache.coeffs - if isinstance(cg_cache_coeffs, SparseCgDict): - return sparse_combine(arr_1, arr_2, lambda_, cg_cache_coeffs) - elif isinstance(cg_cache_coeffs, DenseCgDict): - return dense_combine(arr_1, arr_2, lambda_, cg_cache_coeffs) + if isinstance(cg_coeffs, SparseCgDict): + return sparse_combine(arr_1, arr_2, lambda_, cg_coeffs) + elif isinstance(cg_coeffs, DenseCgDict): + return dense_combine(arr_1, arr_2, lambda_, cg_coeffs) else: raise ValueError( "Wrong type of cg coeffs, found type {type(cg_cache.coeffs)}," @@ -660,7 +646,7 @@ def sparse_combine( n_p = arr_1.shape[2] # number of properties in arr_1 n_q = arr_2.shape[2] # number of properties in arr_2 - if isinstance(arr_1, TorchTensor) or not MOPS_CONFIG().is_installed(): + if isinstance(arr_1, TorchTensor) or not HAS_MOPS: # Initialise output array arr_out = _dispatch.zeros_like(arr_1, (n_i, 2 * lambda_ + 1, n_p * n_q)) @@ -680,7 +666,7 @@ def sparse_combine( ).reshape(n_i, n_p * n_q) return arr_out - elif isinstance(arr_1, np.ndarray) and MOPS_CONFIG().is_installed(): + elif isinstance(arr_1, np.ndarray) and HAS_MOPS: # Reshape arr_1 = np.repeat(arr_1[:, :, :, None], n_q, axis=3).reshape( n_i, 2 * l1 + 1, n_p * n_q diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py b/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py index 8ed5bdce5..02e5672a6 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py @@ -20,11 +20,17 @@ def is_labels(obj: Any): try: from torch import Tensor as TorchTensor + from torch.nn import Module as TorchModule except ImportError: class TorchTensor: pass + class TorchModule: + + def __call__(self, *arg, **kwargs): + return self.forward(*arg, **kwargs) + Array = Union[np.ndarray, TorchTensor] diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py index a6359c810..fa392ea41 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py @@ -18,7 +18,6 @@ torch_jit_is_scripting, ) - # ================================================================== # ===== Functions to handle metadata # ================================================================== @@ -59,17 +58,15 @@ def _standardize_keys(tensor: TensorMap) -> TensorMap: def _parse_selected_keys( n_iterations: int, - like: Array, + array_like: Array, angular_cutoff: Optional[int] = None, selected_keys: Optional[Union[Labels, List[Union[Labels, None]]]] = None, ) -> List[Union[None, Labels]]: """ Parses the `selected_keys` argument passed to public functions. Checks the values and returns a :py:class:`list` of :py:class:`Labels` objects, one for - each iteration of CG combination. 
-
-    `like` is required if a new :py:class:`Labels` object is to be created by
-    :py:mod:`_dispatch`.
+    each iteration of CG combination. The ``array_like`` argument determines
+    the array backend of the :py:class:`Labels` created.
     """
     # Check the selected_keys
     if (
@@ -78,7 +75,7 @@
         and (not is_labels(selected_keys))
     ):
         raise TypeError(
-            "`selected_keys` must be a `Labels` or List[Union[None, `Labels`]]"
+            "`selected_keys` must be `None`, `Labels` or List[Union[None, `Labels`]]"
         )

     if isinstance(selected_keys, list):
@@ -127,7 +124,7 @@
             Labels(
                 names=["spherical_harmonics_l"],
                 values=_dispatch.int_array_like(
-                    list(range(0, angular_cutoff)), like=like
+                    list(range(0, angular_cutoff)), like=array_like
                 ).reshape(-1, 1),
             ),
         )
@@ -195,7 +192,7 @@
                 bool(angular_l >= 0)
                 for angular_l in slct.column("spherical_harmonics_l")
             ],
-            like=like,
+            like=array_like,
         )
         if not _dispatch.all(above_zero):
             raise ValueError(
@@ -208,7 +205,7 @@
                     bool(parity_s in [-1, 1])
                     for parity_s in slct.column("inversion_sigma")
                 ],
-                like,
+                array_like,
             )
         ):
             raise ValueError(
@@ -222,7 +219,7 @@ def _parse_bool_iteration_filters(
     n_iterations: int,
     skip_redundant: Union[bool, List[bool]] = False,
     output_selection: Optional[Union[bool, List[bool]]] = None,
-) -> List[List[bool]]:
+) -> Tuple[List[bool], List[bool]]:
     """
     Parses the `skip_redundant` and `output_selection` arguments passed to
     public functions.
@@ -593,7 +590,7 @@ def _combine_blocks_same_samples(
     block_1: TensorBlock,
     block_2: TensorBlock,
     lambda_: int,
-    cg_cache: Union[_cg_cache.ClebschGordanReal, None],
+    cg_coeffs: Union[_cg_cache.SparseCgDict, _cg_cache.DenseCgDict, None],
 ) -> TensorBlock:
     """
     For a given pair of TensorBlocks and desired angular channel, combines the
@@ -602,7 +599,7 @@

     # Do the CG combination - single center so no shape pre-processing required
     combined_values = _cg_cache.combine_arrays(
-        block_1.values, block_2.values, lambda_, cg_cache
+        block_1.values, block_2.values, lambda_, cg_coeffs
     )

     # Infer the new nu value: block 1's properties are nu pairs of
diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py
index 9ba8f117f..525808932 100644
--- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py
+++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py
@@ -5,24 +5,30 @@
 """
 from typing import List, Optional, Union

+import numpy as np

 from . import _cg_cache, _clebsch_gordan, _dispatch
-from ._classes import Labels, LabelsEntry, TensorBlock, TensorMap, TorchTensor
+from ._classes import Labels, LabelsEntry, TensorBlock, TensorMap, TorchTensor, TorchModule, torch_jit_is_scripting
+
+try:
+    from mops import sparse_accumulation_of_products as sap  # noqa F401
+
+    HAS_MOPS = True
+except ImportError:
+    HAS_MOPS = False
+
+try:
+    import torch
+
+    HAS_TORCH = True
+except ImportError:
+    HAS_TORCH = False

 # ======================================================================
 # ===== Public API functions
 # ======================================================================

-def correlate_density(
-    density: TensorMap,
-    correlation_order: int,
-    angular_cutoff: Optional[int] = None,
-    selected_keys: Optional[Union[Labels, List[Union[Labels, None]]]] = None,
-    skip_redundant: Union[bool, List[bool]] = False,
-    output_selection: Optional[Union[bool, List[bool]]] = None,
-) -> Union[TensorMap, List[TensorMap]]:
+class DensityCorrelations(TorchModule):
     """
     Takes iterative Clebsch-Gordan (CG) tensor products of a density descriptor
     with itself up to the desired correlation order. Returns
@@ -45,11 +51,6 @@
     controlled with arguments `angular_cutoff`, `angular_selection` and
     `parity_selection`.

-    :param density: A density descriptor of body order 2 (correlation order 1),
-        in :py:class:`TensorMap` format. This may be, for example, a rascaline
-        :py:class:`SphericalExpansion` or :py:class:`LodeSphericalExpansion`.
-        Alternatively, this could be multi-center descriptor, such as a pair
-        density.
     :param correlation_order: The desired correlation order of the output
         descriptor. Must be >= 1.
     :param angular_cutoff: The maximum angular channel to compute at any given
@@ -75,197 +76,315 @@
         will be returned. If a :py:class:`list` of :py:class:`bool` is passed,
         this controls the output at each corresponding iteration. If None is
         passed, only the final iteration is output.
+    :param arrays_backend: Determines the array backend, either "numpy" or "torch".
+    :param cg_combine_backend: Determines the backend for the CG combination. It can
+        be either "python-sparse", "python-dense" or "mops". If the CG combination
+        is performed with the sparse coefficients, then for each (l1, l2, lambda)
+        block the (m1, m2, mu) coefficients are stored in a sparse format, only
+        keeping the nonzero coefficients. If the parameter is None, the most optimal
+        choice is determined from the available packages and ``arrays_backend``.
+        - "python-dense": Uses the python implementation performing the combinations
+          with the dense CG coefficients.
+        - "python-sparse": Uses the python implementation performing the combinations
+          with the sparse CG coefficients.
+        - "mops": Uses the package ``mops`` that optimizes the sparse combinations.
+          At the moment it is only available with "numpy" as ``arrays_backend``.

     :return: A :py:class:`list` of :py:class:`TensorMap` corresponding to the
         density correlations output from the specified iterations. If the
         output from a single iteration is requested, a :py:class:`TensorMap` is
         returned instead.
""" - return _correlate_density( - density, - correlation_order, - angular_cutoff, - selected_keys, - skip_redundant, - output_selection, - compute_metadata_only=False, - sparse=True, # sparse CG cache by default - ) - - -def correlate_density_metadata( - density: TensorMap, - correlation_order: int, - angular_cutoff: Optional[int] = None, - selected_keys: Optional[Union[Labels, List[Union[Labels, None]]]] = None, - skip_redundant: Union[bool, List[bool]] = False, - output_selection: Optional[Union[bool, List[bool]]] = None, -) -> Union[TensorMap, List[TensorMap]]: - """ - Returns the metadata-only :py:class:`TensorMap`(s) that would be output by - the function :py:func:`correlate_density` under the same settings, without - perfoming the actual Clebsch-Gordan tensor products. See this function for - full documentation. - """ - - return _correlate_density( - density, - correlation_order, - angular_cutoff, - selected_keys, - skip_redundant, - output_selection, - compute_metadata_only=True, - ) - - -# ==================================================================== -# ===== Private functions that do the work on the TensorMap level -# ==================================================================== - - -def _correlate_density( - density: TensorMap, - correlation_order: int, - angular_cutoff: Optional[int] = None, - selected_keys: Optional[Union[Labels, List[Union[Labels, None]]]] = None, - skip_redundant: Union[bool, List[bool]] = False, - output_selection: Optional[Union[bool, List[bool]]] = None, - compute_metadata_only: bool = False, - sparse: bool = True, -) -> Union[TensorMap, List[TensorMap]]: - """ - Performs the density correlations for public functions - :py:func:`correlate_density` and :py:func:`correlate_density_metadata`. - """ - # Check inputs - if correlation_order <= 1: - raise ValueError("`correlation_order` must be > 1") - # TODO: implement combinations of gradients too - # we have to create a bool array with dispatch to be TorchScript compatible - contains_gradients = all( - [len(list(block.gradients())) > 0 for _, block in density.items()] - ) - if contains_gradients: - raise NotImplementedError( - "Clebsch Gordan combinations with gradients not yet implemented." - " Use metatensor.remove_gradients to remove gradients from the input." 
- ) - # Check metadata - if not ( - density.keys.names == ["spherical_harmonics_l", "species_center"] - or density.keys.names - == ["spherical_harmonics_l", "species_center", "species_neighbor"] + def __init__( + self, + max_angular: int, + correlation_order: int, + angular_cutoff: Optional[int] = None, + selected_keys: Optional[Union[Labels, List[Union[Labels, None]]]] = None, + skip_redundant: Optional[Union[bool, List[bool]]] = False, + output_selection: Optional[Union[bool, List[bool]]] = None, + arrays_backend: Optional[str] = None, + cg_combine_backend: Optional[str] = None, ): - raise ValueError( - "input `density` must have key names" - ' ["spherical_harmonics_l", "species_center"] or' - ' ["spherical_harmonics_l", "species_center", "species_neighbor"]' - ) - if not density.component_names == ["spherical_harmonics_m"]: - raise ValueError( - "input `density` must have a single component" - " axis with name `spherical_harmonics_m`" - ) - n_iterations = correlation_order - 1 # num iterations - density = _clebsch_gordan._standardize_keys(density) # standardize metadata - density_correlation = density # create a copy to combine with itself - - # Parse the selected keys - selected_keys_: List[Union[Labels, None]] = _clebsch_gordan._parse_selected_keys( - n_iterations=n_iterations, - like=density.keys.values, - angular_cutoff=angular_cutoff, - selected_keys=selected_keys, - ) - # Parse the bool flags that control skipping of redundant CG combinations - # and TensorMap output from each iteration - skip_redundant, output_selection = _clebsch_gordan._parse_bool_iteration_filters( - n_iterations, - skip_redundant=skip_redundant, - output_selection=output_selection, - ) - - # Pre-compute the keys needed to perform each CG iteration - key_metadata = _clebsch_gordan._precompute_keys( - density.keys, - density.keys, - n_iterations=n_iterations, - selected_keys=selected_keys_, - skip_redundant=skip_redundant, - ) - # Compute CG coefficient cache - if compute_metadata_only: - cg_cache = None - else: - angular_max = max( - _dispatch.max(density.keys.column("spherical_harmonics_l")), - max( - [ - int(_dispatch.max(mdata[2].column("spherical_harmonics_l"))) - for mdata in key_metadata - ] - ), - ) + super().__init__() + if arrays_backend is None: + if torch_jit_is_scripting(): + self._arrays_backend = "torch" + else: + self._arrays_backend = "numpy" + elif arrays_backend == "numpy": + if torch_jit_is_scripting(): + raise ValueError( + "Module is torch scripted but 'numpy' was given as `arrays_backend`" + ) + self._arrays_backend = "numpy" + elif arrays_backend == "torch": + self._arrays_backend = "torch" + else: + raise ValueError( + f"Unkown `arrays_backend` {arrays_backend}." 
+ "Only 'numpy' and 'torch' are supported.") + + # Choosing the optimal cg combine backend + if cg_combine_backend is None: + if self._arrays_backend == "torch": + self._cg_combine_backend = "python-dense" + if self._arrays_backend == "numpy" and HAS_MOPS: + self._cg_combine_backend = "mops" + else: + self._cg_combine_backend = "python-sparse" + elif cg_combine_backend == "python-dense": + self._cg_combine_backend = "python-dense" + elif cg_combine_backend == "python-sparse": + self._cg_combine_backend = "python-sparse" + elif cg_combine_backend == "mops": + if self._arrays_backend == "torch": + raise NotImplementedError( + "'numpy' was determined or given as `arrays_backend` " + "and 'mops' was given as `cg_combine_backend`, " + "but mops does not support torch backend yet" + ) + else: + raise ValueError( + f"Unkown `cg_combined_backend` {cg_combined_backend}." + "Only 'python-dense', 'python-sparse' and 'mops' are supported.") + + if max_angular < 0: + raise ValueError(f"Given `max_angular={max_angular}` negative. " + "Must be greater equal 0.") + self._max_angular = max_angular + + if self._cg_combine_backend == "python-dense": + sparse = False + use_mops = False + elif self._cg_combine_backend == "python-sparse": + sparse = True + use_mops = False + elif self._cg_combine_backend == "mops": + sparse = True + use_mops = True + + self._cg_coeffs = _cg_cache.ClebschGordanReal( + self._max_angular, + sparse=sparse, + use_mops=use_mops, + use_torch=(self._arrays_backend == "torch"), + ).coeffs + + # Check inputs + if correlation_order <= 1: + raise ValueError("`correlation_order` must be > 1") + self._correlation_order = correlation_order + + n_iterations = correlation_order - 1 # num iterations + # Parse the selected keys + self._angular_cutoff = angular_cutoff + + if self._arrays_backend == "torch": + array_like = torch.empty(0) + elif self._arrays_backend == "numpy": + array_like = np.empty(0) + + self._selected_keys : List[Union[Labels, None]] = \ + _clebsch_gordan._parse_selected_keys( + n_iterations=n_iterations, + array_like=array_like, + angular_cutoff=self._angular_cutoff, + selected_keys=selected_keys, + ) + # Parse the bool flags that control skipping of redundant CG combinations + # and TensorMap output from each iteration + self._skip_redundant, self._output_selection = \ + _clebsch_gordan._parse_bool_iteration_filters( + n_iterations, + skip_redundant=skip_redundant, + output_selection=output_selection, + ) + + # Compute CG coefficient cache # TODO: keys have been precomputed, so perhaps we don't need to - # compute all CG coefficients up to angular_max here. + # compute all CG coefficients up to max_angular here. # TODO: use sparse cache by default until we understand under which - # circumstances (and if) dense is faster. 
-            cg_cache = _cg_cache.ClebschGordanReal(
-                angular_max,
-                sparse=sparse,
-                use_torch=isinstance(density[0].values, TorchTensor),
+        # circumstances (and if) dense is faster
+
+    @property
+    def arrays_backend(self):
+        return self._arrays_backend
+
+    @property
+    def cg_combine_backend(self):
+        return self._cg_combine_backend
+
+    @property
+    def cg_coeffs(self) -> Union[_cg_cache.SparseCgDict, _cg_cache.DenseCgDict]:
+        return self._cg_coeffs
+
+    def forward(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]:
+        return self.compute(density)
+
+    def compute(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]:
+        """
+        Performs the density correlations, replacing the former public
+        functions :py:func:`correlate_density` and
+        :py:func:`correlate_density_metadata`.
+
+        :param density: A density descriptor of body order 2 (correlation order 1),
+            in :py:class:`TensorMap` format. This may be, for example, a rascaline
+            :py:class:`SphericalExpansion` or :py:class:`LodeSphericalExpansion`.
+            Alternatively, this could be a multi-center descriptor, such as a pair
+            density.
+        """
+        return self._correlate_density(
+            density,
+            self._correlation_order,
+            self._selected_keys,
+            self._skip_redundant,
+            self._output_selection,
+            self._cg_coeffs,
         )

-    # Perform iterative CG tensor products
-    density_correlations: List[TensorMap] = []
-    for iteration in range(n_iterations):
-        # Define the correlation order of the current iteration
-        correlation_order_it = iteration + 2
-
-        # Combine block pairs
-        blocks_out: List[TensorBlock] = []
-        key_metadata_i = key_metadata[iteration]
-        for j in range(len(key_metadata_i[0])):
-            key_1: LabelsEntry = key_metadata_i[0][j]
-            key_2: LabelsEntry = key_metadata_i[1][j]
-            lambda_out: int = int(key_metadata_i[2].column("spherical_harmonics_l")[j])
-            block_out = _clebsch_gordan._combine_blocks_same_samples(
-                density_correlation.block(key_1),
-                density.block(key_2),
-                lambda_out,
-                cg_cache,
+
+    def compute_metadata(
+        self,
+        density: TensorMap,
+    ) -> Union[TensorMap, List[TensorMap]]:
+        """
+        Returns the metadata-only :py:class:`TensorMap`(s) that would be output
+        by :py:meth:`compute` under the same settings, without performing the
+        actual Clebsch-Gordan tensor products. See :py:meth:`compute` for full
+        documentation.
+
+        :param density: A density descriptor of body order 2 (correlation order 1),
+            in :py:class:`TensorMap` format. This may be, for example, a rascaline
+            :py:class:`SphericalExpansion` or :py:class:`LodeSphericalExpansion`.
+            Alternatively, this could be a multi-center descriptor, such as a pair
+            density.
+        """
+        return self._correlate_density(
+            density,
+            self._correlation_order,
+            self._selected_keys,
+            self._skip_redundant,
+            self._output_selection,
+            None,
+        )
+
+
+    # ====================================================================
+    # ===== Private functions that do the work on the TensorMap level
+    # ====================================================================
+    # TODO replace arguments with self.
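[Editor's note] A hypothetical end-to-end use of the class API defined above; the hyper-parameter values are illustrative only, and `density` is assumed to come from a rascaline SphericalExpansion calculation:

    calculator = DensityCorrelations(
        max_angular=6,  # must cover every angular channel reached by the CG products
        correlation_order=3,  # i.e. two CG iterations on top of the input density
    )
    metadata_only = calculator.compute_metadata(density)  # keys and shapes, no CG math
    correlations = calculator.compute(density)  # full CG tensor products
    # forward() simply calls compute(), so the module also works under torch.jit.script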
+ def _correlate_density( + self, + density: TensorMap, + correlation_order: int, # TODO remove since it is self + selected_keys: List[Union[Labels, None]], # TODO remove since it is self + skip_redundant: List[bool], # TODO remove since it is self + output_selection: List[bool], # TODO remove since it is self + cg_coeffs: Union[_cg_cache.SparseCgDict, _cg_cache.DenseCgDict, None], + ) -> Union[TensorMap, List[TensorMap]]: + + # Check metadata + if not ( + density.keys.names == ["spherical_harmonics_l", "species_center"] + or density.keys.names + == ["spherical_harmonics_l", "species_center", "species_neighbor"] + ): + raise ValueError( + "input `density` must have key names" + ' ["spherical_harmonics_l", "species_center"] or' + ' ["spherical_harmonics_l", "species_center", "species_neighbor"]' ) - blocks_out.append(block_out) - keys_out = key_metadata[iteration][2] - density_correlation = TensorMap(keys=keys_out, blocks=blocks_out) - - # If this tensor is to be included in the output, move the [l1, l2, ...] - # keys to properties and store - if output_selection[iteration]: - density_correlations.append( - density_correlation.keys_to_properties( - [f"l{i}" for i in range(1, correlation_order_it + 1)] - + [f"k{i}" for i in range(2, correlation_order_it)] - ) + if not density.component_names == ["spherical_harmonics_m"]: + raise ValueError( + "input `density` must have a single component" + " axis with name `spherical_harmonics_m`" + ) + n_iterations = correlation_order - 1 # num iterations + density = _clebsch_gordan._standardize_keys(density) # standardize metadata + density_correlation = density # create a copy to combine with itself + + # TODO: implement combinations of gradients too + # we have to create a bool array with dispatch to be TorchScript compatible + contains_gradients = all( + [len(list(block.gradients())) > 0 for _, block in density.items()] + ) + if contains_gradients: + raise NotImplementedError( + "Clebsch Gordan combinations with gradients not yet implemented." + " Use metatensor.remove_gradients to remove gradients from the input." ) - # Drop redundant key names. TODO: these should be part of the global - # matadata associated with the TensorMap. Awaiting this functionality in - # metatensor. 
- for i, tensor in enumerate(density_correlations): - keys = tensor.keys - if len(_dispatch.unique(tensor.keys.column("order_nu"))) == 1: - keys = keys.remove(name="order_nu") - if len(_dispatch.unique(tensor.keys.column("inversion_sigma"))) == 1: - keys = keys.remove(name="inversion_sigma") - density_correlations[i] = TensorMap( - keys=keys, blocks=[b.copy() for b in tensor.blocks()] + # Pre-compute the keys needed to perform each CG iteration + key_metadata = _clebsch_gordan._precompute_keys( + density.keys, + density.keys, + n_iterations=n_iterations, + selected_keys=selected_keys, + skip_redundant=skip_redundant, ) + max_angular = max( + _dispatch.max(density.keys.column("spherical_harmonics_l")), + max( + [ + int(_dispatch.max(mdata[2].column("spherical_harmonics_l"))) + for mdata in key_metadata + ] + ), + ) + if self._max_angular < max_angular: + raise ValueError( + f"The density you provide requires max_angular={max_angular} " + f"but on initialization max_angular={self._max_angular} was given") + + # Perform iterative CG tensor products + density_correlations: List[TensorMap] = [] + for iteration in range(n_iterations): + # Define the correlation order of the current iteration + correlation_order_it = iteration + 2 + + # Combine block pairs + blocks_out: List[TensorBlock] = [] + key_metadata_i = key_metadata[iteration] + for j in range(len(key_metadata_i[0])): + key_1: LabelsEntry = key_metadata_i[0][j] + key_2: LabelsEntry = key_metadata_i[1][j] + lambda_out: int = int(key_metadata_i[2].column("spherical_harmonics_l")[j]) + block_out = _clebsch_gordan._combine_blocks_same_samples( + density_correlation.block(key_1), + density.block(key_2), + lambda_out, + cg_coeffs, + ) + blocks_out.append(block_out) + keys_out = key_metadata[iteration][2] + density_correlation = TensorMap(keys=keys_out, blocks=blocks_out) + + # If this tensor is to be included in the output, move the [l1, l2, ...] + # keys to properties and store + if output_selection[iteration]: + density_correlations.append( + density_correlation.keys_to_properties( + [f"l{i}" for i in range(1, correlation_order_it + 1)] + + [f"k{i}" for i in range(2, correlation_order_it)] + ) + ) + + # Drop redundant key names. TODO: these should be part of the global + # matadata associated with the TensorMap. Awaiting this functionality in + # metatensor. 
+ for i, tensor in enumerate(density_correlations): + keys = tensor.keys + if len(_dispatch.unique(tensor.keys.column("order_nu"))) == 1: + keys = keys.remove(name="order_nu") + if len(_dispatch.unique(tensor.keys.column("inversion_sigma"))) == 1: + keys = keys.remove(name="inversion_sigma") + density_correlations[i] = TensorMap( + keys=keys, blocks=[b.copy() for b in tensor.blocks()] + ) - # Return a single TensorMap in the simple case - if len(density_correlations) == 1: - return density_correlations[0] + # Return a single TensorMap in the simple case + if len(density_correlations) == 1: + return density_correlations[0] - # Otherwise return a list of TensorMaps - return density_correlations + # Otherwise return a list of TensorMaps + return density_correlations diff --git a/python/rascaline/tests/utils/correlate_density.py b/python/rascaline/tests/utils/correlate_density.py index d72dcbe99..78204f78e 100644 --- a/python/rascaline/tests/utils/correlate_density.py +++ b/python/rascaline/tests/utils/correlate_density.py @@ -11,11 +11,12 @@ from rascaline.utils import PowerSpectrum from rascaline.utils.clebsch_gordan import _dispatch from rascaline.utils.clebsch_gordan._cg_cache import ClebschGordanReal -from rascaline.utils.clebsch_gordan._clebsch_gordan import _standardize_keys +from rascaline.utils.clebsch_gordan._clebsch_gordan import ( + _standardize_keys, + _precompute_keys +) from rascaline.utils.clebsch_gordan.correlate_density import ( - _correlate_density, - correlate_density, - correlate_density_metadata, + DensityCorrelations ) @@ -43,25 +44,11 @@ try: import torch - from torch import Tensor as TorchTensor - - torch_dtype = torch.dtype - torch_device = torch.device HAS_TORCH = True except ImportError: HAS_TORCH = False - # PR TODO below needed? 
- class TorchTensor: - pass - - class torch_dtype: - pass - - class torch_device: - pass - DATA_ROOT = os.path.join(os.path.dirname(__file__), "data") @@ -173,6 +160,23 @@ def get_norm(tensor: TensorMap): return norm +def get_max_angular(density: TensorMap, calculator: DensityCorrelations): + key_metadata = _precompute_keys( + density.keys, + density.keys, + n_iterations=calculator._n_iterations, + selected_keys=calculator._selected_keys, + skip_redundant=calculator._skip_redundant, + ) + return max( + _dispatch.max(density.keys.column("spherical_harmonics_l")), + max( + [ + int(_dispatch.max(mdata[2].column("spherical_harmonics_l"))) + for mdata in key_metadata + ] + ), + ) # ============ Test equivariance ============ @@ -192,19 +196,14 @@ def test_so3_equivariance(): nu_1 = spherical_expansion(frames) nu_1_so3 = spherical_expansion(frames_so3) - - nu_3 = correlate_density( - density=nu_1, - correlation_order=nu_target, - angular_cutoff=angular_cutoff, - selected_keys=selected_keys, - ) - nu_3_so3 = correlate_density( - density=nu_1_so3, + corr_calculator = DensityCorrelations( + max_angular=3, correlation_order=nu_target, angular_cutoff=angular_cutoff, selected_keys=selected_keys, ) + nu_3 = corr_calculator.compute(nu_1) + nu_3_so3 = corr_calculator.compute(nu_1_so3) nu_3_transf = wig.transform_tensormap_so3(nu_3) assert metatensor.allclose(nu_3_transf, nu_3_so3) @@ -226,18 +225,14 @@ def test_o3_equivariance(): nu_1 = spherical_expansion(frames) nu_1_o3 = spherical_expansion(frames_o3) - nu_3 = correlate_density( - density=nu_1, - correlation_order=nu_target, - angular_cutoff=angular_cutoff, - selected_keys=selected_keys, - ) - nu_3_o3 = correlate_density( - density=nu_1_o3, + corr_calculator = DensityCorrelations( + max_angular=angular_cutoff, correlation_order=nu_target, angular_cutoff=angular_cutoff, selected_keys=selected_keys, ) + nu_3 = corr_calculator.compute(nu_1) + nu_3_o3 = corr_calculator.compute(nu_1_o3) nu_3_transf = wig.transform_tensormap_o3(nu_3) assert metatensor.allclose(nu_3_transf, nu_3_o3) @@ -261,13 +256,14 @@ def test_lambda_soap_vs_powerspectrum(): # Build a lambda-SOAP density = spherical_expansion(frames) - lsoap = correlate_density( - density=density, + corr_calculator = DensityCorrelations( + max_angular=SPHEX_HYPERS["max_angular"], correlation_order=2, selected_keys=Labels( names=["spherical_harmonics_l"], values=np.array([0]).reshape(-1, 1) ), ) + lsoap = corr_calculator.compute(density) keys = lsoap.keys.remove(name="spherical_harmonics_l") lsoap = TensorMap(keys=keys, blocks=[b.copy() for b in lsoap.blocks()]) @@ -311,21 +307,23 @@ def test_correlate_density_norm(correlation_order): nu1 = spherical_expansion_small(frames) # Build higher body order tensor without sorting the l lists - nux = correlate_density( - nu1, + corr_calculator = DensityCorrelations( + max_angular=SPHEX_HYPERS_SMALL["max_angular"]*correlation_order, correlation_order=correlation_order, angular_cutoff=None, selected_keys=None, skip_redundant=False, ) - # Build higher body order tensor *with* sorting the l lists - nux_sorted_l = correlate_density( - nu1, + corr_calculator_skip_redundant = DensityCorrelations( + max_angular=SPHEX_HYPERS_SMALL["max_angular"]*correlation_order, correlation_order=correlation_order, angular_cutoff=None, selected_keys=None, skip_redundant=True, ) + nux = corr_calculator.compute(nu1) + # Build higher body order tensor *with* sorting the l lists + nux_sorted_l = corr_calculator_skip_redundant.compute(nu1) # Standardize the features by passing through the CG 
combination code but with # no iterations (i.e. body order 1 -> 1) @@ -450,20 +448,21 @@ def test_correlate_density_dense_sparse_agree(): frames = h2o_periodic() density = spherical_expansion_small(frames) - # NOTE: testing the private function here so we can control the use of - # sparse v dense CG cache - n_body_sparse = _correlate_density( - density, - correlation_order=2, - compute_metadata_only=False, - sparse=True, + correlation_order = 2 + corr_calculator_sparse = DensityCorrelations( + max_angular=SPHEX_HYPERS_SMALL["max_angular"]*correlation_order, + correlation_order=correlation_order, + cg_combine_backend="python-sparse" ) - n_body_dense = _correlate_density( - density, - correlation_order=2, - compute_metadata_only=False, - sparse=False, + corr_calculator_dense = DensityCorrelations( + max_angular=SPHEX_HYPERS_SMALL["max_angular"]*correlation_order, + correlation_order=correlation_order, + cg_combine_backend="python-dense" ) + # NOTE: testing the private function here so we can control the use of + # sparse v dense CG cache + n_body_sparse = corr_calculator_sparse.compute(density) + n_body_dense = corr_calculator_dense.compute(density) assert metatensor.allclose(n_body_sparse, n_body_dense, atol=1e-8, rtol=1e-8) @@ -480,26 +479,22 @@ def test_correlate_density_metadata_agree(): :py:func:`correlate_density_metadata` agree. """ frames = h2o_isolated() - correlation_order = 3 skip_redundant = True - for nu1 in [spherical_expansion_small(frames), spherical_expansion(frames)]: - # Build higher body order tensor with CG computation - nux = correlate_density( - nu1, - correlation_order=correlation_order, + + for max_angular, nu1 in [(2, spherical_expansion_small(frames)), + (3, spherical_expansion(frames))]: + corr_calculator = DensityCorrelations( + max_angular=max_angular, + correlation_order=3, angular_cutoff=3, selected_keys=None, skip_redundant=skip_redundant, ) + # Build higher body order tensor with CG computation + nux = corr_calculator.compute(nu1) # Build higher body order tensor without CG computation - i.e. metadata # only - nux_metadata_only = correlate_density_metadata( - nu1, - correlation_order=correlation_order, - angular_cutoff=3, - selected_keys=None, - skip_redundant=skip_redundant, - ) + nux_metadata_only = corr_calculator.compute_metadata(nu1) assert metatensor.equal_metadata(nux, nux_metadata_only) @@ -522,13 +517,15 @@ def test_correlate_density_angular_selection( frames = h2o_isolated() nu_1 = spherical_expansion(frames) - nu_2 = correlate_density( - density=nu_1, - correlation_order=2, + correlation_order = 2 + corr_calculator = DensityCorrelations( + max_angular=SPHEX_HYPERS["max_angular"]*correlation_order, + correlation_order=correlation_order, angular_cutoff=None, selected_keys=selected_keys, skip_redundant=skip_redundant, ) + nu_2 = corr_calculator.compute(nu_1) if selected_keys is None: assert np.all( diff --git a/tox.ini b/tox.ini index b0d4c6e60..e78e2656a 100644 --- a/tox.ini +++ b/tox.ini @@ -72,12 +72,14 @@ commands = # note: platform_system can be "Linux","Darwin", or "Windows". 
 description =
     Run Python unit tests with all dependencies installed (ase, pyscf,
-    and chemfiles are optional dependencies) and torch
+    and chemfiles are optional dependencies) and torch as array backend
 deps =
     {[testenv:all-deps]deps}
     torch
 commands =
+    # for the moment only the density correlation tests run on torch arrays
-    pytest {[testenv]test_options} {posargs}
+    pytest {[testenv]test_options} {posargs} \
+        python/rascaline/tests/utils/correlate_density.py

 [testenv:min-deps]
 description = Run Python unit tests with the minimal dependencies installed

From 5caaec8e5fafdc173f2903cd2519c448fb0276f8 Mon Sep 17 00:00:00 2001
From: Alexander Goscinski
Date: Wed, 14 Feb 2024 06:50:04 +0100
Subject: [PATCH 03/23] all-deps all-deps-torch pass

---
 .../tests/utils/correlate_density.py          |  32 ++--
 .../utils/clebsch_gordan/__init__.py          |   2 +-
 .../utils/clebsch_gordan/_cg_cache.py         |  63 ++++----
 .../utils/clebsch_gordan/_clebsch_gordan.py   |   7 +-
 .../utils/clebsch_gordan/correlate_density.py | 141 +++++++++++-------
 .../tests/utils/correlate_density.py          |  40 ++---
 6 files changed, 167 insertions(+), 118 deletions(-)

diff --git a/python/rascaline-torch/tests/utils/correlate_density.py b/python/rascaline-torch/tests/utils/correlate_density.py
index 700db244c..d62c70ca4 100644
--- a/python/rascaline-torch/tests/utils/correlate_density.py
+++ b/python/rascaline-torch/tests/utils/correlate_density.py
@@ -10,7 +10,7 @@
 from metatensor.torch import Labels, TensorBlock, TensorMap  # noqa

 import rascaline.torch
-from rascaline.torch.utils.clebsch_gordan.correlate_density import correlate_density
+from rascaline.torch.utils.clebsch_gordan.correlate_density import DensityCorrelations


 DATA_ROOT = os.path.join(os.path.dirname(__file__), "data")
@@ -57,25 +57,27 @@ def test_torch_script_correlate_density_angular_selection(
     """
     frames = h2o_isolated()
     nu_1 = spherical_expansion(frames)
-    scripted_correlate_density = torch.jit.script(correlate_density)
-    scripted_nu_2 = scripted_correlate_density(
-        density=nu_1,
-        correlation_order=2,
-        angular_cutoff=None,
-        selected_keys=selected_keys,
-        skip_redundant=skip_redundant,
-    )
-    nu_2 = correlate_density(
-        density=nu_1,
-        correlation_order=2,
+    correlation_order = 2
+    corr_calculator = DensityCorrelations(
+        max_angular=SPHEX_HYPERS["max_angular"]*correlation_order,
+        correlation_order=correlation_order,
         angular_cutoff=None,
         selected_keys=selected_keys,
         skip_redundant=skip_redundant,
     )
+
+    scripted_corr_calculator = torch.jit.script(corr_calculator)
+
+    # Test compute
+    nu_2 = corr_calculator.compute(nu_1)
+    scripted_nu_2 = scripted_corr_calculator.compute(nu_1)
+
+    assert metatensor.torch.equal_metadata(scripted_nu_2, nu_2)
+    assert metatensor.torch.allclose(scripted_nu_2, nu_2)
+
+    # Test compute_metadata
+    scripted_nu_2 = scripted_corr_calculator.compute_metadata(nu_1)

     assert metatensor.torch.equal_metadata(scripted_nu_2, nu_2)
-    # The test below cannot pass for the moment until we can script wigners or extract
-    # cg_cache out of the scripting.
For the moment the output is only zeros - # assert metatensor.torch.allclose(scripted_nu_2, nu_2) def test_save_load(): diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py index dbdc2e5eb..0aa1ea28d 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py @@ -1,4 +1,4 @@ -from .correlate_density import DensityCorrelations # noqa +from .correlate_density import DensityCorrelations # noqa __all__ = [ diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py index cdb043800..85ba42725 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py @@ -10,7 +10,7 @@ import wigners from . import _dispatch -from ._classes import Array, torch_jit_annotate, torch_jit_is_scripting, TorchModule +from ._classes import Array, TorchModule, torch_jit_annotate, torch_jit_is_scripting try: @@ -333,7 +333,7 @@ def _build_cg_coeff_dict( else: coeff_dict: Union[SparseCgDict, DenseCgDict] = DenseCgDict() - if use_torch or torch_jit_is_scripting(): + if use_torch: complex_like = torch.empty(0, dtype=torch.complex128) double_like = torch.empty(0, dtype=torch.double) else: @@ -517,12 +517,7 @@ def _complex_clebsch_gordan_matrix(l1: int, l2: int, lambda_: int, like: Array): if abs(l1 - l2) > lambda_ or abs(l1 + l2) < lambda_: return _dispatch.zeros_like(like, (2 * l1 + 1, 2 * l2 + 1, 2 * lambda_ + 1)) else: - # TODO temporary disable wigners package till refactor of cg correlate_density - # API - if torch_jit_is_scripting(): - return _dispatch.zeros_like(like, (2 * l1 + 1, 2 * l2 + 1, 2 * lambda_ + 1)) - else: - return wigners.clebsch_gordan_array(l1, l2, lambda_) + return wigners.clebsch_gordan_array(l1, l2, lambda_) # ================================================= @@ -534,7 +529,8 @@ def combine_arrays( arr_1: Array, arr_2: Array, lambda_: int, - cg_coeffs: Union[SparseCgDict, DenseCgDict, None], + cg_coeffs: Union[SparseCgDict, DenseCgDict], + cg_backend: Optional[str] = None, ) -> Array: """ Couples arrays `arr_1` and `arr_2` corresponding to the irreducible @@ -552,7 +548,7 @@ def combine_arrays( The ouput array has shape (n_i, 2 * lambda + 1, n_p * n_q), where lambda is the input parameter `lambda_`. - The Clebsch-Gordan coefficients are cached in `cg_cache`. Currently, these + The Clebsch-Gordan coefficients are cached in `cg_coeffs`. Currently, these must be produced by the ClebschGordanReal class in this module. These coefficients can be stored in either sparse dictionaries or dense arrays. @@ -570,25 +566,31 @@ def combine_arrays( :param arr_2: array with the m values for l2 with shape [n_samples, 2 * l2 + 1, n_p_properties] :param lambda_: int value of the resulting coupled channel - :param cg_cache: either a sparse dictionary with keys (m1, m2, mu) and array + :param cg_coeffs: either a sparse dictionary with keys (m1, m2, mu) and array values being sparse blocks of shape , or a dense array of shape [(2 * l1 +1) * (2 * l2 +1), (2 * lambda_ + 1)]. - If it is None we only return an empty array + If it is None we only return an empty array of the shape. + :param cg_backend: specifies the combine backend with sparse CG coefficients. + It can have the values "python-sparse" and "mops". 
+
    :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties]
    """
    # If just precomputing metadata, return an empty array
+
    if cg_coeffs is None:
        return empty_combine(arr_1, arr_2, lambda_)
    # We have to temporary store it so TorchScript can infer the correct type
    if isinstance(cg_coeffs, SparseCgDict):
-        return sparse_combine(arr_1, arr_2, lambda_, cg_coeffs)
+        return sparse_combine(
+            arr_1, arr_2, lambda_, cg_coeffs, cg_backend
+        )
    elif isinstance(cg_coeffs, DenseCgDict):
        return dense_combine(arr_1, arr_2, lambda_, cg_coeffs)
    else:
        raise ValueError(
-            "Wrong type of cg coeffs, found type {type(cg_cache.coeffs)},"
+            "Wrong type of cg coeffs, found type {type(cg_coeffs)},"
            " but only support SparseCgDict, DenseCgDict"
        )
@@ -616,7 +618,8 @@
 def sparse_combine(
     arr_1: Array,
     arr_2: Array,
     lambda_: int,
-    cg_cache_coeffs: SparseCgDict,
+    cg_coeffs: SparseCgDict,
+    cg_backend: str,
 ) -> Array:
     """
     Performs a Clebsch-Gordan combination step on 2 arrays using sparse
@@ -629,8 +632,10 @@
     :param arr_2: array with the m values for l2 with shape [n_samples, 2 * l2 +
         1, n_p_properties]
     :param lambda_: int value of the resulting coupled channel
-    :param cg_cache: sparse dictionary with keys (m1, m2, mu) and array values
+    :param cg_coeffs: sparse dictionary with keys (m1, m2, mu) and array values
         being sparse blocks of shape
+    :param cg_backend: specifies the combine backend with sparse CG coefficients.
+        It can have the values "python-sparse" and "mops"

     :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties]
     """
@@ -646,15 +651,18 @@
     n_p = arr_1.shape[2]  # number of properties in arr_1
     n_q = arr_2.shape[2]  # number of properties in arr_2

-    if isinstance(arr_1, TorchTensor) or not HAS_MOPS:
+    # The isinstance and cg_backend checks make the logic a bit redundant, but
+    # the redundancy from the isinstance check is required for TorchScript. The
+    # logic can be made more straightforward once MOPS supports TorchScript
+    if isinstance(arr_1, TorchTensor) or cg_backend == "python-sparse":
         # Initialise output array
         arr_out = _dispatch.zeros_like(arr_1, (n_i, 2 * lambda_ + 1, n_p * n_q))

         # Get the corresponding Clebsch-Gordan coefficients
-        cg_coeffs = cg_cache_coeffs.get(l1, l2, lambda_)
+        cg_l1l2lam = cg_coeffs.get(l1, l2, lambda_)

         # Fill in each mu component of the output array in turn
-        for item in cg_coeffs.keys():
+        for item in cg_l1l2lam.keys():
             m1 = item[0]
             m2 = item[1]
             mu = item[2]
@@ -662,11 +670,11 @@
             arr_out[:, mu, :] += (
                 arr_1[:, m1, :, None]
                 * arr_2[:, m2, None, :]
-                * cg_coeffs.get(m1, m2, mu)
+                * cg_l1l2lam.get(m1, m2, mu)
             ).reshape(n_i, n_p * n_q)

         return arr_out

-    elif isinstance(arr_1, np.ndarray) and HAS_MOPS:
+    elif isinstance(arr_1, np.ndarray) and cg_backend == "mops":
         # Reshape
         arr_1 = np.repeat(arr_1[:, :, :, None], n_q, axis=3).reshape(
             n_i, 2 * l1 + 1, n_p * n_q
@@ -682,7 +690,7 @@
         arr_out = sap(
             arr_1,
             arr_2,
-            *cg_cache_coeffs[(l1, l2, lambda_)],
+            *cg_coeffs.get(l1, l2, lambda_),
             output_size=2 * lambda_ + 1,
         )
         assert arr_out.shape == (n_i * n_p * n_q, 2 * lambda_ + 1)
@@ -692,6 +700,11 @@
         arr_out = _dispatch.swapaxes(arr_out, 1, 2)
         return arr_out

+    elif cg_backend not in ["python-sparse", "mops"]:
+        raise ValueError(
+            f"sparse cg backend '{cg_backend}' is not known. "
+            "Only values 'python-sparse' and 'mops' are valid."
+ ) else: raise TypeError(UNKNOWN_ARRAY_TYPE) @@ -700,7 +713,7 @@ def dense_combine( arr_1: Array, arr_2: Array, lambda_: int, - cg_cache_coeffs: DenseCgDict, + cg_coeffs: DenseCgDict, ) -> Array: """ Performs a Clebsch-Gordan combination step on 2 arrays using a dense @@ -713,7 +726,7 @@ def dense_combine( :param arr_2: array with the m values for l2 with shape [n_samples, 2 * l2 + 1, n_p_properties] :param lambda_: int value of the resulting coupled channel - :param cg_cache: dense array of shape [(2 * l1 +1) * (2 * l2 +1), (2 * lambda_ + + :param cg_coeffs: dense array of shape [(2 * l1 +1) * (2 * l2 +1), (2 * lambda_ + 1)] :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties] @@ -721,7 +734,7 @@ def dense_combine( # Infer l1 and l2 from the len of the length of axis 1 of each tensor l1 = (arr_1.shape[1] - 1) // 2 l2 = (arr_2.shape[1] - 1) // 2 - cg_coeffs = cg_cache_coeffs.get(l1, l2, lambda_) + cg_coeffs = cg_coeffs.get(l1, l2, lambda_) # (samples None None l1_mu q) * (samples l2_mu p None None) # -> (samples l2_mu p l1_mu q) we broadcast it in this way diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py index fa392ea41..ae7ac14b8 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py @@ -18,6 +18,7 @@ torch_jit_is_scripting, ) + # ================================================================== # ===== Functions to handle metadata # ================================================================== @@ -591,15 +592,19 @@ def _combine_blocks_same_samples( block_2: TensorBlock, lambda_: int, cg_coeffs: Union[_cg_cache.SparseCgDict, _cg_cache.DenseCgDict, None], + cg_backend: str ) -> TensorBlock: """ For a given pair of TensorBlocks and desired angular channel, combines the values arrays and returns a new TensorBlock. + + If cg_coeffs are None, tensor blocks with empty arrays are returned that only + contain the metadata. """ # Do the CG combination - single center so no shape pre-processing required combined_values = _cg_cache.combine_arrays( - block_1.values, block_2.values, lambda_, cg_coeffs + block_1.values, block_2.values, lambda_, cg_coeffs, cg_backend ) # Infer the new nu value: block 1's properties are nu pairs of diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py index 525808932..392687b44 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py @@ -5,13 +5,22 @@ """ from typing import List, Optional, Union + import numpy as np from . 
import _cg_cache, _clebsch_gordan, _dispatch
-from ._classes import Labels, LabelsEntry, TensorBlock, TensorMap, TorchTensor, TorchModule, torch_jit_is_scripting
+from ._classes import (
+    Labels,
+    LabelsEntry,
+    TensorBlock,
+    TensorMap,
+    TorchModule,
+    torch_jit_is_scripting,
+)
+

 try:
-    from mops import sparse_accumulation_of_products as sap  # noqa F401
+    import mops

     HAS_MOPS = True
 except ImportError:
@@ -28,6 +37,7 @@
 # ===== Public API functions
 # ======================================================================

+
 class DensityCorrelations(TorchModule):
     """
     Takes iterative Clebsch-Gordan (CG) tensor products of a density descriptor
@@ -77,7 +87,7 @@ class DensityCorrelations(TorchModule):
         this controls the output at each corresponding iteration. If None is
         passed, only the final iteration is output.
     :param arrays_backend: Determines the array backend, either "numpy" or "torch"
-    :param cg_combine_backend: Determines the backend for the CG combination. It can
+    :param cg_backend: Determines the backend for the CG combination. It can
         be either "python-sparse", "python-dense" or "mops". If the CG combination
        is performed on sparse coefficients, it means that for each (l1, l2, lambda)
        block the (m1, m2, mu) coefficients are stored in a sparse format, only storing
@@ -95,6 +105,7 @@ class DensityCorrelations(TorchModule):
        from a single iteration is requested, a :py:class:`TensorMap` is
        returned instead.
    """
+
    def __init__(
        self,
        max_angular: int,
@@ -104,7 +115,7 @@ def __init__(
        skip_redundant: Optional[Union[bool, List[bool]]] = False,
        output_selection: Optional[Union[bool, List[bool]]] = None,
        arrays_backend: Optional[str] = None,
-        cg_combine_backend: Optional[str] = None,
+        cg_backend: Optional[str] = None,
    ):
        super().__init__()
        if arrays_backend is None:
@@ -116,51 +127,55 @@ def __init__(
            if torch_jit_is_scripting():
                raise ValueError(
                    "Module is torch scripted but 'numpy' was given as `arrays_backend`"
-                    )
+                )
            self._arrays_backend = "numpy"
        elif arrays_backend == "torch":
            self._arrays_backend = "torch"
        else:
            raise ValueError(
                f"Unknown `arrays_backend` {arrays_backend}."
-                "Only 'numpy' and 'torch' are supported.")
+                "Only 'numpy' and 'torch' are supported."
+            )

        # Choosing the optimal cg combine backend
-        if cg_combine_backend is None:
+        if cg_backend is None:
            if self._arrays_backend == "torch":
-                self._cg_combine_backend = "python-dense"
+                self._cg_backend = "python-dense"
-            if self._arrays_backend == "numpy" and HAS_MOPS:
+            elif self._arrays_backend == "numpy" and HAS_MOPS:
-                self._cg_combine_backend = "mops"
+                self._cg_backend = "mops"
            else:
-                self._cg_combine_backend = "python-sparse"
-        elif cg_combine_backend == "python-dense":
-            self._cg_combine_backend = "python-dense"
-        elif cg_combine_backend == "python-sparse":
-            self._cg_combine_backend = "python-sparse"
-        elif cg_combine_backend == "mops":
+                self._cg_backend = "python-sparse"
+        elif cg_backend == "python-dense":
+            self._cg_backend = "python-dense"
+        elif cg_backend == "python-sparse":
+            self._cg_backend = "python-sparse"
+        elif cg_backend == "mops":
            if self._arrays_backend == "torch":
                raise NotImplementedError(
-                    "'numpy' was determined or given as `arrays_backend` "
-                    "and 'mops' was given as `cg_combine_backend`, "
+                    "'torch' was determined or given as `arrays_backend` "
+                    "and 'mops' was given as `cg_backend`, "
                    "but mops does not support torch backend yet"
                )
+            self._cg_backend = "mops"
        else:
            raise ValueError(
-                f"Unkown `cg_combined_backend` {cg_combined_backend}."
-                "Only 'python-dense', 'python-sparse' and 'mops' are supported.")
+                f"Unknown `cg_backend` {cg_backend}."
+                "Only 'python-dense', 'python-sparse' and 'mops' are supported."
+ ) if max_angular < 0: - raise ValueError(f"Given `max_angular={max_angular}` negative. " - "Must be greater equal 0.") + raise ValueError( + f"Given `max_angular={max_angular}` negative. " + "Must be greater equal 0." + ) self._max_angular = max_angular - if self._cg_combine_backend == "python-dense": + if self._cg_backend == "python-dense": sparse = False use_mops = False - elif self._cg_combine_backend == "python-sparse": + elif self._cg_backend == "python-sparse": sparse = True use_mops = False - elif self._cg_combine_backend == "mops": + elif self._cg_backend == "mops": sparse = True use_mops = True @@ -185,21 +200,23 @@ def __init__( elif self._arrays_backend == "numpy": array_like = np.empty(0) - self._selected_keys : List[Union[Labels, None]] = \ + self._selected_keys: List[Union[Labels, None]] = ( _clebsch_gordan._parse_selected_keys( n_iterations=n_iterations, array_like=array_like, angular_cutoff=self._angular_cutoff, selected_keys=selected_keys, ) + ) # Parse the bool flags that control skipping of redundant CG combinations # and TensorMap output from each iteration - self._skip_redundant, self._output_selection = \ + self._skip_redundant, self._output_selection = ( _clebsch_gordan._parse_bool_iteration_filters( n_iterations, skip_redundant=skip_redundant, output_selection=output_selection, ) + ) # Compute CG coefficient cache # TODO: keys have been precomputed, so perhaps we don't need to @@ -207,13 +224,29 @@ def __init__( # TODO: use sparse cache by default until we understand under which # circumstances (and if) dense is faster + @property + def correlation_order(self): + return self._correlation_order + + @property + def selected_keys(self) -> List[Union[Labels, None]]: + return self._selected_keys + + @property + def skip_redundant(self) -> List[bool]: + return self._skip_redundant + + @property + def output_selection(self) -> List[bool]: + return self._output_selection + @property def arrays_backend(self): return self._arrays_backend @property - def cg_combine_backend(self): - return self._cg_combine_backend + def cg_backend(self): + return self._cg_backend @property def cg_coeffs(self) -> Union[_cg_cache.SparseCgDict, _cg_cache.DenseCgDict]: @@ -235,14 +268,9 @@ def compute(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]: """ return self._correlate_density( density, - self._correlation_order, - self._selected_keys, - self._skip_redundant, - self._output_selection, - self._cg_coeffs, + compute_metadata=False, ) - def compute_metadata( self, density: TensorMap, @@ -261,14 +289,9 @@ def compute_metadata( """ return self._correlate_density( density, - self._correlation_order, - self._selected_keys, - self._skip_redundant, - self._output_selection, - None, + compute_metadata=True, ) - # ==================================================================== # ===== Private functions that do the work on the TensorMap level # ==================================================================== @@ -276,11 +299,7 @@ def compute_metadata( def _correlate_density( self, density: TensorMap, - correlation_order: int, # TODO remove since it is self - selected_keys: List[Union[Labels, None]], # TODO remove since it is self - skip_redundant: List[bool], # TODO remove since it is self - output_selection: List[bool], # TODO remove since it is self - cg_coeffs: Union[_cg_cache.SparseCgDict, _cg_cache.DenseCgDict, None], + compute_metadata: bool ) -> Union[TensorMap, List[TensorMap]]: # Check metadata @@ -299,7 +318,7 @@ def _correlate_density( "input `density` must have a 
single component" " axis with name `spherical_harmonics_m`" ) - n_iterations = correlation_order - 1 # num iterations + n_iterations = self._correlation_order - 1 # num iterations density = _clebsch_gordan._standardize_keys(density) # standardize metadata density_correlation = density # create a copy to combine with itself @@ -319,25 +338,30 @@ def _correlate_density( density.keys, density.keys, n_iterations=n_iterations, - selected_keys=selected_keys, - skip_redundant=skip_redundant, + selected_keys=self._selected_keys, + skip_redundant=self._skip_redundant, ) max_angular = max( _dispatch.max(density.keys.column("spherical_harmonics_l")), - max( - [ - int(_dispatch.max(mdata[2].column("spherical_harmonics_l"))) - for mdata in key_metadata - ] - ), + max( + [ + int(_dispatch.max(mdata[2].column("spherical_harmonics_l"))) + for mdata in key_metadata + ] + ), ) if self._max_angular < max_angular: raise ValueError( - f"The density you provide requires max_angular={max_angular} " - f"but on initialization max_angular={self._max_angular} was given") + f"The density you provide requires max_angular={max_angular} " + f"but on initialization max_angular={self._max_angular} was given" + ) # Perform iterative CG tensor products density_correlations: List[TensorMap] = [] + if compute_metadata: + cg_coeffs = None + else: + cg_coeffs = self._cg_coeffs for iteration in range(n_iterations): # Define the correlation order of the current iteration correlation_order_it = iteration + 2 @@ -348,12 +372,15 @@ def _correlate_density( for j in range(len(key_metadata_i[0])): key_1: LabelsEntry = key_metadata_i[0][j] key_2: LabelsEntry = key_metadata_i[1][j] - lambda_out: int = int(key_metadata_i[2].column("spherical_harmonics_l")[j]) + lambda_out: int = int( + key_metadata_i[2].column("spherical_harmonics_l")[j] + ) block_out = _clebsch_gordan._combine_blocks_same_samples( density_correlation.block(key_1), density.block(key_2), lambda_out, cg_coeffs, + self._cg_backend, ) blocks_out.append(block_out) keys_out = key_metadata[iteration][2] @@ -361,7 +388,7 @@ def _correlate_density( # If this tensor is to be included in the output, move the [l1, l2, ...] 
# keys to properties and store - if output_selection[iteration]: + if self._output_selection[iteration]: density_correlations.append( density_correlation.keys_to_properties( [f"l{i}" for i in range(1, correlation_order_it + 1)] diff --git a/python/rascaline/tests/utils/correlate_density.py b/python/rascaline/tests/utils/correlate_density.py index 78204f78e..b2f2784f0 100644 --- a/python/rascaline/tests/utils/correlate_density.py +++ b/python/rascaline/tests/utils/correlate_density.py @@ -12,12 +12,10 @@ from rascaline.utils.clebsch_gordan import _dispatch from rascaline.utils.clebsch_gordan._cg_cache import ClebschGordanReal from rascaline.utils.clebsch_gordan._clebsch_gordan import ( + _precompute_keys, _standardize_keys, - _precompute_keys -) -from rascaline.utils.clebsch_gordan.correlate_density import ( - DensityCorrelations ) +from rascaline.utils.clebsch_gordan.correlate_density import DensityCorrelations # Try to import some modules @@ -160,6 +158,7 @@ def get_norm(tensor: TensorMap): return norm + def get_max_angular(density: TensorMap, calculator: DensityCorrelations): key_metadata = _precompute_keys( density.keys, @@ -170,14 +169,15 @@ def get_max_angular(density: TensorMap, calculator: DensityCorrelations): ) return max( _dispatch.max(density.keys.column("spherical_harmonics_l")), - max( - [ - int(_dispatch.max(mdata[2].column("spherical_harmonics_l"))) - for mdata in key_metadata - ] - ), + max( + [ + int(_dispatch.max(mdata[2].column("spherical_harmonics_l"))) + for mdata in key_metadata + ] + ), ) + # ============ Test equivariance ============ @@ -308,14 +308,14 @@ def test_correlate_density_norm(correlation_order): # Build higher body order tensor without sorting the l lists corr_calculator = DensityCorrelations( - max_angular=SPHEX_HYPERS_SMALL["max_angular"]*correlation_order, + max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order, correlation_order=correlation_order, angular_cutoff=None, selected_keys=None, skip_redundant=False, ) corr_calculator_skip_redundant = DensityCorrelations( - max_angular=SPHEX_HYPERS_SMALL["max_angular"]*correlation_order, + max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order, correlation_order=correlation_order, angular_cutoff=None, selected_keys=None, @@ -450,14 +450,14 @@ def test_correlate_density_dense_sparse_agree(): correlation_order = 2 corr_calculator_sparse = DensityCorrelations( - max_angular=SPHEX_HYPERS_SMALL["max_angular"]*correlation_order, + max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order, correlation_order=correlation_order, - cg_combine_backend="python-sparse" + cg_backend="python-sparse", ) corr_calculator_dense = DensityCorrelations( - max_angular=SPHEX_HYPERS_SMALL["max_angular"]*correlation_order, + max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order, correlation_order=correlation_order, - cg_combine_backend="python-dense" + cg_backend="python-dense", ) # NOTE: testing the private function here so we can control the use of # sparse v dense CG cache @@ -481,8 +481,10 @@ def test_correlate_density_metadata_agree(): frames = h2o_isolated() skip_redundant = True - for max_angular, nu1 in [(2, spherical_expansion_small(frames)), - (3, spherical_expansion(frames))]: + for max_angular, nu1 in [ + (2, spherical_expansion_small(frames)), + (3, spherical_expansion(frames)), + ]: corr_calculator = DensityCorrelations( max_angular=max_angular, correlation_order=3, @@ -519,7 +521,7 @@ def test_correlate_density_angular_selection( correlation_order = 2 corr_calculator = 
DensityCorrelations( - max_angular=SPHEX_HYPERS["max_angular"]*correlation_order, + max_angular=SPHEX_HYPERS["max_angular"] * correlation_order, correlation_order=correlation_order, angular_cutoff=None, selected_keys=selected_keys, From 155f36cd621b3e2ba93d2b8f0bd7a7cc7ccfd7bf Mon Sep 17 00:00:00 2001 From: Alexander Goscinski Date: Wed, 14 Feb 2024 17:19:09 +0100 Subject: [PATCH 04/23] change ClebschGordanReal to TensorMap From Dict of Tuple[int,int,int] to TensorMap all-deps, all-deps-torch are passing --- .../rascaline/torch/utils/clebsch_gordan.py | 1 + .../tests/utils/correlate_density.py | 9 +- .../utils/clebsch_gordan/_cg_cache.py | 243 ++++++++---------- .../utils/clebsch_gordan/_clebsch_gordan.py | 4 +- .../utils/clebsch_gordan/correlate_density.py | 19 +- .../tests/utils/correlate_density.py | 8 +- 6 files changed, 143 insertions(+), 141 deletions(-) diff --git a/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py b/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py index 94b7f82ea..2f528cf9c 100644 --- a/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py +++ b/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py @@ -27,6 +27,7 @@ module.__dict__["torch_jit_is_scripting"] = torch.jit.is_scripting module.__dict__["torch_jit_annotate"] = torch.jit.annotate module.__dict__["TorchTensor"] = torch.Tensor +module.__dict__["TorchModule"] = torch.nn.Module module.__dict__["Array"] = torch.Tensor diff --git a/python/rascaline-torch/tests/utils/correlate_density.py b/python/rascaline-torch/tests/utils/correlate_density.py index d62c70ca4..a01fa65ed 100644 --- a/python/rascaline-torch/tests/utils/correlate_density.py +++ b/python/rascaline-torch/tests/utils/correlate_density.py @@ -59,7 +59,7 @@ def test_torch_script_correlate_density_angular_selection( nu_1 = spherical_expansion(frames) correlation_order = 2 corr_calculator = DensityCorrelations( - max_angular=SPHEX_HYPERS["max_angular"]*correlation_order + max_angular=SPHEX_HYPERS["max_angular"] * correlation_order, correlation_order=correlation_order, angular_cutoff=None, selected_keys=selected_keys, @@ -81,7 +81,12 @@ def test_torch_script_correlate_density_angular_selection( def test_save_load(): - scripted_correlate_density = torch.jit.script(correlate_density) + corr_calculator = DensityCorrelations( + max_angular=2, + correlation_order=2, + angular_cutoff=1, + ) + scripted_correlate_density = torch.jit.script(corr_calculator) buffer = io.BytesIO() torch.jit.save(scripted_correlate_density, buffer) buffer.seek(0) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py index 85ba42725..1194b3673 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py @@ -4,13 +4,20 @@ """ import math -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional import numpy as np import wigners from . 
import _dispatch -from ._classes import Array, TorchModule, torch_jit_annotate, torch_jit_is_scripting +from ._classes import ( + Array, + Labels, + TensorBlock, + TensorMap, + TorchModule, + torch_jit_is_scripting, +) try: @@ -158,6 +165,7 @@ def __init__( ): super().__init__() self._lambda_max = lambda_max + self._sparse = sparse # For TorchScript we declare type self._use_mops: bool = False @@ -218,7 +226,7 @@ def lambda_max(self): @property def sparse(self): - return isinstance(self._coeffs, SparseCgDict) + return self._sparse @property def use_mops(self): @@ -229,94 +237,6 @@ def coeffs(self): return self._coeffs -class DenseCgDict: - """ - This is a class imtates the access of a Dict[Tuple[int, int, int], Array] object. - We cannot directly use a dict of this type because we support TorchScript - and TorchScript only supports dicts of type Dict[int], Dict[float], Dict[str]. - Internally we represent data structure as Dict[int, Dict[int, Dict[int, Array]]] - - Reference - --------- - https://pytorch.org/docs/stable/jit_language_reference.html - """ - - def __init__(self): - self._dict: Dict[int, Dict[int, Dict[int, Array]]] = {} - - def get(self, i: int, j: int, k: int): - # __getitem__ is not supported by TorchScript - return self._dict[i][j][k] - - def set(self, i: int, j: int, k: int, value: Array): - # __setitem__ is not supported by TorchScript - if i not in self._dict: - self._dict[i] = torch_jit_annotate(Dict[int, Dict[int, Array]], {}) - if j not in self._dict[i]: - self._dict[i][j] = torch_jit_annotate(Dict[int, Array], {}) - self._dict[i][j][k] = value - - def delete(self, i: int, j: int, k: int): - # __delitem__ is not supported by TorchScript - del self._dict[i][j][k] - if len(self._dict[i][j]) == 0: - del self._dict[i][j] - if len(self._dict[i]) == 0: - del self._dict[i] - - def keys(self): - keys: List[List[int]] = [] - for i in self._dict.keys(): - for j in self._dict[i].keys(): - for k in self._dict[i][j].keys(): - keys.append([i, j, k]) - return keys - - -class SparseCgDict: - """ - This is a class imtates the access of a Dict[Tuple[int, int, int], Array] object. - We cannot directly use a dict of this type because we support TorchScript - and TorchScript only supports dicts of type Dict[int], Dict[float], Dict[str]. 
-    Internally we represent data structure as Dict[int, Dict[int, Dict[int, Array]]]
-
-    Reference
-    ---------
-    https://pytorch.org/docs/stable/jit_language_reference.html
-    """
-
-    def __init__(self):
-        self._dict: Dict[int, Dict[int, Dict[int, DenseCgDict]]] = {}
-
-    def get(self, l1: int, l2: int, lambda_: int):
-        # __getitem__ is not supported by TorchScript
-        return self._dict[l1][l2][lambda_]
-
-    def set(self, l1: int, l2: int, lambda_: int, value: DenseCgDict):
-        # __setitem__ is not supported by TorchScript
-        if l1 not in self._dict:
-            self._dict[l1] = torch_jit_annotate(Dict[int, Dict[int, DenseCgDict]], {})
-        if l2 not in self._dict[l1]:
-            self._dict[l1][l2] = torch_jit_annotate(Dict[int, DenseCgDict], {})
-        self._dict[l1][l2][lambda_] = value
-
-    def delete(self, l1: int, l2: int, lambda_: int):
-        # __delitem__ is not supported by TorchScript
-        del self._dict[l1][l2][lambda_]
-        if len(self._dict[l1][l2]) == 0:
-            del self._dict[l1][l2]
-        if len(self._dict[l1]) == 0:
-            del self._dict[l1]
-
-    def keys(self):
-        keys: List[List[int]] = []
-        for l1 in self._dict.keys():
-            for l2 in self._dict[l1].keys():
-                for lambda_ in self._dict[l1][l2].keys():
-                    keys.append([l1, l2, lambda_])
-        return keys
-
-
 def _build_cg_coeff_dict(
     lambda_max: int, sparse: bool, use_mops: bool, use_torch: bool
 ):
@@ -328,17 +248,25 @@ def _build_cg_coeff_dict(
     r2c: Dict[int, Array] = {}
     c2r: Dict[int, Array] = {}

-    if sparse:
-        coeff_dict: Union[SparseCgDict, DenseCgDict] = SparseCgDict()
-    else:
-        coeff_dict: Union[SparseCgDict, DenseCgDict] = DenseCgDict()
+    coeff_dict = {}

     if use_torch:
         complex_like = torch.empty(0, dtype=torch.complex128)
         double_like = torch.empty(0, dtype=torch.double)
+        # For the metatensor-core backend we have to use numpy arrays for the
+        # Labels values even when use_torch is True. The logic is nested because
+        # while scripting the compiler may not see `torch.ScriptClass`
+        if torch_jit_is_scripting():
+            labels_values_like = torch.empty(0, dtype=torch.double)
+        else:
+            if isinstance(Labels, torch.ScriptClass):
+                labels_values_like = torch.empty(0, dtype=torch.double)
+            else:
+                labels_values_like = np.empty(0, dtype=np.double)
     else:
         complex_like = np.empty(0, dtype=np.complex128)
         double_like = np.empty(0, dtype=np.double)
+        labels_values_like = np.empty(0, dtype=np.double)

     for lambda_ in range(0, lambda_max + 1):
         c2r[lambda_] = _complex2real(lambda_, like=complex_like)
@@ -370,7 +298,7 @@ def _build_cg_coeff_dict(
             else:
                 cg_l1l2lam_dense = _dispatch.imag(real_cg)

-            if isinstance(coeff_dict, SparseCgDict):
+            if sparse:
                 # Find the m1, m2, mu idxs of the nonzero CG coeffs
                 nonzeros_cg_coeffs_idx = _dispatch.where(
                     _dispatch.abs(cg_l1l2lam_dense) > 1e-15
                 )
@@ -403,24 +331,85 @@ def _build_cg_coeff_dict(
                    mu_arr = _dispatch.int_array_like(mu_arr, double_like)[mu_idxs]
                    C_arr = _dispatch.double_array_like(C_arr, double_like)[mu_idxs]
                    cg_l1l2lam_sparse = (C_arr, m1_arr, m2_arr, mu_arr)
-                    coeff_dict.set(l1, l2, lambda_, cg_l1l2lam_sparse)
+                    coeff_dict[(l1, l2, lambda_)] = cg_l1l2lam_sparse
                else:
                    # Otherwise fall back to torch/numpy and store as
                    # sparse dicts.
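+                    # Illustrative sketch (made-up numbers standing in for real
+                    # CG coefficients) of the layout built below: for each
+                    # (l1, l2, lambda_) key only the nonzero (m1, m2, mu)
+                    # entries are kept, e.g.
+                    #     cg_l1l2lam_sparse = {(0, 1, 1): 0.7071, (1, 0, 1): -0.7071}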
-                    cg_l1l2lam_sparse = DenseCgDict()
+                    cg_l1l2lam_sparse = {}
                     for i in range(len(nonzeros_cg_coeffs_idx[0])):
                         m1 = nonzeros_cg_coeffs_idx[0][i]
                         m2 = nonzeros_cg_coeffs_idx[1][i]
                         mu = nonzeros_cg_coeffs_idx[2][i]
-                        cg_l1l2lam_sparse.set(
-                            m1, m2, mu, cg_l1l2lam_dense[m1, m2, mu]
-                        )
-                    coeff_dict.set(l1, l2, lambda_, cg_l1l2lam_sparse)
+                        cg_l1l2lam_sparse[(m1, m2, mu)] = cg_l1l2lam_dense[
+                            m1, m2, mu
+                        ]
+                    coeff_dict[(l1, l2, lambda_)] = cg_l1l2lam_sparse
             else:
                 # Store
-                coeff_dict.set(l1, l2, lambda_, cg_l1l2lam_dense)
-
-    return coeff_dict
+                coeff_dict[(l1, l2, lambda_)] = cg_l1l2lam_dense
+    blocks = []
+    if sparse:
+        for l1l2lam_dict in coeff_dict.values():
+            l1l2lam_sample_values = []
+            for m1m2mu_key in l1l2lam_dict.keys():
+                l1l2lam_sample_values.append(m1m2mu_key)
+            # extending shape by samples and properties
+            values = _dispatch.double_array_like(
+                [*l1l2lam_dict.values()], double_like
+            ).reshape(-1, 1)
+            l1l2lam_sample_values = _dispatch.int_array_like(
+                l1l2lam_sample_values, double_like
+            )
+            # we have to put the m1, m2 and mu values inside a block so we can
+            # access them more easily inside the cg combine function
+            blocks.append(
+                TensorBlock(
+                    values=values,
+                    samples=Labels(["m1", "m2", "mu"], l1l2lam_sample_values),
+                    components=[],
+                    properties=Labels.range("property", 1),
+                )
+            )
+        keys = Labels(
+            ["l1", "l2", "lambda"],
+            _dispatch.int_array_like(list(coeff_dict.keys()), labels_values_like),
+        )
+    else:
+        keys = Labels(
+            ["l1", "l2", "lambda"],
+            _dispatch.int_array_like(list(coeff_dict.keys()), labels_values_like),
+        )
+        for l1l2lam_values in coeff_dict.values():
+            # extending shape by samples and properties
+            block_value_shape = (1,) + l1l2lam_values.shape + (1,)
+            blocks.append(
+                TensorBlock(
+                    values=l1l2lam_values.reshape(block_value_shape),
+                    samples=Labels.range("sample", 1),
+                    components=[
+                        Labels(
+                            ["m1"],
+                            _dispatch.int_range_like(
+                                0, l1l2lam_values.shape[0], labels_values_like
+                            ).reshape(-1, 1),
+                        ),
+                        Labels(
+                            ["m2"],
+                            _dispatch.int_range_like(
+                                0, l1l2lam_values.shape[1], labels_values_like
+                            ).reshape(-1, 1),
+                        ),
+                        Labels(
+                            ["mu"],
+                            _dispatch.int_range_like(
+                                0, l1l2lam_values.shape[2], labels_values_like
+                            ).reshape(-1, 1),
+                        ),
+                    ],
+                    properties=Labels.range("property", 1),
+                )
+            )
+    return TensorMap(keys, blocks)


 # ============================
@@ -529,7 +518,7 @@
 def combine_arrays(
     arr_1: Array,
     arr_2: Array,
     lambda_: int,
-    cg_coeffs: Union[SparseCgDict, DenseCgDict],
+    cg_coeffs: TensorMap,
     cg_backend: Optional[str] = None,
 ) -> Array:
     """
@@ -577,21 +566,18 @@ def combine_arrays(
     :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties]
     """
     # If just precomputing metadata, return an empty array
-
     if cg_coeffs is None:
         return empty_combine(arr_1, arr_2, lambda_)

     # We have to temporary store it so TorchScript can infer the correct type
-    if isinstance(cg_coeffs, SparseCgDict):
-        return sparse_combine(arr_1, arr_2, lambda_, cg_coeffs, cg_backend)
-    elif isinstance(cg_coeffs, DenseCgDict):
+    if cg_backend == "python-sparse" or cg_backend == "mops":
+        return sparse_combine(arr_1, arr_2, lambda_, cg_coeffs, cg_backend)
+    elif cg_backend == "python-dense":
         return dense_combine(arr_1, arr_2, lambda_, cg_coeffs)
     else:
         raise ValueError(
-            "Wrong type of cg coeffs, found type {type(cg_coeffs)},"
-            " but only support SparseCgDict, DenseCgDict"
+            f"Wrong cg_backend, got '{cg_backend}',"
+            " but only support 'python-dense', 'python-sparse' and 'mops'."
) @@ -618,7 +604,7 @@ def sparse_combine( arr_1: Array, arr_2: Array, lambda_: int, - cg_coeffs: SparseCgDict, + cg_coeffs: TensorMap, cg_backend: str, ) -> Array: """ @@ -659,18 +645,16 @@ def sparse_combine( arr_out = _dispatch.zeros_like(arr_1, (n_i, 2 * lambda_ + 1, n_p * n_q)) # Get the corresponding Clebsch-Gordan coefficients - cg_l1l2lam = cg_coeffs.get(l1, l2, lambda_) - # Fill in each mu component of the output array in turn - for item in cg_l1l2lam.keys(): - m1 = item[0] - m2 = item[1] - mu = item[2] + cg_l1l2lam = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lambda_}) + for i in range(len(cg_l1l2lam.samples)): + m1m2mu_key = cg_l1l2lam.samples.entry(i) + m1 = m1m2mu_key[0] + m2 = m1m2mu_key[1] + mu = m1m2mu_key[2] # Broadcast arrays, multiply together and with CG coeff arr_out[:, mu, :] += ( - arr_1[:, m1, :, None] - * arr_2[:, m2, None, :] - * cg_l1l2lam.get(m1, m2, mu) + arr_1[:, m1, :, None] * arr_2[:, m2, None, :] * cg_l1l2lam.values[i, 0] ).reshape(n_i, n_p * n_q) return arr_out @@ -690,7 +674,7 @@ def sparse_combine( arr_out = sap( arr_1, arr_2, - *cg_coeffs.get(l1, l2, lambda_), + *cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lambda_}).values.flatten(), output_size=2 * lambda_ + 1, ) assert arr_out.shape == (n_i * n_p * n_q, 2 * lambda_ + 1) @@ -713,7 +697,7 @@ def dense_combine( arr_1: Array, arr_2: Array, lambda_: int, - cg_coeffs: DenseCgDict, + cg_coeffs: TensorMap, ) -> Array: """ Performs a Clebsch-Gordan combination step on 2 arrays using a dense @@ -734,7 +718,8 @@ def dense_combine( # Infer l1 and l2 from the len of the length of axis 1 of each tensor l1 = (arr_1.shape[1] - 1) // 2 l2 = (arr_2.shape[1] - 1) // 2 - cg_coeffs = cg_coeffs.get(l1, l2, lambda_) + + cg_l1l2lam = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lambda_}).values # (samples None None l1_mu q) * (samples l2_mu p None None) # -> (samples l2_mu p l1_mu q) we broadcast it in this way @@ -752,11 +737,11 @@ def dense_combine( ) # (l1_mu l2_mu lam_mu) -> ((l1_mu l2_mu) lam_mu) - cg_coeffs = cg_coeffs.reshape(-1, 2 * lambda_ + 1) + cg_l1l2lam = cg_l1l2lam.reshape(-1, 2 * lambda_ + 1) # (samples (q p) (l1_mu l2_mu)) @ ((l1_mu l2_mu) lam_mu) # -> samples (q p) lam_mu - arr_out = arr_out @ cg_coeffs + arr_out = arr_out @ cg_l1l2lam # (samples (q p) lam_mu) -> (samples lam_mu (q p)) return _dispatch.swapaxes(arr_out, 1, 2) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py index ae7ac14b8..0c017d1a1 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py @@ -591,8 +591,8 @@ def _combine_blocks_same_samples( block_1: TensorBlock, block_2: TensorBlock, lambda_: int, - cg_coeffs: Union[_cg_cache.SparseCgDict, _cg_cache.DenseCgDict, None], - cg_backend: str + cg_coeffs: Union[TensorMap, None], + cg_backend: str, ) -> TensorBlock: """ For a given pair of TensorBlocks and desired angular channel, combines the diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py index 392687b44..d4069240a 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py @@ -20,7 +20,7 @@ try: - import mops + import mops # noqa F401 HAS_MOPS = True except ImportError: @@ -179,6 +179,8 @@ def __init__( sparse = True use_mops = True + # We 
cannot store this into one member variable because TorchScript + # has problems scripting which cannot be resolved with isinstance checks self._cg_coeffs = _cg_cache.ClebschGordanReal( self._max_angular, sparse=sparse, @@ -229,7 +231,13 @@ def correlation_order(self): return self._correlation_order @property - def selected_keys(self) -> List[Union[Labels, None]]: + def selected_keys(self): + """ + Outputs the selected keys used in the CG iterations of type List[Union[Labels, + None]]. + """ + # TorchScript cannot infer the type properly so we removed the type hint of + # output return self._selected_keys @property @@ -249,7 +257,7 @@ def cg_backend(self): return self._cg_backend @property - def cg_coeffs(self) -> Union[_cg_cache.SparseCgDict, _cg_cache.DenseCgDict]: + def cg_coeffs(self) -> TensorMap: return self._cg_coeffs def forward(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]: @@ -297,9 +305,7 @@ def compute_metadata( # ==================================================================== # TODO replace arguments with self. def _correlate_density( - self, - density: TensorMap, - compute_metadata: bool + self, density: TensorMap, compute_metadata: bool ) -> Union[TensorMap, List[TensorMap]]: # Check metadata @@ -362,6 +368,7 @@ def _correlate_density( cg_coeffs = None else: cg_coeffs = self._cg_coeffs + for iteration in range(n_iterations): # Define the correlation order of the current iteration correlation_order_it = iteration + 2 diff --git a/python/rascaline/tests/utils/correlate_density.py b/python/rascaline/tests/utils/correlate_density.py index b2f2784f0..bf09127a4 100644 --- a/python/rascaline/tests/utils/correlate_density.py +++ b/python/rascaline/tests/utils/correlate_density.py @@ -393,7 +393,9 @@ def test_clebsch_gordan_orthogonality(cg_cache_dense, l1, l2): # \sum_{-m1 \leq l1 \leq m1, -m2 \leq l2 \leq m2} # <λμ|l1m1,l2m2> = δ_μμ' for lam in range(lam_min, lam_max): - cg_mat = cg_cache_dense.coeffs.get(l1, l2, lam).reshape(-1, 2 * lam + 1) + cg_mat = cg_cache_dense.coeffs.block( + {"l1": l1, "l2": l2, "lambda": lam} + ).values.reshape(-1, 2 * lam + 1) dot_product = cg_mat.T @ cg_mat diag_mask = _dispatch.zeros_like(bool_like, dot_product.shape) diag_indices = ( @@ -417,7 +419,9 @@ def test_clebsch_gordan_orthogonality(cg_cache_dense, l1, l2): l1l2_dim = (2 * l1 + 1) * (2 * l2 + 1) dot_product = _dispatch.zeros_like(float64_like, (l1l2_dim, l1l2_dim)) for lam in range(lam_min, lam_max + 1): - cg_mat = cg_cache_dense.coeffs.get(l1, l2, lam).reshape(-1, 2 * lam + 1) + cg_mat = cg_cache_dense.coeffs.block( + {"l1": l1, "l2": l2, "lambda": lam} + ).values.reshape(-1, 2 * lam + 1) dot_product += cg_mat @ cg_mat.T diag_mask = _dispatch.zeros_like(bool_like, dot_product.shape) diag_indices = ( From 3c3961b1c4253e7f8207379406c639f20bfde000 Mon Sep 17 00:00:00 2001 From: Alexander Goscinski Date: Thu, 15 Feb 2024 06:35:14 +0100 Subject: [PATCH 05/23] adding for torch backend --- .../rascaline/utils/clebsch_gordan/_clebsch_gordan.py | 4 ++-- python/rascaline/tests/utils/correlate_density.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py index 0c017d1a1..9bf5f0c74 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py @@ -642,7 +642,7 @@ def _combine_blocks_same_samples( Labels( names=["spherical_harmonics_m"], 
values=_dispatch.int_range_like( - min_val=-lambda_, max_val=lambda_ + 1, like=block_1.values + min_val=-lambda_, max_val=lambda_ + 1, like=block_1.samples.values ).reshape(-1, 1), ), ], @@ -654,7 +654,7 @@ def _combine_blocks_same_samples( + _dispatch.to_int_list(block_1.properties.values[indices[1]]) for indices in block_1_block_2_product_idx ], - block_1.properties.values, + block_1.samples.values, ), ), ) diff --git a/python/rascaline/tests/utils/correlate_density.py b/python/rascaline/tests/utils/correlate_density.py index bf09127a4..8673e349b 100644 --- a/python/rascaline/tests/utils/correlate_density.py +++ b/python/rascaline/tests/utils/correlate_density.py @@ -512,9 +512,11 @@ def test_correlate_density_metadata_agree(): ], ) @pytest.mark.parametrize("skip_redundant", [True, False]) +@pytest.mark.parametrize("arrays_backend", ["numpy", "torch"]) def test_correlate_density_angular_selection( selected_keys: Labels, skip_redundant: bool, + arrays_backend: str, ): """ Tests that the correct angular channels are output based on the specified @@ -531,7 +533,7 @@ def test_correlate_density_angular_selection( selected_keys=selected_keys, skip_redundant=skip_redundant, ) - nu_2 = corr_calculator.compute(nu_1) + nu_2 = corr_calculator.compute(nu_1.to(arrays="torch")) if selected_keys is None: assert np.all( From 358d170898632aa57bad4a5033ea7f567a413ec4 Mon Sep 17 00:00:00 2001 From: Alexander Goscinski Date: Thu, 15 Feb 2024 06:55:33 +0100 Subject: [PATCH 06/23] fixing TorchScript --- .../rascaline/torch/utils/clebsch_gordan.py | 2 + .../tests/utils/correlate_density.py | 12 +++--- .../utils/clebsch_gordan/_cg_cache.py | 6 +-- .../utils/clebsch_gordan/_classes.py | 8 +++- .../utils/clebsch_gordan/_clebsch_gordan.py | 2 +- .../utils/clebsch_gordan/correlate_density.py | 39 +++++++++++-------- 6 files changed, 42 insertions(+), 27 deletions(-) diff --git a/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py b/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py index 2f528cf9c..57629c89b 100644 --- a/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py +++ b/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py @@ -26,8 +26,10 @@ module.__dict__["LabelsEntry"] = LabelsEntry module.__dict__["torch_jit_is_scripting"] = torch.jit.is_scripting module.__dict__["torch_jit_annotate"] = torch.jit.annotate +module.__dict__["torch_jit_script"] = torch.jit.script module.__dict__["TorchTensor"] = torch.Tensor module.__dict__["TorchModule"] = torch.nn.Module +module.__dict__["TorchScriptClass"] = torch.ScriptClass module.__dict__["Array"] = torch.Tensor diff --git a/python/rascaline-torch/tests/utils/correlate_density.py b/python/rascaline-torch/tests/utils/correlate_density.py index a01fa65ed..d3e65bf97 100644 --- a/python/rascaline-torch/tests/utils/correlate_density.py +++ b/python/rascaline-torch/tests/utils/correlate_density.py @@ -69,15 +69,15 @@ def test_torch_script_correlate_density_angular_selection( scripted_corr_calculator = torch.jit.script(corr_calculator) # Test compute - nu_2 = corr_calculator.compute(nu_1) + ref_nu_2 = corr_calculator.compute(nu_1) scripted_nu_2 = scripted_corr_calculator.compute(nu_1) - assert metatensor.torch.equal_metadata(scripted_nu_2, nu_2) - assert metatensor.torch.allclose(scripted_nu_2, nu_2) + assert metatensor.torch.equal_metadata(scripted_nu_2, ref_nu_2) + assert metatensor.torch.allclose(scripted_nu_2, ref_nu_2) - # Teste compute_metadata - scripted_nu_2 = scripted_corr_calculator.compute_metadata(nu_1) - assert 
metatensor.torch.equal_metadata(scripted_nu_2, nu_2) + # Test compute_metadata + #scripted_nu_2 = scripted_corr_calculator.compute_metadata(nu_1) + #assert metatensor.torch.equal_metadata(scripted_nu_2, nu_2) def test_save_load(): diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py index 1194b3673..2ec8c9152 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py @@ -519,7 +519,7 @@ def combine_arrays( arr_2: Array, lambda_: int, cg_coeffs: TensorMap, - cg_backend: Optional[str] = None, + cg_backend: str, ) -> Array: """ Couples arrays `arr_1` and `arr_2` corresponding to the irreducible @@ -560,13 +560,13 @@ def combine_arrays( of shape [(2 * l1 +1) * (2 * l2 +1), (2 * lambda_ + 1)]. If it is None we only return an empty array of the shape. :param cg_backend: specifies the combine backend with sparse CG coefficients. - It can have the values "python-sparse" and "mops". + It can have the values "python-dense", "python-sparse", "mops" and "metadata" :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties] """ # If just precomputing metadata, return an empty array - if cg_coeffs is None: + if cg_backend == "metadata": return empty_combine(arr_1, arr_2, lambda_) # We have to temporary store it so TorchScript can infer the correct type diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py b/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py index 02e5672a6..2e4d9e56e 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py @@ -11,16 +11,18 @@ def torch_jit_is_scripting(): def torch_jit_annotate(annotation, obj): return obj +def torch_jit_script(func): + return func def is_labels(obj: Any): return isinstance(obj, Labels) - check_isinstance = isinstance try: from torch import Tensor as TorchTensor from torch.nn import Module as TorchModule + from torch import ScriptClass as TorchScriptClass except ImportError: class TorchTensor: @@ -31,6 +33,9 @@ class TorchModule: def __call__(self, *arg, **kwargs): return self.forward(*arg, **kwargs) + class TorchScriptClass: + pass + Array = Union[np.ndarray, TorchTensor] @@ -41,6 +46,7 @@ def __call__(self, *arg, **kwargs): "LabelsEntry", "torch_jit_is_scripting", "torch_jit_annotate", + "torch_jit_scipt" "check_isinstance", "is_labels", ] diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py index 9bf5f0c74..1a57a9223 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py @@ -591,7 +591,7 @@ def _combine_blocks_same_samples( block_1: TensorBlock, block_2: TensorBlock, lambda_: int, - cg_coeffs: Union[TensorMap, None], + cg_coeffs: TensorMap, cg_backend: str, ) -> TensorBlock: """ diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py index d4069240a..890f9fc00 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py @@ -15,7 +15,10 @@ TensorBlock, TensorMap, TorchModule, + TorchScriptClass, torch_jit_is_scripting, + torch_jit_annotate, + torch_jit_script, ) @@ -122,7 +125,10 
@@ def __init__( if torch_jit_is_scripting(): self._arrays_backend = "torch" else: - self._arrays_backend = "numpy" + if isinstance(Labels, TorchScriptClass): + self._arrays_backend = "torch" + else: + self._arrays_backend = "numpy" elif arrays_backend == "numpy": if torch_jit_is_scripting(): raise ValueError( @@ -202,14 +208,13 @@ def __init__( elif self._arrays_backend == "numpy": array_like = np.empty(0) - self._selected_keys: List[Union[Labels, None]] = ( + self._selected_keys: List[Union[Labels, None]] = \ _clebsch_gordan._parse_selected_keys( n_iterations=n_iterations, array_like=array_like, angular_cutoff=self._angular_cutoff, selected_keys=selected_keys, ) - ) # Parse the bool flags that control skipping of redundant CG combinations # and TensorMap output from each iteration self._skip_redundant, self._output_selection = ( @@ -231,13 +236,15 @@ def correlation_order(self): return self._correlation_order @property - def selected_keys(self): - """ - Outputs the selected keys used in the CG iterations of type List[Union[Labels, - None]]. - """ - # TorchScript cannot infer the type properly so we removed the type hint of - # output + def selected_keys(self) -> List[Union[Labels, None]]: + if torch_jit_is_scripting(): + if torch.jit.isinstance(self._selected_keys, List[Union[Labels, None]]): + return self._selected_keys + else: + selected_keys_: List[Union[None, Labels]] = [ + torch_jit_annotate(Union[None, Labels], None) + ] * len(self._selected_keys) + return selected_keys_ return self._selected_keys @property @@ -279,6 +286,7 @@ def compute(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]: compute_metadata=False, ) + @torch_jit_script def compute_metadata( self, density: TensorMap, @@ -303,7 +311,6 @@ def compute_metadata( # ==================================================================== # ===== Private functions that do the work on the TensorMap level # ==================================================================== - # TODO replace arguments with self. def _correlate_density( self, density: TensorMap, compute_metadata: bool ) -> Union[TensorMap, List[TensorMap]]: @@ -344,7 +351,7 @@ def _correlate_density( density.keys, density.keys, n_iterations=n_iterations, - selected_keys=self._selected_keys, + selected_keys=self.selected_keys, #TODO hacky better way? 
skip_redundant=self._skip_redundant, ) max_angular = max( @@ -365,9 +372,9 @@ def _correlate_density( # Perform iterative CG tensor products density_correlations: List[TensorMap] = [] if compute_metadata: - cg_coeffs = None + cg_backend = "metadata" else: - cg_coeffs = self._cg_coeffs + cg_backend = self._cg_backend for iteration in range(n_iterations): # Define the correlation order of the current iteration @@ -386,8 +393,8 @@ def _correlate_density( density_correlation.block(key_1), density.block(key_2), lambda_out, - cg_coeffs, - self._cg_backend, + self._cg_coeffs, + cg_backend, ) blocks_out.append(block_out) keys_out = key_metadata[iteration][2] From f94b8c4395d841b38c79aca8fbc212705e5b3432 Mon Sep 17 00:00:00 2001 From: Alexander Goscinski Date: Thu, 15 Feb 2024 14:03:39 +0100 Subject: [PATCH 07/23] fix dispatch and refactor tests removed all-deps-torch and merged the tests to all-deps --- .../rascaline/torch/utils/clebsch_gordan.py | 2 +- .../utils/clebsch_gordan/_cg_cache.py | 2 +- .../utils/clebsch_gordan/_classes.py | 13 +++-- .../utils/clebsch_gordan/_clebsch_gordan.py | 3 +- .../utils/clebsch_gordan/correlate_density.py | 21 +++----- .../tests/utils/correlate_density.py | 49 ++++++++++--------- tox.ini | 17 +------ 7 files changed, 46 insertions(+), 61 deletions(-) diff --git a/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py b/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py index 57629c89b..c9d54a2ce 100644 --- a/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py +++ b/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py @@ -26,7 +26,7 @@ module.__dict__["LabelsEntry"] = LabelsEntry module.__dict__["torch_jit_is_scripting"] = torch.jit.is_scripting module.__dict__["torch_jit_annotate"] = torch.jit.annotate -module.__dict__["torch_jit_script"] = torch.jit.script +module.__dict__["torch_jit_export"] = torch.jit.export module.__dict__["TorchTensor"] = torch.Tensor module.__dict__["TorchModule"] = torch.nn.Module module.__dict__["TorchScriptClass"] = torch.ScriptClass diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py index 2ec8c9152..4fc928366 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py @@ -358,7 +358,7 @@ def _build_cg_coeff_dict( [*l1l2lam_dict.values()], double_like ).reshape(-1, 1) l1l2lam_sample_values = _dispatch.int_array_like( - l1l2lam_sample_values, double_like + l1l2lam_sample_values, labels_values_like ) # we have to move put the m1 m2 m3 inside a block so we can access it easier # inside cg combine function, diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py b/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py index 2e4d9e56e..9cd36502b 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py @@ -11,18 +11,19 @@ def torch_jit_is_scripting(): def torch_jit_annotate(annotation, obj): return obj -def torch_jit_script(func): + +def torch_jit_export(func): return func + def is_labels(obj: Any): return isinstance(obj, Labels) -check_isinstance = isinstance try: + from torch import ScriptClass as TorchScriptClass from torch import Tensor as TorchTensor from torch.nn import Module as TorchModule - from torch import ScriptClass as TorchScriptClass except ImportError: class TorchTensor: @@ -43,10 +44,12 @@ class TorchScriptClass: 
"Labels", "TensorBlock", "TensorMap", + "TorchTensor", + "TorchModule", + "TorchScriptClass", "LabelsEntry", "torch_jit_is_scripting", "torch_jit_annotate", - "torch_jit_scipt" - "check_isinstance", + "torch_jit_export", "is_labels", ] diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py index 1a57a9223..fea21ae29 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py @@ -262,7 +262,7 @@ def _precompute_keys( keys_1: Labels, keys_2: Labels, n_iterations: int, - selected_keys: List[Union[None, Labels]], + selected_keys: List[Union[Labels, None]], skip_redundant: List[bool], ) -> List[Tuple[List[LabelsEntry], List[LabelsEntry], Labels]]: """ @@ -286,6 +286,7 @@ def _precompute_keys( keys_1=keys_out, keys_2=keys_2, ) + # For TorchScript to determine the type correctly so we can subscript it selected_keys_i = selected_keys[iteration] if selected_keys_i is not None: keys_1_entries, keys_2_entries, keys_out = _apply_key_selection( diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py index 890f9fc00..418bcc9fa 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py @@ -16,9 +16,8 @@ TensorMap, TorchModule, TorchScriptClass, + torch_jit_export, torch_jit_is_scripting, - torch_jit_annotate, - torch_jit_script, ) @@ -109,6 +108,8 @@ class DensityCorrelations(TorchModule): returned instead. """ + _selected_keys: List[Union[Labels, None]] + def __init__( self, max_angular: int, @@ -208,13 +209,14 @@ def __init__( elif self._arrays_backend == "numpy": array_like = np.empty(0) - self._selected_keys: List[Union[Labels, None]] = \ + self._selected_keys: List[Union[Labels, None]] = ( _clebsch_gordan._parse_selected_keys( n_iterations=n_iterations, array_like=array_like, angular_cutoff=self._angular_cutoff, selected_keys=selected_keys, ) + ) # Parse the bool flags that control skipping of redundant CG combinations # and TensorMap output from each iteration self._skip_redundant, self._output_selection = ( @@ -237,14 +239,6 @@ def correlation_order(self): @property def selected_keys(self) -> List[Union[Labels, None]]: - if torch_jit_is_scripting(): - if torch.jit.isinstance(self._selected_keys, List[Union[Labels, None]]): - return self._selected_keys - else: - selected_keys_: List[Union[None, Labels]] = [ - torch_jit_annotate(Union[None, Labels], None) - ] * len(self._selected_keys) - return selected_keys_ return self._selected_keys @property @@ -286,7 +280,7 @@ def compute(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]: compute_metadata=False, ) - @torch_jit_script + @torch_jit_export def compute_metadata( self, density: TensorMap, @@ -345,13 +339,12 @@ def _correlate_density( "Clebsch Gordan combinations with gradients not yet implemented." " Use metatensor.remove_gradients to remove gradients from the input." ) - # Pre-compute the keys needed to perform each CG iteration key_metadata = _clebsch_gordan._precompute_keys( density.keys, density.keys, n_iterations=n_iterations, - selected_keys=self.selected_keys, #TODO hacky better way? 
+ selected_keys=self._selected_keys, skip_redundant=self._skip_redundant, ) max_angular = max( diff --git a/python/rascaline/tests/utils/correlate_density.py b/python/rascaline/tests/utils/correlate_density.py index 8673e349b..6dcfc7ba7 100644 --- a/python/rascaline/tests/utils/correlate_density.py +++ b/python/rascaline/tests/utils/correlate_density.py @@ -47,6 +47,10 @@ except ImportError: HAS_TORCH = False +if HAS_TORCH: + ARRAYS_BACKEND = ["numpy", "torch"] +else: + ARRAYS_BACKEND = ["numpy"] DATA_ROOT = os.path.join(os.path.dirname(__file__), "data") @@ -71,19 +75,6 @@ } -# ============ Pytest fixtures ============ - - -@pytest.fixture() -def cg_cache_sparse(): - return ClebschGordanReal(lambda_max=5, sparse=True, use_torch=HAS_TORCH) - - -@pytest.fixture() -def cg_cache_dense(): - return ClebschGordanReal(lambda_max=5, sparse=False, use_torch=HAS_TORCH) - - # ============ Helper functions ============ @@ -370,7 +361,8 @@ def test_correlate_density_norm(correlation_order): @pytest.mark.parametrize("l1, l2", [(1, 2), (2, 3), (0, 5)]) -def test_clebsch_gordan_orthogonality(cg_cache_dense, l1, l2): +@pytest.mark.parametrize("arrays_backend", ARRAYS_BACKEND) +def test_clebsch_gordan_orthogonality(l1, l2, arrays_backend): """ Test orthogonality relationships of cached dense CG coefficients. @@ -378,24 +370,30 @@ def test_clebsch_gordan_orthogonality(cg_cache_dense, l1, l2): https://en.wikipedia.org/wiki/Clebsch%E2%80%93Gordan_coefficients#Orthogonality_relations for details. """ + cg_coeffs = ClebschGordanReal( + lambda_max=5, sparse=False, use_torch=arrays_backend == "torch" + ).coeffs + lam_min = abs(l1 - l2) lam_max = l1 + l2 - if HAS_TORCH: + if arrays_backend == "torch": int64_like = torch.empty(0, dtype=torch.int64) float64_like = torch.empty(0, dtype=torch.float64) bool_like = torch.empty(0, dtype=torch.bool) - else: + elif arrays_backend == "numpy": int64_like = np.empty(0, dtype=np.int64) float64_like = np.empty(0, dtype=np.float64) bool_like = np.empty(0, dtype=np.bool_) + else: + raise ValueError(f"Not supported arrays backend {arrays_backend}.") # We test lam dimension # \sum_{-m1 \leq l1 \leq m1, -m2 \leq l2 \leq m2} # <λμ|l1m1,l2m2> = δ_μμ' for lam in range(lam_min, lam_max): - cg_mat = cg_cache_dense.coeffs.block( - {"l1": l1, "l2": l2, "lambda": lam} - ).values.reshape(-1, 2 * lam + 1) + cg_mat = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lam}).values.reshape( + -1, 2 * lam + 1 + ) dot_product = cg_mat.T @ cg_mat diag_mask = _dispatch.zeros_like(bool_like, dot_product.shape) diag_indices = ( @@ -419,9 +417,9 @@ def test_clebsch_gordan_orthogonality(cg_cache_dense, l1, l2): l1l2_dim = (2 * l1 + 1) * (2 * l2 + 1) dot_product = _dispatch.zeros_like(float64_like, (l1l2_dim, l1l2_dim)) for lam in range(lam_min, lam_max + 1): - cg_mat = cg_cache_dense.coeffs.block( - {"l1": l1, "l2": l2, "lambda": lam} - ).values.reshape(-1, 2 * lam + 1) + cg_mat = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lam}).values.reshape( + -1, 2 * lam + 1 + ) dot_product += cg_mat @ cg_mat.T diag_mask = _dispatch.zeros_like(bool_like, dot_product.shape) diag_indices = ( @@ -512,7 +510,7 @@ def test_correlate_density_metadata_agree(): ], ) @pytest.mark.parametrize("skip_redundant", [True, False]) -@pytest.mark.parametrize("arrays_backend", ["numpy", "torch"]) +@pytest.mark.parametrize("arrays_backend", ARRAYS_BACKEND + [None]) def test_correlate_density_angular_selection( selected_keys: Labels, skip_redundant: bool, @@ -532,8 +530,11 @@ def test_correlate_density_angular_selection( 
angular_cutoff=None, selected_keys=selected_keys, skip_redundant=skip_redundant, + arrays_backend=arrays_backend, ) - nu_2 = corr_calculator.compute(nu_1.to(arrays="torch")) + if arrays_backend is not None: + nu_1 = nu_1.to(arrays=arrays_backend) + nu_2 = corr_calculator.compute(nu_1) if selected_keys is None: assert np.all( diff --git a/tox.ini b/tox.ini index e78e2656a..f22de315d 100644 --- a/tox.ini +++ b/tox.ini @@ -6,7 +6,6 @@ envlist = lint min-deps all-deps - all-deps-torch docs-tests torch-tests @@ -51,7 +50,7 @@ commands = # note: platform_system can be "Linux","Darwin", or "Windows". description = Run Python unit tests with all dependencies installed (ase, pyscf, - and chemfiles are optional dependencies) + chemfiles and torch are optional dependencies) deps = {[testenv]metatensor-core-requirement} ase @@ -61,6 +60,7 @@ deps = pytest-cov scipy sympy + torch pyscf;platform_system!="Windows" wigners # TODO: add mops once it becomes stable enough (and potentially supports windows) @@ -68,19 +68,6 @@ deps = commands = pytest {[testenv]test_options} {posargs} -[testenv:all-deps-torch] -# note: platform_system can be "Linux","Darwin", or "Windows". -description = - Run Python unit tests with all dependencies installed (ase, pyscf, - and chemfiles are optional dependencies) and torch as array backend -deps = - {[testenv:all-deps]deps} - torch -commands = - # for the moment only the correlation density tests test on torch arrays - pytest {[testenv]test_options} {posargs} \ - python/rascaline/tests/utils/correlate_density.py - [testenv:min-deps] description = Run Python unit tests with the minimal dependencies installed deps = From 2d621b685755d4686593a203c7329248c96ae406 Mon Sep 17 00:00:00 2001 From: Alexander Goscinski Date: Thu, 15 Feb 2024 14:04:22 +0100 Subject: [PATCH 08/23] remove _dispatch.max_axis not needed --- .../utils/clebsch_gordan/_dispatch.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py b/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py index 417e1e756..37dbbdc4e 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py @@ -211,24 +211,9 @@ def max(array): ``np.max(array)`` or ``torch.max(array)``. """ if isinstance(array, TorchTensor): - return torch.max(input=array) + return torch.max(array) elif isinstance(array, np.ndarray): - return np.max(a=array) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def max_axis(array, axis: int = 0): - """ - Takes the maximun values of the array along the axis. - - This function has the same behavior as - ``np.max(array, axis=axis)`` or ``torch.max(array, dim=axis)``. 
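The `max` cleanup in this hunk shows the convention the whole `_dispatch` module follows: check the array type once, then forward to the backend call with identical semantics. A minimal, self-contained sketch of that pattern (the name `dispatch_max` and the error message are illustrative, not taken from rascaline):

    import numpy as np

    try:
        import torch

        TorchTensor = torch.Tensor
    except ImportError:

        class TorchTensor:  # placeholder type when torch is not installed
            pass


    def dispatch_max(array):
        # reduce over all elements, mirroring np.max(array) / torch.max(array)
        if isinstance(array, TorchTensor):
            return torch.max(array)
        elif isinstance(array, np.ndarray):
            return np.max(array)
        else:
            raise TypeError(f"unknown array type: {type(array)}")


    assert dispatch_max(np.arange(6).reshape(2, 3)) == 5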
- """ - if isinstance(array, TorchTensor): - return torch.max(input=array, dim=axis) - elif isinstance(array, np.ndarray): - return np.max(a=array, axis=axis) + return np.max(array) else: raise TypeError(UNKNOWN_ARRAY_TYPE) From 25f7ad3c3a3f8c62ae94a702496725656408fe7b Mon Sep 17 00:00:00 2001 From: Alexander Goscinski Date: Thu, 15 Feb 2024 14:04:40 +0100 Subject: [PATCH 09/23] add tests for properties of DensityCorrelations --- .../tests/utils/correlate_density.py | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/python/rascaline-torch/tests/utils/correlate_density.py b/python/rascaline-torch/tests/utils/correlate_density.py index d3e65bf97..d6f677fde 100644 --- a/python/rascaline-torch/tests/utils/correlate_density.py +++ b/python/rascaline-torch/tests/utils/correlate_density.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import io import os -from typing import List +from typing import Any, List import ase.io import metatensor.torch @@ -15,6 +15,13 @@ DATA_ROOT = os.path.join(os.path.dirname(__file__), "data") + +def is_tensor_map(obj: Any): + return isinstance(obj, TensorMap) + + +is_tensor_map = torch.jit.script(is_tensor_map) + SPHEX_HYPERS = { "cutoff": 2.5, # Angstrom "max_radial": 3, # Exclusive @@ -66,18 +73,26 @@ def test_torch_script_correlate_density_angular_selection( skip_redundant=skip_redundant, ) + ref_nu_2 = corr_calculator.compute(nu_1) scripted_corr_calculator = torch.jit.script(corr_calculator) # Test compute - ref_nu_2 = corr_calculator.compute(nu_1) scripted_nu_2 = scripted_corr_calculator.compute(nu_1) - assert metatensor.torch.equal_metadata(scripted_nu_2, ref_nu_2) assert metatensor.torch.allclose(scripted_nu_2, ref_nu_2) # Test compute_metadata - #scripted_nu_2 = scripted_corr_calculator.compute_metadata(nu_1) - #assert metatensor.torch.equal_metadata(scripted_nu_2, nu_2) + scripted_nu_2 = scripted_corr_calculator.compute_metadata(nu_1) + assert metatensor.torch.equal_metadata(scripted_nu_2, ref_nu_2) + + # Test if properties are accesible + assert isinstance(corr_calculator.correlation_order, int) + assert isinstance(corr_calculator.selected_keys, list) + assert isinstance(corr_calculator.skip_redundant, list) + assert isinstance(corr_calculator.output_selection, list) + assert isinstance(corr_calculator.arrays_backend, str) + assert isinstance(corr_calculator.cg_backend, str) + assert is_tensor_map(corr_calculator.cg_coeffs) def test_save_load(): From 39aee61bef77cd0286cb745c77363c6497264cde Mon Sep 17 00:00:00 2001 From: Alexander Goscinski Date: Thu, 15 Feb 2024 14:26:24 +0100 Subject: [PATCH 10/23] simplify _parse_selected_keys, now it does not need to be scritable --- .../utils/clebsch_gordan/_clebsch_gordan.py | 59 ++++--------------- 1 file changed, 12 insertions(+), 47 deletions(-) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py index fea21ae29..dc5c7aed7 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py @@ -15,7 +15,6 @@ TensorMap, is_labels, torch_jit_annotate, - torch_jit_is_scripting, ) @@ -80,19 +79,7 @@ def _parse_selected_keys( ) if isinstance(selected_keys, list): - # Both if conditions check the same thing, the second is for metetensor-core and - # metatensor-torch, the first one for torch-scripted metatensor-torch - if torch_jit_is_scripting(): - if not all( - [ - isinstance(selected_keys[i], Labels) or 
(selected_keys[i] is None) - for i in range(len(selected_keys)) - ] - ): - raise TypeError( - "`selected_keys` must be a Labels or List[Union[None, Labels]]" - ) - elif not all( + if not all( [ is_labels(selected_keys[i]) or (selected_keys[i] is None) for i in range(len(selected_keys)) @@ -114,40 +101,22 @@ def _parse_selected_keys( if selected_keys is None: if angular_cutoff is None: # no selections at all - selected_keys_ = [ - torch_jit_annotate(Union[None, Labels], None) - ] * n_iterations + selected_keys_ = [None] * n_iterations else: # Create a key selection with all angular channels <= the specified # angular cutoff - label: Union[None, Labels] = torch_jit_annotate( - Union[None, Labels], - Labels( - names=["spherical_harmonics_l"], - values=_dispatch.int_array_like( - list(range(0, angular_cutoff)), like=array_like - ).reshape(-1, 1), - ), + label = Labels( + names=["spherical_harmonics_l"], + values=_dispatch.int_array_like( + list(range(0, angular_cutoff)), like=array_like + ).reshape(-1, 1), ) selected_keys_ = [label] * n_iterations - # Both if conditions check the same thing, we cannot write them out into one - # condition, because otherwise the TorchScript compiler cannot infer that - # selected_keys is Labels. We need both because isinstance(selected, Labels) works - # with metatensor-torch only when scripted - if torch_jit_is_scripting(): - if isinstance(selected_keys, Labels): - # Create a list, but only apply a key selection at the final iteration - selected_keys_ = [torch_jit_annotate(Union[None, Labels], None)] * ( - n_iterations - 1 - ) - selected_keys_.append(torch_jit_annotate(Labels, selected_keys)) - elif is_labels(selected_keys): + if is_labels(selected_keys): # Create a list, but only apply a key selection at the final iteration - selected_keys_ = [torch_jit_annotate(Union[None, Labels], None)] * ( - n_iterations - 1 - ) - selected_keys_.append(torch_jit_annotate(Labels, selected_keys)) + selected_keys_ = [None] * (n_iterations - 1) + selected_keys_.append(selected_keys) elif isinstance(selected_keys, list): selected_keys_ = selected_keys @@ -161,12 +130,8 @@ def _parse_selected_keys( for slct in selected_keys_: if slct is None: continue - if torch_jit_is_scripting(): - if not (isinstance(slct, Labels)): - raise ValueError("Asserted that elements in `slct` are Labels") - else: - if not (is_labels(slct)): - raise ValueError("Asserted that elements in `slct` are Labels") + if not (is_labels(slct)): + raise ValueError("Asserted that elements in `slct` are Labels") if not all( [ From f6d88efcf7db911369bc87364e8e2f22d410cb8b Mon Sep 17 00:00:00 2001 From: Joseph Abbott Date: Thu, 15 Feb 2024 18:04:49 +0100 Subject: [PATCH 11/23] Make CG cache contiguous, fix some --- .../utils/clebsch_gordan/_cg_cache.py | 6 ++-- .../utils/clebsch_gordan/_dispatch.py | 20 +++++++++++ .../utils/clebsch_gordan/correlate_density.py | 36 +++++++++---------- 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py index 4fc928366..4ba79083a 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py @@ -364,7 +364,7 @@ def _build_cg_coeff_dict( # inside cg combine function, blocks.append( TensorBlock( - values=values, + values=_dispatch.contiguous(values), samples=Labels(["m1", "m2", "mu"], l1l2lam_sample_values), components=[], properties=Labels.range("property", 1), @@ -384,7 
+384,9 @@ def _build_cg_coeff_dict(
         block_value_shape = (1,) + l1l2lam_values.shape + (1,)
         blocks.append(
             TensorBlock(
-                values=l1l2lam_values.reshape(block_value_shape),
+                values=_dispatch.contiguous(
+                    l1l2lam_values.reshape(block_value_shape)
+                ),
                 samples=Labels.range("sample", 1),
                 components=[
                     Labels(
diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py b/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py
index 37dbbdc4e..02f66dcc5 100644
--- a/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py
+++ b/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py
@@ -78,6 +78,26 @@ def argsort(array):
         raise TypeError(UNKNOWN_ARRAY_TYPE)


+def contiguous(array):
+    """
+    Returns a contiguous array.
+
+    It is the equivalent of np.ascontiguousarray(array) and tensor.contiguous().
+    In the case of numpy, C order is used for consistency with torch. As such,
+    only C-contiguity is checked.
+    """
+    if isinstance(array, TorchTensor):
+        if array.is_contiguous():
+            return array
+        return array.contiguous()
+    elif isinstance(array, np.ndarray):
+        if array.flags["C_CONTIGUOUS"]:
+            return array
+        return np.ascontiguousarray(array)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
 def unique(array, axis: Optional[int] = None):
     """Find the unique elements of an array."""
     if isinstance(array, TorchTensor):
diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py
index 418bcc9fa..14af866ff 100644
--- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py
+++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py
@@ -266,14 +266,14 @@ def forward(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]:

     def compute(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]:
         """
-        Performs the density correlations for public functions
-        :py:func:`correlate_density` and :py:func:`correlate_density_metadata`.
-
-        :param density: A density descriptor of body order 2 (correlation order 1),
-            in :py:class:`TensorMap` format. This may be, for example, a rascaline
-            :py:class:`SphericalExpansion` or :py:class:`LodeSphericalExpansion`.
-            Alternatively, this could be multi-center descriptor, such as a pair
-            density.
+        Computes the density correlations by taking iterative Clebsch-Gordan
+        (CG) tensor products of the input `density` descriptor with itself.
+
+        :param density: A density descriptor of body order 2 (correlation order
+            1), in :py:class:`TensorMap` format. This may be, for example, a
+            rascaline :py:class:`SphericalExpansion` or
+            :py:class:`LodeSphericalExpansion`. Alternatively, this could be a
+            multi-center descriptor, such as a pair density.
         """
         return self._correlate_density(
             density,
@@ -286,16 +286,16 @@ def compute_metadata(
         density: TensorMap,
     ) -> Union[TensorMap, List[TensorMap]]:
         """
-        Returns the metadata-only :py:class:`TensorMap`(s) that would be output by
-        the function :py:func:`correlate_density` under the same settings, without
-        perfoming the actual Clebsch-Gordan tensor products. See this function for
-        full documentation.
-
-        :param density: A density descriptor of body order 2 (correlation order 1),
-            in :py:class:`TensorMap` format. This may be, for example, a rascaline
-            :py:class:`SphericalExpansion` or :py:class:`LodeSphericalExpansion`.
-            Alternatively, this could be multi-center descriptor, such as a pair
-            density.
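For context on the `contiguous` helper introduced above: a transpose is the canonical way to end up with a non-C-contiguous array, and the two backends expose the check and the fix under different names. A standalone sketch (plain numpy/torch, not rascaline code):

    import numpy as np

    a = np.arange(6.0).reshape(2, 3).T  # transposed view: not C-contiguous
    assert not a.flags["C_CONTIGUOUS"]
    assert np.ascontiguousarray(a).flags["C_CONTIGUOUS"]  # copies into C order

    try:
        import torch

        t = torch.arange(6.0).reshape(2, 3).T
        assert not t.is_contiguous()
        assert t.contiguous().is_contiguous()
    except ImportError:
        pass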
+        Returns the metadata-only :py:class:`TensorMap`(s) that would be output
+        by the function :py:meth:`compute` for the same calculator under the
+        same settings, without performing the actual Clebsch-Gordan tensor
+        products.
+
+        :param density: A density descriptor of body order 2 (correlation order
+            1), in :py:class:`TensorMap` format. This may be, for example, a
+            rascaline :py:class:`SphericalExpansion` or
+            :py:class:`LodeSphericalExpansion`. Alternatively, this could be a
+            multi-center descriptor, such as a pair density.
         """
         return self._correlate_density(
             density,

From 3513558a68a66c6775db6a35ff9fd9f650949db5 Mon Sep 17 00:00:00 2001
From: Joseph Abbott
Date: Thu, 15 Feb 2024 18:07:10 +0100
Subject: [PATCH 12/23] Remove comment block

---
 .../rascaline/utils/clebsch_gordan/correlate_density.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py
index 14af866ff..340662504 100644
--- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py
+++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py
@@ -227,12 +227,6 @@ def __init__(
             )
         )

-        # Compute CG coefficient cache
-        # TODO: keys have been precomputed, so perhaps we don't need to
-        # compute all CG coefficients up to max_angular here.
-        # TODO: use sparse cache by default until we understand under which
-        # circumstances (and if) dense is faster
     @property
     def correlation_order(self):
         return self._correlation_order

From 1504c1928184ae50227315875064e03e31733111 Mon Sep 17 00:00:00 2001
From: Alexander Goscinski
Date: Fri, 16 Feb 2024 11:19:53 +0100
Subject: [PATCH 13/23] Update python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py

---
 .../rascaline/utils/clebsch_gordan/_clebsch_gordan.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py
index dc5c7aed7..333da24ea 100644
--- a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py
+++ b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py
@@ -115,8 +115,7 @@ def _parse_selected_keys(

     if is_labels(selected_keys):
         # Create a list, but only apply a key selection at the final iteration
-        selected_keys_ = [None] * (n_iterations - 1)
-        selected_keys_.append(selected_keys)
+        selected_keys_ = [None] * (n_iterations - 1) + [selected_keys]

     elif isinstance(selected_keys, list):
         selected_keys_ = selected_keys

From d024c7ff769adb89a76218a68788eb99e77fa63a Mon Sep 17 00:00:00 2001
From: Alexander Goscinski
Date: Fri, 16 Feb 2024 11:22:14 +0100
Subject: [PATCH 14/23] Update python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py

---
 .../rascaline/utils/clebsch_gordan/correlate_density.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py
index 340662504..8f1b7a262 100644
--- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py
+++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py
@@ -186,8 +186,6 @@ def __init__(
             sparse = True
             use_mops = True

-        # We cannot store this into one member variable because TorchScript
-        # has problems scripting which cannot be resolved with isinstance checks
         self._cg_coeffs = _cg_cache.ClebschGordanReal(
             self._max_angular,
sparse=sparse, From 5a69107d4c0da9d9318a3979def86b4a0a405770 Mon Sep 17 00:00:00 2001 From: Joseph Abbott Date: Fri, 16 Feb 2024 16:01:28 +0100 Subject: [PATCH 15/23] Test save/load for checking contiguous. Clean up. Docstring arg. --- .../tests/utils/correlate_density.py | 19 ++++++++++- .../utils/clebsch_gordan/correlate_density.py | 33 +++++++++++-------- .../tests/utils/correlate_density.py | 24 +------------- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/python/rascaline-torch/tests/utils/correlate_density.py b/python/rascaline-torch/tests/utils/correlate_density.py index d6f677fde..041374021 100644 --- a/python/rascaline-torch/tests/utils/correlate_density.py +++ b/python/rascaline-torch/tests/utils/correlate_density.py @@ -95,7 +95,7 @@ def test_torch_script_correlate_density_angular_selection( assert is_tensor_map(corr_calculator.cg_coeffs) -def test_save_load(): +def test_jit_save_load(): corr_calculator = DensityCorrelations( max_angular=2, correlation_order=2, @@ -107,3 +107,20 @@ def test_save_load(): buffer.seek(0) torch.jit.load(buffer) buffer.close() + + +def test_save_load(): + """Tests for saving and loading with cg_backend="python-dense", + which makes the DensityCorrelations object non-scriptable due to + a non-contiguous CG cache.""" + corr_calculator = DensityCorrelations( + max_angular=2, + correlation_order=2, + angular_cutoff=1, + cg_backend="python-dense", + ) + buffer = io.BytesIO() + torch.save(corr_calculator, buffer) + buffer.seek(0) + torch.load(buffer) + buffer.close() diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py index 8f1b7a262..dff14e6c4 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py @@ -63,6 +63,10 @@ class DensityCorrelations(TorchModule): controlled with arguments `angular_cutoff`, `angular_selection` and `parity_selection`. + :param max_angular: The maximum angular order for which CG coefficients + should be computed and stored. This must be large enough to cover the + maximum angular order reached in the CG iterations on a density input to + the :py:meth:`compute` method. :param correlation_order: The desired correlation order of the output descriptor. Must be >= 1. :param angular_cutoff: The maximum angular channel to compute at any given @@ -88,19 +92,22 @@ class DensityCorrelations(TorchModule): will be returned. If a :py:class:`list` of :py:class:`bool` is passed, this controls the output at each corresponding iteration. If None is passed, only the final iteration is output. - :param arrays_backend: Determines the array backend be "numpy" or "torch" - :param cg_backend: Determines the backend for the CG combination. It can - be even "python-sparse", "python-dense" or "mops". If the CG combination - performs on the sparse coefficients, it means that for each (l1, l2, lambda) - block the (m1, m2, mu) coefficients are stored in a sparse format only storing - the nonzero coefficients. If the parameter are None, the most optimal choice is - determined given available packages and ``arrays_backend``. - - "python-dense": Uses the python implementation performing the combinations - with the dense CG coefficients. - - "python-sparse": Uses the python implementation performing the combinations - with the sparse CG coefficients. - - "mops": Uses the package ``mops`` that optimized the sparse combinations. 
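The `test_jit_save_load` test added in this patch relies on `torch.jit.save`/`torch.jit.load` accepting file-like objects, so the round trip can happen entirely in memory. A stripped-down version of the same pattern (the `Doubler` module is a stand-in for the scripted calculator, not rascaline code):

    import io

    import torch


    class Doubler(torch.nn.Module):
        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return 2 * x


    scripted = torch.jit.script(Doubler())
    with io.BytesIO() as buffer:
        torch.jit.save(scripted, buffer)
        buffer.seek(0)
        restored = torch.jit.load(buffer)
    assert torch.equal(restored(torch.ones(3)), 2 * torch.ones(3))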
-        At the moment it is only available with "numpy" as ``arrays_backend``
+    :param arrays_backend: Determines the array backend, either "numpy" or
+        "torch".
+    :param cg_backend: Determines the backend for the CG combination. It can be
+        one of "python-sparse", "python-dense" or "mops". If the CG combination
+        is performed on the sparse coefficients, it means that for each (l1, l2,
+        lambda) block the (m1, m2, mu) coefficients are stored in a sparse
+        format only storing the nonzero coefficients. If the parameter is None,
+        the optimal choice is determined given the available packages and
+        ``arrays_backend``.
+        - "python-dense": Uses the python implementation performing the
+          combinations with the dense CG coefficients.
+        - "python-sparse": Uses the python implementation performing
+          the combinations with the sparse CG coefficients.
+        - "mops": Uses the package ``mops`` that optimizes the sparse
+          combinations. At the moment it is only available with "numpy" as
+          ``arrays_backend``.

     :return: A :py:class:`list` of :py:class:`TensorMap` corresponding to the
         density correlations output from the specified iterations. If the output
diff --git a/python/rascaline/tests/utils/correlate_density.py b/python/rascaline/tests/utils/correlate_density.py
index 6dcfc7ba7..46b2ebba0 100644
--- a/python/rascaline/tests/utils/correlate_density.py
+++ b/python/rascaline/tests/utils/correlate_density.py
@@ -11,10 +11,7 @@
 from rascaline.utils import PowerSpectrum
 from rascaline.utils.clebsch_gordan import _dispatch
 from rascaline.utils.clebsch_gordan._cg_cache import ClebschGordanReal
-from rascaline.utils.clebsch_gordan._clebsch_gordan import (
-    _precompute_keys,
-    _standardize_keys,
-)
+from rascaline.utils.clebsch_gordan._clebsch_gordan import _standardize_keys
 from rascaline.utils.clebsch_gordan.correlate_density import DensityCorrelations


@@ -150,25 +147,6 @@ def get_norm(tensor: TensorMap):
     return norm


-def get_max_angular(density: TensorMap, calculator: DensityCorrelations):
-    key_metadata = _precompute_keys(
-        density.keys,
-        density.keys,
-        n_iterations=calculator._n_iterations,
-        selected_keys=calculator._selected_keys,
-        skip_redundant=calculator._skip_redundant,
-    )
-    return max(
-        _dispatch.max(density.keys.column("spherical_harmonics_l")),
-        max(
-            [
-                int(_dispatch.max(mdata[2].column("spherical_harmonics_l")))
-                for mdata in key_metadata
-            ]
-        ),
-    )
-
-
 # ============ Test equivariance ============

From a45b73c6ec8f89570f49ae8dd4a263e159c27a7a Mon Sep 17 00:00:00 2001
From: Joseph Abbott
Date: Sat, 17 Feb 2024 20:09:16 +0100
Subject: [PATCH 16/23] Partial resolution of review comments

---
 .../rascaline/torch/utils/__init__.py | 4 +-
 .../rascaline/torch/utils/clebsch_gordan.py | 2 +-
 .../tests/utils/correlate_density.py | 46 +-
 .../_classes.py => _backend.py} | 5 +
 .../utils/{clebsch_gordan => }/_dispatch.py | 304 +++++++++----
 .../utils/clebsch_gordan/__init__.py | 2 +-
 .../utils/clebsch_gordan/_cg_cache.py | 423 ++++++++++--------
 .../utils/clebsch_gordan/_clebsch_gordan.py | 62 +--
 .../utils/clebsch_gordan/correlate_density.py | 46 +-
 .../utils/power_spectrum/_classes.py | 13 -
 .../utils/power_spectrum/_dispatch.py | 167 -------
 .../utils/power_spectrum/calculator.py | 6 +-
 .../tests/utils/correlate_density.py | 40 +-
 .../tests/utils/data/h2_isolated.xyz | 4 -
 .../tests/utils/data/h2o_isolated.xyz | 5 -
 .../tests/utils/data/h2o_periodic.xyz | 5 -
 16 files changed, 572 insertions(+), 562 deletions(-)
 rename python/rascaline/rascaline/utils/{clebsch_gordan/_classes.py => _backend.py} (89%)
 rename
python/rascaline/rascaline/utils/{clebsch_gordan => }/_dispatch.py (66%) delete mode 100644 python/rascaline/rascaline/utils/power_spectrum/_classes.py delete mode 100644 python/rascaline/rascaline/utils/power_spectrum/_dispatch.py delete mode 100644 python/rascaline/tests/utils/data/h2_isolated.xyz delete mode 100644 python/rascaline/tests/utils/data/h2o_isolated.xyz delete mode 100644 python/rascaline/tests/utils/data/h2o_periodic.xyz diff --git a/python/rascaline-torch/rascaline/torch/utils/__init__.py b/python/rascaline-torch/rascaline/torch/utils/__init__.py index 9bd890fa2..6670685e5 100644 --- a/python/rascaline-torch/rascaline/torch/utils/__init__.py +++ b/python/rascaline-torch/rascaline/torch/utils/__init__.py @@ -1,6 +1,6 @@ import os -from . import clebsch_gordan +from .clebsch_gordan import DensityCorrelations from .power_spectrum import PowerSpectrum @@ -11,4 +11,4 @@ Path containing the CMake configuration files for the underlying C library """ -__all__ = ["PowerSpectrum", "clebsch_gordan"] +__all__ = ["PowerSpectrum", "DensityCorrelations"] diff --git a/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py b/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py index c9d54a2ce..d3ac6b41e 100644 --- a/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py +++ b/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py @@ -11,7 +11,7 @@ # For details what is happening here take a look an `rascaline.torch.calculators`. -# Step 1: create te `_classes` module as an empty module +# Step 1: create the `_classes` module as an empty module spec = importlib.util.spec_from_loader( "rascaline.torch.utils.clebsch_gordan._classes", loader=None, diff --git a/python/rascaline-torch/tests/utils/correlate_density.py b/python/rascaline-torch/tests/utils/correlate_density.py index 041374021..fd5687c65 100644 --- a/python/rascaline-torch/tests/utils/correlate_density.py +++ b/python/rascaline-torch/tests/utils/correlate_density.py @@ -22,16 +22,20 @@ def is_tensor_map(obj: Any): is_tensor_map = torch.jit.script(is_tensor_map) -SPHEX_HYPERS = { - "cutoff": 2.5, # Angstrom - "max_radial": 3, # Exclusive - "max_angular": 3, # Inclusive +SPHERICAL_EXPANSION_HYPERS = { + "cutoff": 2.5, + "max_radial": 3, + "max_angular": 3, "atomic_gaussian_width": 0.2, "radial_basis": {"Gto": {}}, "cutoff_function": {"ShiftedCosine": {"width": 0.5}}, "center_atom_weight": 1.0, } +SELECTED_KEYS = Labels( + names=["spherical_harmonics_l"], values=torch.tensor([1, 3]).reshape(-1, 1) +) + def h2o_isolated(): return ase.io.read(os.path.join(DATA_ROOT, "h2o_isolated.xyz"), ":") @@ -39,20 +43,12 @@ def h2o_isolated(): def spherical_expansion(frames: List[ase.Atoms]): """Returns a rascaline SphericalExpansion""" - calculator = rascaline.torch.SphericalExpansion(**SPHEX_HYPERS) + calculator = rascaline.torch.SphericalExpansion(**SPHERICAL_EXPANSION_HYPERS) return calculator.compute(rascaline.torch.systems_to_torch(frames)) # copy of def test_correlate_density_angular_selection( -@pytest.mark.parametrize( - "selected_keys", - [ - None, - Labels( - names=["spherical_harmonics_l"], values=torch.tensor([1, 3]).reshape(-1, 1) - ), - ], -) +@pytest.mark.parametrize("selected_keys", [None, SELECTED_KEYS]) @pytest.mark.parametrize("skip_redundant", [True, False]) def test_torch_script_correlate_density_angular_selection( selected_keys: Labels, @@ -66,7 +62,7 @@ def test_torch_script_correlate_density_angular_selection( nu_1 = spherical_expansion(frames) correlation_order = 2 corr_calculator = 
DensityCorrelations( - max_angular=SPHEX_HYPERS["max_angular"] * correlation_order, + max_angular=SPHERICAL_EXPANSION_HYPERS["max_angular"] * correlation_order, correlation_order=correlation_order, angular_cutoff=None, selected_keys=selected_keys, @@ -102,11 +98,11 @@ def test_jit_save_load(): angular_cutoff=1, ) scripted_correlate_density = torch.jit.script(corr_calculator) - buffer = io.BytesIO() - torch.jit.save(scripted_correlate_density, buffer) - buffer.seek(0) - torch.jit.load(buffer) - buffer.close() + with io.BytesIO() as buffer: + torch.jit.save(scripted_correlate_density, buffer) + buffer.seek(0) + torch.jit.load(buffer) + buffer.close() def test_save_load(): @@ -119,8 +115,8 @@ def test_save_load(): angular_cutoff=1, cg_backend="python-dense", ) - buffer = io.BytesIO() - torch.save(corr_calculator, buffer) - buffer.seek(0) - torch.load(buffer) - buffer.close() + with io.BytesIO() as buffer: + torch.save(corr_calculator, buffer) + buffer.seek(0) + torch.load(buffer) + buffer.close() diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py b/python/rascaline/rascaline/utils/_backend.py similarity index 89% rename from python/rascaline/rascaline/utils/clebsch_gordan/_classes.py rename to python/rascaline/rascaline/utils/_backend.py index 9cd36502b..7e581153e 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_classes.py +++ b/python/rascaline/rascaline/utils/_backend.py @@ -3,6 +3,9 @@ import numpy as np from metatensor import Labels, LabelsEntry, TensorBlock, TensorMap +from ..calculator_base import CalculatorBase +from ..systems import IntoSystem + def torch_jit_is_scripting(): return False @@ -41,6 +44,8 @@ class TorchScriptClass: Array = Union[np.ndarray, TorchTensor] __all__ = [ + "CalculatorBase", + "IntoSystem", "Labels", "TensorBlock", "TensorMap", diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py b/python/rascaline/rascaline/utils/_dispatch.py similarity index 66% rename from python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py rename to python/rascaline/rascaline/utils/_dispatch.py index 02f66dcc5..97aaf293f 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py +++ b/python/rascaline/rascaline/utils/_dispatch.py @@ -1,5 +1,7 @@ -""" -Module containing dispatch functions for numpy/torch CG combination operations. +"""Helper functions to dispatch methods between numpy and torch. + +The functions are similar to those in metatensor-operations. Missing functions may +already exist there. Functions are ordered alphabetically. 
""" import itertools @@ -7,13 +9,14 @@ import numpy as np -from ._classes import TorchTensor - try: import torch + from torch import Tensor as TorchTensor except ImportError: - pass + + class TorchTensor: + pass UNKNOWN_ARRAY_TYPE = ( @@ -29,6 +32,15 @@ def _check_all_torch_tensor(arrays: List[TorchTensor]): ) +# def _check_all_torch_tensor(arrays: List[TorchTensor]): +# for array in arrays: +# if not isinstance(array, TorchTensor): +# raise TypeError( +# f"expected argument to be a torch.Tensor, but got +# {type(array)}" +# ) + + def _check_all_np_ndarray(arrays): for array in arrays: if not isinstance(array, np.ndarray): @@ -37,6 +49,216 @@ def _check_all_np_ndarray(arrays): ) +# def _check_all_np_ndarray(arrays): +# for array in arrays: +# if not isinstance(array, np.ndarray): +# raise TypeError( +# f"expected argument to be a np.ndarray, but got {type(array)}" +# ) + + +def concatenate(arrays: List[TorchTensor], axis: int): + """ + Concatenate a group of arrays along a given axis. + + This function has the same behavior as ``numpy.concatenate(arrays, axis)`` + and ``torch.concatenate(arrays, axis)``. + + Passing `axis` as ``0`` is equivalent to :py:func:`numpy.vstack`, ``1`` to + :py:func:`numpy.hstack`, and ``2`` to :py:func:`numpy.dstack`, though any + axis index > 0 is valid. + """ + if isinstance(arrays[0], TorchTensor): + _check_all_torch_tensor(arrays) + return torch.concatenate(arrays, axis) + elif isinstance(arrays[0], np.ndarray): + _check_all_np_ndarray(arrays) + return np.concatenate(arrays, axis) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +# def concatenate(arrays, axis: Optional[int] = 0): +# """Concatenate arrays along an axis.""" +# if isinstance(arrays[0], TorchTensor): +# return torch.cat(arrays, dim=axis) +# elif isinstance(arrays[0], np.ndarray): +# return np.concatenate(arrays, axis=axis) +# else: +# raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def empty_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): + """ + Create an uninitialized array, with the given ``shape``, and similar dtype, + device and other options as ``array``. + + If ``shape`` is :py:obj:`None`, the array shape is used instead. + ``requires_grad`` is only used for torch tensors, and set the corresponding + value on the returned array. + + This is the equivalent to ``np.empty_like(array, shape=shape)``. + """ + if isinstance(array, TorchTensor): + if shape is None: + shape = array.size() + return torch.empty( + shape, + dtype=array.dtype, + layout=array.layout, + device=array.device, + ).requires_grad_(requires_grad) + elif isinstance(array, np.ndarray): + return np.empty_like(array, shape=shape, subok=False) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +# def empty_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): +# """ +# Create an empty array, with the given ``shape``, and similar dtype, device +# and other options as ``array``. + +# If ``shape`` is :py:obj:`None`, the array shape is used instead. +# ``requires_grad`` is only used for torch tensors, and set the corresponding +# value on the returned array. + +# This is the equivalent to ``np.empty_like(array, shape=shape)``. 
+# """ +# if isinstance(array, TorchTensor): +# if shape is None: +# shape = array.size() + +# return torch.empty( +# shape, +# dtype=array.dtype, +# layout=array.layout, +# device=array.device, +# ).requires_grad_(requires_grad) +# elif isinstance(array, np.ndarray): +# return np.empty_like(array, shape=shape, subok=False) +# else: +# raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def list_to_array(array, data: List[List[int]]): + """Create an object from data with the same type as ``array``.""" + if isinstance(array, TorchTensor): + return torch.tensor(data) + elif isinstance(array, np.ndarray): + return np.array(data) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def matmul(a, b): + """Matrix product of two arrays.""" + if isinstance(a, TorchTensor): + _check_all_torch_tensor([b]) + return torch.matmul(a, b) + elif isinstance(a, np.ndarray): + _check_all_np_ndarray([b]) + return np.matmul(a, b) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def to_index_array(array): + """Returns an array that is suitable for indexing a dimension of + a different array. + After a few checks (int, 1D), this operation will convert the dtype to + torch.long (which is, in some torch versions, the only acceptable type + of index tensor). Numpy arrays are left unchanged. + """ + if len(array.shape) != 1: + raise ValueError("Index arrays must be 1D") + + if isinstance(array, TorchTensor): + if torch.is_floating_point(array): + raise ValueError("Index arrays must be integers") + return array.to(torch.long) + elif isinstance(array, np.ndarray): + if not np.issubdtype(array.dtype, np.integer): + raise ValueError("Index arrays must be integers") + return array + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def unique(array, axis: Optional[int] = None): + """Find the unique elements of an array.""" + if isinstance(array, TorchTensor): + return torch.unique(array, dim=axis) + elif isinstance(array, np.ndarray): + return np.unique(array, axis=axis) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +# def unique(array, axis: Optional[int] = None): +# """Find the unique elements of an array.""" +# if isinstance(array, TorchTensor): +# return torch.unique(array, dim=axis) +# elif isinstance(array, np.ndarray): +# return np.unique(array, axis=axis) + + +def zeros_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): + """ + Create an array filled with zeros, with the given ``shape``, and similar + dtype, device and other options as ``array``. + + If ``shape`` is :py:obj:`None`, the array shape is used instead. + ``requires_grad`` is only used for torch tensors, and set the corresponding + value on the returned array. + + This is the equivalent to ``np.zeros_like(array, shape=shape)``. + """ + if isinstance(array, TorchTensor): + if shape is None: + shape = array.size() + + return torch.zeros( + shape, + dtype=array.dtype, + layout=array.layout, + device=array.device, + ).requires_grad_(requires_grad) + elif isinstance(array, np.ndarray): + if shape is None: + shape = array.shape + return np.zeros_like(array, shape=shape, subok=False) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +# def zeros_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): +# """ +# Create an array filled with zeros, with the given ``shape``, and similar +# dtype, device and other options as ``array``. + +# If ``shape`` is :py:obj:`None`, the array shape is used instead. +# ``requires_grad`` is only used for torch tensors, and set the corresponding +# value on the returned array. 
+ +# This is the equivalent to ``np.zeros_like(array, shape=shape)``. +# """ +# if isinstance(array, TorchTensor): +# if shape is None: +# shape = array.size() + +# return torch.zeros( +# shape, +# dtype=array.dtype, +# layout=array.layout, +# device=array.device, +# ).requires_grad_(requires_grad) +# elif isinstance(array, np.ndarray): +# return np.zeros_like(array, shape=shape, subok=False) +# else: +# raise TypeError(UNKNOWN_ARRAY_TYPE) + + def where(array): """Return the indices where `array` is True. @@ -98,14 +320,6 @@ def contiguous(array): raise TypeError(UNKNOWN_ARRAY_TYPE) -def unique(array, axis: Optional[int] = None): - """Find the unique elements of an array.""" - if isinstance(array, TorchTensor): - return torch.unique(array, dim=axis) - elif isinstance(array, np.ndarray): - return np.unique(array, axis=axis) - - def to_int_list(array) -> List[int]: if isinstance(array, TorchTensor): # we need to do it this way because of @@ -189,16 +403,6 @@ def cartesian_prod(array1, array2): raise TypeError(UNKNOWN_ARRAY_TYPE) -def concatenate(arrays, axis: Optional[int] = 0): - """Concatenate arrays along an axis.""" - if isinstance(arrays[0], TorchTensor): - return torch.cat(arrays, dim=axis) - elif isinstance(arrays[0], np.ndarray): - return np.concatenate(arrays, axis=axis) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - def all(array, axis: Optional[int] = None): """Test whether all array elements along a given axis evaluate to True. @@ -256,60 +460,6 @@ def any(array): raise TypeError(UNKNOWN_ARRAY_TYPE) -def zeros_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): - """ - Create an array filled with zeros, with the given ``shape``, and similar - dtype, device and other options as ``array``. - - If ``shape`` is :py:obj:`None`, the array shape is used instead. - ``requires_grad`` is only used for torch tensors, and set the corresponding - value on the returned array. - - This is the equivalent to ``np.zeros_like(array, shape=shape)``. - """ - if isinstance(array, TorchTensor): - if shape is None: - shape = array.size() - - return torch.zeros( - shape, - dtype=array.dtype, - layout=array.layout, - device=array.device, - ).requires_grad_(requires_grad) - elif isinstance(array, np.ndarray): - return np.zeros_like(array, shape=shape, subok=False) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def empty_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): - """ - Create an empty array, with the given ``shape``, and similar - dtype, device and other options as ``array``. - - If ``shape`` is :py:obj:`None`, the array shape is used instead. - ``requires_grad`` is only used for torch tensors, and set the corresponding - value on the returned array. - - This is the equivalent to ``np.empty_like(array, shape=shape)``. 
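Among the helpers shuffled around above, the single-argument `where` is dispatchable precisely because numpy and torch agree on its semantics: both return a tuple of index arrays locating the True entries. A quick parity check (standalone, not part of the module):

    import numpy as np

    mask = np.array([[True, False], [False, True]])
    rows, cols = np.where(mask)
    assert rows.tolist() == [0, 1] and cols.tolist() == [0, 1]

    try:
        import torch

        t_rows, t_cols = torch.where(torch.tensor(mask))
        assert t_rows.tolist() == [0, 1] and t_cols.tolist() == [0, 1]
    except ImportError:
        pass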
- """ - if isinstance(array, TorchTensor): - if shape is None: - shape = array.size() - - return torch.empty( - shape, - dtype=array.dtype, - layout=array.layout, - device=array.device, - ).requires_grad_(requires_grad) - elif isinstance(array, np.ndarray): - return np.empty_like(array, shape=shape, subok=False) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - def allclose( a: TorchTensor, b: TorchTensor, diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py index 0aa1ea28d..1da995612 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py @@ -1,4 +1,4 @@ -from .correlate_density import DensityCorrelations # noqa +from .correlate_density import DensityCorrelations __all__ = [ diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py index 4ba79083a..e2b43d81d 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py @@ -9,8 +9,8 @@ import numpy as np import wigners -from . import _dispatch -from ._classes import ( +from .. import _dispatch +from .._backend import ( Array, Labels, TensorBlock, @@ -64,88 +64,8 @@ class ClebschGordanReal(TorchModule): Class for computing Clebsch-Gordan coefficients for real spherical harmonics. - Stores the coefficients in a dictionary in the `self.coeffs` attribute, - which is built at initialization. There are 3 current use cases for the - format of these coefficients. By default, sparse accumulation of products is - performed, whether or not Mops is installed. - - Case 1: standard sparse format. - - Each dictionary entry is a dictionary with entries for each (m1, m2, mu) - combination. - - { - (l1, l2, lambda): { - (m1, m2, mu) : cg_{m1, m2, mu}^{l1, l2, lambda} - for m1 in range(-l1, l1 + 1), - for m2 in range(-l2, l2 + 1), - }, - ... - for l1 in range(0, l1_list) - for l2 in range(0, l2_list) - for lambda in range(0, range(|l1 - l2|, ..., |l1 + l2|)) - } - - Case 2: standard dense format. - - Each dictionary entry is a dense array with shape (2 * l1 + 1, 2 * l2 + 1, 2 - * lambda + 1). - - { - (l1, l2, lambda): - array( - cg_{m1, m2, mu}^{l1, l2, lambda} - ... - for m1 in range(-l1, l1 + 1), - for m2 in range(-l2, l2 + 1), - for mu in range(-lambda, lambda + 1), - - shape=(2 * l1 + 1, 2 * l2 + 1, 2 * lambda + 1), - ) - ... - for l1 in range(0, l1_list) - for l2 in range(0, l2_list) - for lambda in range(0, range(|l1 - l2|, ..., |l1 + l2|)) - } - - Case 3: MOPS sparse format. - - Each dictionary entry contains a tuple with four 1D arrays, corresponding to - the CG coeffs and m1, m2, mu indices respectively. All of these arrays are - sorted according to the mu index. This format is used for Sparse - Accumulation of Products (SAP) as implemented in MOPS. See - https://github.com/lab-cosmo/mops . - - { - (l1, l2, lambda): - ( - [ - cg_{m1, m2, mu}^{l1, l2, lambda} - ... 
- for m1 in range(-l1, l1 + 1), - for m2 in range(-l2, l2 + 1), - for mu in range(-lambda, lambda + 1) - ], - [ - m1 for m1 in range(-l1, l1 + 1), - ], - [ - m2 for m2 in range(-l2, l2 + 1), - ], - [ - mu for mu in range(-lambda, lambda + 1), - ], - ) - - - } - - where `cg_{m1, m2, mu}^{l1, l2, lambda}` is the Clebsch-Gordan coefficient - that describes the combination of the `m1` irreducible component of the `l1` - angular channel and the `m2` irreducible component of the `l2` angular - channel into the irreducible tensor of order `lambda`. In all cases, these - correspond to the non-zero CG coefficients, i.e. those in the range |-l, - ..., +l| for each angular order l in {l1, l2, lambda}. + Stores the coefficients in the `self._cg_coeffs` attribute in TensorMap + format, which is built at initialization. :param lambda_max: maximum lambda value to compute CG coefficients for. :param sparse: whether to store the CG coefficients in sparse format. @@ -213,29 +133,13 @@ def __init__( else: self._use_torch = use_torch - self._coeffs = _build_cg_coeff_dict( + self._cg_coeffs = _build_cg_coeff_dict( self._lambda_max, sparse, self._use_mops, self._use_torch, ) - @property - def lambda_max(self): - return self._lambda_max - - @property - def sparse(self): - return self._sparse - - @property - def use_mops(self): - return self._use_mops - - @property - def coeffs(self): - return self._coeffs - def _build_cg_coeff_dict( lambda_max: int, sparse: bool, use_mops: bool, use_torch: bool @@ -243,6 +147,91 @@ def _build_cg_coeff_dict( """ Builds a dictionary of Clebsch-Gordan coefficients for all possible combination of l1 and l2, up to lambda_max. + + This is an intermediate data structure, as the dictionary is converted to a + TensorMap by calling the :py:func:`_cg_coeff_dict_to_tensormap` function. + For transparency, the intermediate dict data structure is described here. + + There are 3 current use cases for the format of these coefficients, and for + each the intermediate dict has a different data structure. + + Case 1: standard sparse format. + + Each dictionary entry is a dictionary with entries for each (m1, m2, mu) + combination. + + { + (l1, l2, lambda): { + (m1, m2, mu) : cg_{m1, m2, mu}^{l1, l2, lambda} + for m1 in range(-l1, l1 + 1), + for m2 in range(-l2, l2 + 1), + }, + ... + for l1 in range(0, l1_list) + for l2 in range(0, l2_list) + for lambda in range(0, range(|l1 - l2|, ..., |l1 + l2|)) + } + + Case 2: standard dense format. + + Each dictionary entry is a dense array with shape (2 * l1 + 1, 2 * l2 + 1, 2 + * lambda + 1). + + { + (l1, l2, lambda): + array( + cg_{m1, m2, mu}^{l1, l2, lambda} + ... + for m1 in range(-l1, l1 + 1), + for m2 in range(-l2, l2 + 1), + for mu in range(-lambda, lambda + 1), + + shape=(2 * l1 + 1, 2 * l2 + 1, 2 * lambda + 1), + ) + ... + for l1 in range(0, l1_list) + for l2 in range(0, l2_list) + for lambda in range(0, range(|l1 - l2|, ..., |l1 + l2|)) + } + + Case 3: MOPS sparse format. + + Each dictionary entry contains a tuple with four 1D arrays, corresponding to + the CG coeffs and m1, m2, mu indices respectively. All of these arrays are + sorted according to the mu index. This format is used for Sparse + Accumulation of Products (SAP) as implemented in MOPS. See + https://github.com/lab-cosmo/mops . + + { + (l1, l2, lambda): + ( + [ + cg_{m1, m2, mu}^{l1, l2, lambda} + ... 
+ for m1 in range(-l1, l1 + 1), + for m2 in range(-l2, l2 + 1), + for mu in range(-lambda, lambda + 1) + ], + [ + m1 for m1 in range(-l1, l1 + 1), + ], + [ + m2 for m2 in range(-l2, l2 + 1), + ], + [ + mu for mu in range(-lambda, lambda + 1), + ], + ) + + + } + + where `cg_{m1, m2, mu}^{l1, l2, lambda}` is the Clebsch-Gordan coefficient + that describes the combination of the `m1` irreducible component of the `l1` + angular channel and the `m2` irreducible component of the `l2` angular + channel into the irreducible tensor of order `lambda`. In all cases, these + correspond to the non-zero CG coefficients, i.e. those in the range |-l, + ..., +l| for each angular order l in {l1, l2, lambda}. """ # real-to-complex and complex-to-real transformations as matrices r2c: Dict[int, Array] = {} @@ -253,16 +242,10 @@ def _build_cg_coeff_dict( if use_torch: complex_like = torch.empty(0, dtype=torch.complex128) double_like = torch.empty(0, dtype=torch.double) - # For metatensor-core backen we have to use the for Labels numpy arrays - # even with use_torch true. Logic is a nested because while scripting - # the compiler may not see `torch.ScriptClass` - if torch_jit_is_scripting(): + if isinstance(Labels, torch.ScriptClass): labels_values_like = torch.empty(0, dtype=torch.double) else: - if isinstance(Labels, torch.ScriptClass): - labels_values_like = torch.empty(0, dtype=torch.double) - else: - labels_values_like = np.empty(0, dtype=np.double) + labels_values_like = np.empty(0, dtype=np.double) else: complex_like = np.empty(0, dtype=np.complex128) double_like = np.empty(0, dtype=np.double) @@ -303,8 +286,6 @@ def _build_cg_coeff_dict( nonzeros_cg_coeffs_idx = _dispatch.where( _dispatch.abs(cg_l1l2lam_dense) > 1e-15 ) - # Till MOPS does not TorchScript support we disable the scripting - # of this part here. if use_mops: # Store CG coeffs in a specific format for use in # MOPS. Here we need the m1, m2, mu, and CG coeffs @@ -347,6 +328,42 @@ def _build_cg_coeff_dict( else: # Store coeff_dict[(l1, l2, lambda_)] = cg_l1l2lam_dense + + return _cg_coeff_dict_to_tensormap( + coeff_dict, sparse, double_like, labels_values_like + ) + + +def _cg_coeff_dict_to_tensormap( + coeff_dict: Dict, sparse: bool, double_like, labels_values_like +): + """ + Converts the dictionary of Clebsch-Gordan coefficients to + :py:class:`TensorMap` format, whose data structure depends on whether they + will be used for sparse or dense operations. + + For both, keys are indexed by `(l1, l2, lambda)`, which stores CG + coefficients for the combination of two blocks (of order where `l1` and `l2` + respectively) to angular order `lambda`. + + Each block then has a different structure for performing sparse and dense + combinations + + Sparse: + - samples: `(m1, m2, mu)`, where `m1` and `m2` are the m component + values for the two arrays being combined and `mu` is the m component + value for the resulting array. + - components: `[]`, i.e. no components axis. + - properties: `property`, i.e. a dummy property. + + Dense: + - samples: `sample`, i.e. a dummy sample. + - components: `[(m1,), (m2,), (mu,)]`, i.e. on separate components axes, + where `m1` and `m2` are the m component values for the two arrays + being combined and `mu` is the m component value for the resulting + array. + - properties: `property`, i.e. a dummy property. 
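To make the intermediate dictionary layouts described above concrete, here is a toy `(l1, l2, lambda) = (0, 0, 0)` entry in both formats; the single real CG coefficient <00;00|00> is 1, and everything else is illustrative scaffolding rather than rascaline data:

    import numpy as np

    # Case 1, sparse: one entry per non-zero (m1, m2, mu) triple
    sparse_coeffs = {(0, 0, 0): {(0, 0, 0): 1.0}}

    # Case 2, dense: a full (2*l1+1, 2*l2+1, 2*lambda+1) array, here 1x1x1
    dense_coeffs = {(0, 0, 0): np.ones((1, 1, 1))}

    l1, l2, lam = 0, 0, 0
    assert dense_coeffs[(l1, l2, lam)].shape == (2 * l1 + 1, 2 * l2 + 1, 2 * lam + 1)
    assert sparse_coeffs[(l1, l2, lam)][(0, 0, 0)] == dense_coeffs[(l1, l2, lam)][0, 0, 0]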
+ """ blocks = [] if sparse: for l1l2lam_dict in coeff_dict.values(): @@ -432,7 +449,7 @@ def _real2complex(lambda_: int, like: Array) -> Array: Operations are dispatched to the corresponding array type given by ``like`` """ - result = _dispatch.zeros_like(like, (2 * lambda_ + 1, 2 * lambda_ + 1)) + result = _dispatch.zeros_like(like, shape=(2 * lambda_ + 1, 2 * lambda_ + 1)) inv_sqrt_2 = 1.0 / math.sqrt(2.0) i_sqrt_2 = 1.0j / complex(math.sqrt(2.0)) @@ -517,22 +534,22 @@ def _complex_clebsch_gordan_matrix(l1: int, l2: int, lambda_: int, like: Array): def combine_arrays( - arr_1: Array, - arr_2: Array, + array_1: Array, + array_2: Array, lambda_: int, cg_coeffs: TensorMap, cg_backend: str, ) -> Array: """ - Couples arrays `arr_1` and `arr_2` corresponding to the irreducible + Couples arrays `array_1` and `array_2` corresponding to the irreducible spherical components of 2 angular channels l1 and l2 using the appropriate Clebsch-Gordan coefficients. As l1 and l2 can be combined to form multiple lambda channels, this function returns the coupling to a single specified channel `lambda`. The angular channels l1 and l2 are inferred from the size of the components axis (axis 1) of the input arrays. - `arr_1` has shape (n_i, 2 * l1 + 1, n_p) and `arr_2` has shape (n_i, 2 * l2 - + 1, n_q). n_i is the number of samples, n_p and n_q are the number of + `array_1` has shape (n_i, 2 * l1 + 1, n_p) and `array_2` has shape (n_i, 2 * + l2 + 1, n_q). n_i is the number of samples, n_p and n_q are the number of properties in each array. The number of samples in each array must be the same. @@ -552,59 +569,65 @@ def combine_arrays( computational cost of performing the CG combinations - i.e. using the function :py:func:`combine_single_center_to_body_order_metadata_only`. - :param arr_1: array with the m values for l1 with shape [n_samples, 2 * l1 + - 1, n_q_properties] - :param arr_2: array with the m values for l2 with shape [n_samples, 2 * l2 + - 1, n_p_properties] + :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 + + 1, n_q_properties] + :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 + + 1, n_p_properties] :param lambda_: int value of the resulting coupled channel - :param cg_coeffs: either a sparse dictionary with keys (m1, m2, mu) and array - values being sparse blocks of shape , or a dense array - of shape [(2 * l1 +1) * (2 * l2 +1), (2 * lambda_ + 1)]. - If it is None we only return an empty array of the shape. - :param cg_backend: specifies the combine backend with sparse CG coefficients. - It can have the values "python-dense", "python-sparse", "mops" and "metadata" - - - :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties] + :param cg_coeffs: either a sparse dictionary with keys (m1, m2, mu) and + array values being sparse blocks of shape , or a dense + array of shape [(2 * l1 +1) * (2 * l2 +1), (2 * lambda_ + 1)]. If it is + None we only return an empty array of the shape. + :param cg_backend: specifies the combine backend with sparse CG + coefficients. It can have the values "python-dense", "python-sparse", + "mops" and "metadata". If "python-dense" or "python-sparse" is chosen, a + dense or sparse combination (respectively) of the arrays is performed + using either numpy or torch, depending on the backend. If "mops" is + chosen, a sparse combination of the arrays is performed if the external + package MOPS is installed. 
If "metadata" is chosen, no combination is + perfomed, and an empty array of the correct shape is returned. + + + :returns: array of shape [n_samples, (2*lambda_+1), q_properties * + p_properties] """ # If just precomputing metadata, return an empty array if cg_backend == "metadata": - return empty_combine(arr_1, arr_2, lambda_) + return empty_combine(array_1, array_2, lambda_) - # We have to temporary store it so TorchScript can infer the correct type if cg_backend == "python-sparse" or cg_backend == "mops": - return sparse_combine(arr_1, arr_2, lambda_, cg_coeffs, cg_backend) + return sparse_combine(array_1, array_2, lambda_, cg_coeffs, cg_backend) elif cg_backend == "python-dense": - return dense_combine(arr_1, arr_2, lambda_, cg_coeffs) + return dense_combine(array_1, array_2, lambda_, cg_coeffs) else: raise ValueError( - "Wrong cg_backend, got '{cg_backend}'," + f"Wrong cg_backend, got '{cg_backend}'," " but only support 'python-dense', 'python-sparse' and 'mops'." ) def empty_combine( - arr_1: Array, - arr_2: Array, + array_1: Array, + array_2: Array, lambda_: int, ) -> Array: """ - Returns the s Clebsch-Gordan combination step on 2 arrays using sparse + Returns a Clebsch-Gordan combination step on two arrays using sparse """ # Samples dimensions must be the same - assert arr_1.shape[0] == arr_2.shape[0] + assert array_1.shape[0] == array_2.shape[0] # Define other useful dimensions - n_i = arr_1.shape[0] # number of samples - n_p = arr_1.shape[2] # number of properties in arr_1 - n_q = arr_2.shape[2] # number of properties in arr_2 + n_i = array_1.shape[0] # number of samples + n_p = array_1.shape[2] # number of properties in array_1 + n_q = array_2.shape[2] # number of properties in array_2 - return _dispatch.empty_like(arr_1, (n_i, 2 * lambda_ + 1, n_p * n_q)) + return _dispatch.empty_like(array_1, (n_i, 2 * lambda_ + 1, n_p * n_q)) def sparse_combine( - arr_1: Array, - arr_2: Array, + array_1: Array, + array_2: Array, lambda_: int, cg_coeffs: TensorMap, cg_backend: str, @@ -615,36 +638,43 @@ def sparse_combine( its component axis, and the blocks are combined to the desired output angular channel `lambda_` using the appropriate Clebsch-Gordan coefficients. - :param arr_1: array with the m values for l1 with shape [n_samples, 2 * l1 + - 1, n_q_properties] - :param arr_2: array with the m values for l2 with shape [n_samples, 2 * l2 + - 1, n_p_properties] + :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 + + 1, n_q_properties] + :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 + + 1, n_p_properties] :param lambda_: int value of the resulting coupled channel :param cg_coeffs: sparse dictionary with keys (m1, m2, mu) and array values being sparse blocks of shape - :param cg_backend: specifies the combine backend with sparse CG coefficients. - It can have the values "python-sparse" and "mops" - - :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties] + :param cg_backend: specifies the combine backend with sparse CG + coefficients. It can have the values "python-dense", "python-sparse", + "mops" and "metadata". If "python-dense" or "python-sparse" is chosen, a + dense or sparse combination (respectively) of the arrays is performed + using either numpy or torch, depending on the backend. If "mops" is + chosen, a sparse combination of the arrays is performed if the external + package MOPS is installed. If "metadata" is chosen, no combination is + perfomed, and an empty array of the correct shape is returned. 
+
+    :returns: array of shape [n_samples, (2*lambda_+1), q_properties *
+        p_properties]
     """
     # Samples dimensions must be the same
-    assert arr_1.shape[0] == arr_2.shape[0]
+    assert array_1.shape[0] == array_2.shape[0]

     # Infer l1 and l2 from the length of axis 1 of each tensor
-    l1 = (arr_1.shape[1] - 1) // 2
-    l2 = (arr_2.shape[1] - 1) // 2
+    l1 = (array_1.shape[1] - 1) // 2
+    l2 = (array_2.shape[1] - 1) // 2

     # Define other useful dimensions
-    n_i = arr_1.shape[0]  # number of samples
-    n_p = arr_1.shape[2]  # number of properties in arr_1
-    n_q = arr_2.shape[2]  # number of properties in arr_2
+    n_i = array_1.shape[0]  # number of samples
+    n_p = array_1.shape[2]  # number of properties in array_1
+    n_q = array_2.shape[2]  # number of properties in array_2

     # The isinstance and cg_backend checks make the logic a bit redundant,
     # but the redundancy of the isinstance check is required for TorchScript.
     # The logic can be made more straightforward once MOPS supports TorchScript
-    if isinstance(arr_1, TorchTensor) or cg_backend == "python-sparse":
+    if isinstance(array_1, TorchTensor) or cg_backend == "python-sparse":
         # Initialise output array
-        arr_out = _dispatch.zeros_like(arr_1, (n_i, 2 * lambda_ + 1, n_p * n_q))
+        array_out = _dispatch.zeros_like(array_1, (n_i, 2 * lambda_ + 1, n_p * n_q))

         # Get the corresponding Clebsch-Gordan coefficients
         # Fill in each mu component of the output array in turn
@@ -655,38 +685,40 @@ def sparse_combine(
             m2 = m1m2mu_key[1]
             mu = m1m2mu_key[2]
             # Broadcast arrays, multiply together and with CG coeff
-            arr_out[:, mu, :] += (
-                arr_1[:, m1, :, None] * arr_2[:, m2, None, :] * cg_l1l2lam.values[i, 0]
+            array_out[:, mu, :] += (
+                array_1[:, m1, :, None]
+                * array_2[:, m2, None, :]
+                * cg_l1l2lam.values[i, 0]
             ).reshape(n_i, n_p * n_q)

-        return arr_out
-    elif isinstance(arr_1, np.ndarray) and cg_backend == "mops":
+        return array_out
+    elif isinstance(array_1, np.ndarray) and cg_backend == "mops":
         # Reshape
-        arr_1 = np.repeat(arr_1[:, :, :, None], n_q, axis=3).reshape(
+        array_1 = np.repeat(array_1[:, :, :, None], n_q, axis=3).reshape(
             n_i, 2 * l1 + 1, n_p * n_q
         )
-        arr_2 = np.repeat(arr_2[:, :, None, :], n_p, axis=2).reshape(
+        array_2 = np.repeat(array_2[:, :, None, :], n_p, axis=2).reshape(
             n_i, 2 * l2 + 1, n_p * n_q
         )

-        arr_1 = _dispatch.swapaxes(arr_1, 1, 2).reshape(n_i * n_p * n_q, 2 * l1 + 1)
-        arr_2 = _dispatch.swapaxes(arr_2, 1, 2).reshape(n_i * n_p * n_q, 2 * l2 + 1)
+        array_1 = _dispatch.swapaxes(array_1, 1, 2).reshape(n_i * n_p * n_q, 2 * l1 + 1)
+        array_2 = _dispatch.swapaxes(array_2, 1, 2).reshape(n_i * n_p * n_q, 2 * l2 + 1)

         # Do SAP
-        arr_out = sap(
-            arr_1,
-            arr_2,
+        array_out = sap(
+            array_1,
+            array_2,
             *cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lambda_}).values.flatten(),
             output_size=2 * lambda_ + 1,
         )
-        assert arr_out.shape == (n_i * n_p * n_q, 2 * lambda_ + 1)
+        assert array_out.shape == (n_i * n_p * n_q, 2 * lambda_ + 1)

         # Reshape back
-        arr_out = arr_out.reshape(n_i, n_p * n_q, 2 * lambda_ + 1)
-        arr_out = _dispatch.swapaxes(arr_out, 1, 2)
+        array_out = array_out.reshape(n_i, n_p * n_q, 2 * lambda_ + 1)
+        array_out = _dispatch.swapaxes(array_out, 1, 2)

-        return arr_out
-    elif cg_backend not in ["python", "mops"]:
+        return array_out
+    elif cg_backend not in ["python-sparse", "mops"]:
         raise ValueError(
             f"sparse cg backend '{cg_backend}' is not known. "
             "Only values 'python-sparse' and 'mops' are valid."
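For readers following the diff: the accumulation that `sparse_combine` performs for a
single (l1, l2, lambda) block boils down to the following numpy sketch. This is an
illustration only, assuming the non-zero coefficients come as a plain
`{(m1, m2, mu): coeff}` dict (with 0-based m indices) instead of the TensorMap block
used in the actual code:

    import numpy as np

    def sparse_combine_sketch(array_1, array_2, lambda_, cg_sparse):
        # array_1: (n_samples, 2 * l1 + 1, n_p); array_2: (n_samples, 2 * l2 + 1, n_q)
        n_i, _, n_p = array_1.shape
        n_q = array_2.shape[2]
        array_out = np.zeros((n_i, 2 * lambda_ + 1, n_p * n_q))
        # Only the non-zero (m1, m2, mu) entries contribute to the output
        for (m1, m2, mu), coeff in cg_sparse.items():
            # Outer product over the two property axes, weighted by the CG coefficient
            outer = array_1[:, m1, :, None] * array_2[:, m2, None, :]
            array_out[:, mu, :] += coeff * outer.reshape(n_i, n_p * n_q)
        return array_out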
@@ -696,8 +728,8 @@ def sparse_combine(


 def dense_combine(
-    arr_1: Array,
-    arr_2: Array,
+    array_1: Array,
+    array_2: Array,
     lambda_: int,
     cg_coeffs: TensorMap,
 ) -> Array:
@@ -707,35 +739,36 @@
     its component axis, and the blocks are combined to the desired output
     angular channel `lambda_` using the appropriate Clebsch-Gordan coefficients.

-    :param arr_1: array with the m values for l1 with shape [n_samples, 2 * l1 +
-        1, n_q_properties]
-    :param arr_2: array with the m values for l2 with shape [n_samples, 2 * l2 +
-        1, n_p_properties]
+    :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 +
+        1, n_q_properties]
+    :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 +
+        1, n_p_properties]
     :param lambda_: int value of the resulting coupled channel
-    :param cg_coeffs: dense array of shape [(2 * l1 +1) * (2 * l2 +1), (2 * lambda_ +
-        1)]
+    :param cg_coeffs: dense array of shape [(2 * l1 +1) * (2 * l2 +1), (2 *
+        lambda_ + 1)]

-    :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties]
+    :returns: array of shape [n_samples, (2*lambda_+1), q_properties *
+        p_properties]
     """
     # Infer l1 and l2 from the length of axis 1 of each tensor
-    l1 = (arr_1.shape[1] - 1) // 2
-    l2 = (arr_2.shape[1] - 1) // 2
+    l1 = (array_1.shape[1] - 1) // 2
+    l2 = (array_2.shape[1] - 1) // 2

     cg_l1l2lam = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lambda_}).values

     # (samples None None l1_mu q) * (samples l2_mu p None None)
     # -> (samples l2_mu p l1_mu q) we broadcast it in this way
     # so we only need to do one swapaxes in the next step
-    arr_out = arr_1[:, None, None, :, :] * arr_2[:, :, :, None, None]
+    array_out = array_1[:, None, None, :, :] * array_2[:, :, :, None, None]

     # (samples l2_mu p l1_mu q) -> (samples q p l1_mu l2_mu)
-    arr_out = _dispatch.swapaxes(arr_out, 1, 4)
+    array_out = _dispatch.swapaxes(array_out, 1, 4)

     # samples (q p l1_mu l2_mu) -> (samples (q p) (l1_mu l2_mu))
-    arr_out = arr_out.reshape(
+    array_out = array_out.reshape(
         -1,
-        arr_1.shape[2] * arr_2.shape[2],
-        arr_1.shape[1] * arr_2.shape[1],
+        array_1.shape[2] * array_2.shape[2],
+        array_1.shape[1] * array_2.shape[1],
     )

     # (l1_mu l2_mu lam_mu) -> ((l1_mu l2_mu) lam_mu)
@@ -743,7 +776,7 @@

     # (samples (q p) (l1_mu l2_mu)) @ ((l1_mu l2_mu) lam_mu)
     # -> samples (q p) lam_mu
-    arr_out = arr_out @ cg_l1l2lam
+    array_out = array_out @ cg_l1l2lam

     # (samples (q p) lam_mu) -> (samples lam_mu (q p))
-    return _dispatch.swapaxes(arr_out, 1, 2)
+    return _dispatch.swapaxes(array_out, 1, 2)
diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py
index 333da24ea..924ae3f7d 100644
--- a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py
+++ b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py
@@ -6,8 +6,18 @@
 from typing import List, Optional, Tuple, Union

-from . import _cg_cache, _dispatch
-from ._classes import (
+from .. import _dispatch
+
+# from ._classes import (
+#     Array,
+#     Labels,
+#     LabelsEntry,
+#     TensorBlock,
+#     TensorMap,
+#     is_labels,
+#     torch_jit_annotate,
+# )
+from .._backend import (
     Array,
     Labels,
     LabelsEntry,
@@ -16,6 +26,7 @@
     is_labels,
     torch_jit_annotate,
 )
+from .
import _cg_cache # ================================================================== @@ -60,8 +71,8 @@ def _parse_selected_keys( n_iterations: int, array_like: Array, angular_cutoff: Optional[int] = None, - selected_keys: Optional[Union[Labels, List[Union[Labels, None]]]] = None, -) -> List[Union[None, Labels]]: + selected_keys: Optional[Union[Labels, List[Labels]]] = None, +) -> List[Labels]: """ Parses the `selected_keys` argument passed to public functions. Checks the values and returns a :py:class:`list` of :py:class:`Labels` objects, one for @@ -74,9 +85,7 @@ def _parse_selected_keys( and (not isinstance(selected_keys, list)) and (not is_labels(selected_keys)) ): - raise TypeError( - "`selected_keys` must be `None`, `Labels` or List[Union[None, `Labels`]]" - ) + raise TypeError("`selected_keys` must be `None`, `Labels` or List[`Labels`]") if isinstance(selected_keys, list): if not all( @@ -85,9 +94,7 @@ def _parse_selected_keys( for i in range(len(selected_keys)) ] ): - raise TypeError( - "`selected_keys` must be a Labels or List[Union[None, Labels]]" - ) + raise TypeError("`selected_keys` must be a Labels or List[Labels]") # Check angular_cutoff arg if angular_cutoff is not None: @@ -121,31 +128,31 @@ def _parse_selected_keys( if not len(selected_keys_) == n_iterations: raise ValueError( - "`selected_keys` must be a List[Union[None, Labels]] of length" + "`selected_keys` must be a List[Labels] of length" " `correlation_order` - 1" ) # Now iterate over each of the Labels (or None) in the list and check - for slct in selected_keys_: - if slct is None: + for selected in selected_keys_: + if selected is None: continue - if not (is_labels(slct)): - raise ValueError("Asserted that elements in `slct` are Labels") + if not (is_labels(selected)): + raise ValueError("Asserted that elements in `selected` are Labels") if not all( [ name in ["spherical_harmonics_l", "inversion_sigma"] - for name in slct.names + for name in selected.names ] ): raise ValueError( "specified key names in `selected_keys` must be either" " 'spherical_harmonics_l' or 'inversion_sigma'" ) - if "spherical_harmonics_l" in slct.names: + if "spherical_harmonics_l" in selected.names: if angular_cutoff is not None: below_cutoff: Array = ( - slct.column("spherical_harmonics_l") <= angular_cutoff + selected.column("spherical_harmonics_l") <= angular_cutoff ) if not _dispatch.all(below_cutoff): raise ValueError( @@ -155,7 +162,7 @@ def _parse_selected_keys( above_zero = _dispatch.bool_array_like( [ bool(angular_l >= 0) - for angular_l in slct.column("spherical_harmonics_l") + for angular_l in selected.column("spherical_harmonics_l") ], like=array_like, ) @@ -163,12 +170,12 @@ def _parse_selected_keys( raise ValueError( "specified angular channels in `selected_keys` must be >= 0" ) - if "inversion_sigma" in slct.names: + if "inversion_sigma" in selected.names: if not _dispatch.all( _dispatch.bool_array_like( [ bool(parity_s in [-1, 1]) - for parity_s in slct.column("inversion_sigma") + for parity_s in selected.column("inversion_sigma") ], array_like, ) @@ -408,8 +415,6 @@ def _precompute_keys_full_product( sig = int(sig1 * sig2 * (-1) ** (lam1 + lam2 + lambda_)) # Extract the l and k lists from keys_1 - # We have to convert to int64 because of - # https://github.com/pytorch/pytorch/issues/76295 l_list: List[int] = _dispatch.to_int_list(keys_1.values[i, 4 : 4 + nu1]) k_list: List[int] = _dispatch.to_int_list(keys_1.values[i, 4 + nu1 :]) @@ -461,12 +466,12 @@ def _apply_key_selection( keys_out_vals = keys_out.values[:, col_idx] # 
First check that all of the selected keys exist in the output keys - for slct in selected_keys.values: + for selected in selected_keys.values: if not any( - [bool(all(slct == keys_out_vals[i])) for i in range(len(keys_out_vals))] + [bool(all(selected == keys_out_vals[i])) for i in range(len(keys_out_vals))] ): raise ValueError( - f"selected key {selected_keys.names} = {slct} not found" + f"selected key {selected_keys.names} = {selected} not found" " in the output keys. Check the `selected_keys` argument." ) @@ -513,9 +518,8 @@ def _remove_redundant_keys( key_idxs_to_keep: List[int] = [] for key_idx in range(len(keys_out)): key = keys_out.entry(key_idx) - # Get the important key values. This is all of the keys, excpet the k - # list. We have to convert to int64 because of - # https://github.com/pytorch/pytorch/issues/76295 + # Get the important key values. This is all of the keys, except the k + # list. key_vals_slice: List[int] = _dispatch.to_int_list(key.values[: 4 + (nu + 1)]) first_part, l_list = key_vals_slice[:4], key_vals_slice[4:] diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py index dff14e6c4..9b2995016 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py @@ -8,17 +8,27 @@ import numpy as np -from . import _cg_cache, _clebsch_gordan, _dispatch -from ._classes import ( +from .. import _dispatch + +# from ._classes import ( +# Labels, +# LabelsEntry, +# TensorBlock, +# TensorMap, +# TorchScriptClass, +# torch_jit_export, +# torch_jit_is_scripting, +# ) +from .._backend import ( Labels, LabelsEntry, TensorBlock, TensorMap, - TorchModule, TorchScriptClass, torch_jit_export, torch_jit_is_scripting, ) +from . import _cg_cache, _clebsch_gordan try: @@ -40,7 +50,7 @@ # ====================================================================== -class DensityCorrelations(TorchModule): +class DensityCorrelations: """ Takes iterative Clebsch-Gordan (CG) tensor products of a density descriptor with itself up to the desired correlation order. 
Returns @@ -122,7 +132,7 @@ def __init__( max_angular: int, correlation_order: int, angular_cutoff: Optional[int] = None, - selected_keys: Optional[Union[Labels, List[Union[Labels, None]]]] = None, + selected_keys: Optional[Union[Labels, List[Labels]]] = None, skip_redundant: Optional[Union[bool, List[bool]]] = False, output_selection: Optional[Union[bool, List[bool]]] = None, arrays_backend: Optional[str] = None, @@ -198,7 +208,7 @@ def __init__( sparse=sparse, use_mops=use_mops, use_torch=(self._arrays_backend == "torch"), - ).coeffs + )._cg_coeffs # Check inputs if correlation_order <= 1: @@ -232,30 +242,6 @@ def __init__( ) ) - @property - def correlation_order(self): - return self._correlation_order - - @property - def selected_keys(self) -> List[Union[Labels, None]]: - return self._selected_keys - - @property - def skip_redundant(self) -> List[bool]: - return self._skip_redundant - - @property - def output_selection(self) -> List[bool]: - return self._output_selection - - @property - def arrays_backend(self): - return self._arrays_backend - - @property - def cg_backend(self): - return self._cg_backend - @property def cg_coeffs(self) -> TensorMap: return self._cg_coeffs diff --git a/python/rascaline/rascaline/utils/power_spectrum/_classes.py b/python/rascaline/rascaline/utils/power_spectrum/_classes.py deleted file mode 100644 index 01ea036d2..000000000 --- a/python/rascaline/rascaline/utils/power_spectrum/_classes.py +++ /dev/null @@ -1,13 +0,0 @@ -from metatensor import Labels, TensorBlock, TensorMap - -from ...calculator_base import CalculatorBase -from ...systems import IntoSystem - - -__all__ = [ - "CalculatorBase", - "IntoSystem", - "Labels", - "TensorBlock", - "TensorMap", -] diff --git a/python/rascaline/rascaline/utils/power_spectrum/_dispatch.py b/python/rascaline/rascaline/utils/power_spectrum/_dispatch.py deleted file mode 100644 index 397f354e9..000000000 --- a/python/rascaline/rascaline/utils/power_spectrum/_dispatch.py +++ /dev/null @@ -1,167 +0,0 @@ -"""Helper functions to dispatch methods between numpy and torch. - -The functions are similar to those in metatensor-operations. Missing functions may -already exist there. Functions are ordered alphabetically. -""" - -from typing import List, Optional - -import numpy as np - - -try: - import torch - from torch import Tensor as TorchTensor -except ImportError: - - class TorchTensor: - pass - - -UNKNOWN_ARRAY_TYPE = ( - "unknown array type, only numpy arrays and torch tensors are supported" -) - - -def _check_all_torch_tensor(arrays: List[TorchTensor]): - for array in arrays: - if not isinstance(array, TorchTensor): - raise TypeError( - f"expected argument to be a torch.Tensor, but got {type(array)}" - ) - - -def _check_all_np_ndarray(arrays): - for array in arrays: - if not isinstance(array, np.ndarray): - raise TypeError( - f"expected argument to be a np.ndarray, but got {type(array)}" - ) - - -def concatenate(arrays: List[TorchTensor], axis: int): - """ - Concatenate a group of arrays along a given axis. - - This function has the same behavior as ``numpy.concatenate(arrays, axis)`` - and ``torch.concatenate(arrays, axis)``. - - Passing `axis` as ``0`` is equivalent to :py:func:`numpy.vstack`, ``1`` to - :py:func:`numpy.hstack`, and ``2`` to :py:func:`numpy.dstack`, though any - axis index > 0 is valid. 
- """ - if isinstance(arrays[0], TorchTensor): - _check_all_torch_tensor(arrays) - return torch.concatenate(arrays, axis) - elif isinstance(arrays[0], np.ndarray): - _check_all_np_ndarray(arrays) - return np.concatenate(arrays, axis) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def empty_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): - """ - Create an uninitialized array, with the given ``shape``, and similar dtype, - device and other options as ``array``. - - If ``shape`` is :py:obj:`None`, the array shape is used instead. - ``requires_grad`` is only used for torch tensors, and set the corresponding - value on the returned array. - - This is the equivalent to ``np.empty_like(array, shape=shape)``. - """ - if isinstance(array, TorchTensor): - if shape is None: - shape = array.size() - return torch.empty( - shape, - dtype=array.dtype, - layout=array.layout, - device=array.device, - ).requires_grad_(requires_grad) - elif isinstance(array, np.ndarray): - return np.empty_like(array, shape=shape, subok=False) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def list_to_array(array, data: List[List[int]]): - """Create an object from data with the same type as ``array``.""" - if isinstance(array, TorchTensor): - return torch.tensor(data) - elif isinstance(array, np.ndarray): - return np.array(data) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def matmul(a, b): - """Matrix product of two arrays.""" - if isinstance(a, TorchTensor): - _check_all_torch_tensor([b]) - return torch.matmul(a, b) - elif isinstance(a, np.ndarray): - _check_all_np_ndarray([b]) - return np.matmul(a, b) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def to_index_array(array): - """Returns an array that is suitable for indexing a dimension of - a different array. - After a few checks (int, 1D), this operation will convert the dtype to - torch.long (which is, in some torch versions, the only acceptable type - of index tensor). Numpy arrays are left unchanged. - """ - if len(array.shape) != 1: - raise ValueError("Index arrays must be 1D") - - if isinstance(array, TorchTensor): - if torch.is_floating_point(array): - raise ValueError("Index arrays must be integers") - return array.to(torch.long) - elif isinstance(array, np.ndarray): - if not np.issubdtype(array.dtype, np.integer): - raise ValueError("Index arrays must be integers") - return array - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def unique(array, axis: Optional[int] = None): - """Find the unique elements of an array.""" - if isinstance(array, TorchTensor): - return torch.unique(array, dim=axis) - elif isinstance(array, np.ndarray): - return np.unique(array, axis=axis) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def zeros_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): - """ - Create an array filled with zeros, with the given ``shape``, and similar - dtype, device and other options as ``array``. - - If ``shape`` is :py:obj:`None`, the array shape is used instead. - ``requires_grad`` is only used for torch tensors, and set the corresponding - value on the returned array. - - This is the equivalent to ``np.zeros_like(array, shape=shape)``. 
- """ - if isinstance(array, TorchTensor): - if shape is None: - shape = array.size() - - return torch.zeros( - shape, - dtype=array.dtype, - layout=array.layout, - device=array.device, - ).requires_grad_(requires_grad) - elif isinstance(array, np.ndarray): - return np.zeros_like(array, shape=shape, subok=False) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) diff --git a/python/rascaline/rascaline/utils/power_spectrum/calculator.py b/python/rascaline/rascaline/utils/power_spectrum/calculator.py index d96a65e55..f7de5156b 100644 --- a/python/rascaline/rascaline/utils/power_spectrum/calculator.py +++ b/python/rascaline/rascaline/utils/power_spectrum/calculator.py @@ -2,8 +2,10 @@ from math import sqrt from typing import List, Optional, Union -from . import _dispatch -from ._classes import CalculatorBase, IntoSystem, Labels, TensorBlock, TensorMap +from .. import _dispatch + +# from ._classes import CalculatorBase, IntoSystem, Labels, TensorBlock, TensorMap +from .._backend import CalculatorBase, IntoSystem, Labels, TensorBlock, TensorMap class PowerSpectrum: diff --git a/python/rascaline/tests/utils/correlate_density.py b/python/rascaline/tests/utils/correlate_density.py index 46b2ebba0..f2473b1cf 100644 --- a/python/rascaline/tests/utils/correlate_density.py +++ b/python/rascaline/tests/utils/correlate_density.py @@ -8,8 +8,7 @@ from metatensor import Labels, TensorBlock, TensorMap import rascaline -from rascaline.utils import PowerSpectrum -from rascaline.utils.clebsch_gordan import _dispatch +from rascaline.utils import PowerSpectrum, _dispatch from rascaline.utils.clebsch_gordan._cg_cache import ClebschGordanReal from rascaline.utils.clebsch_gordan._clebsch_gordan import _standardize_keys from rascaline.utils.clebsch_gordan.correlate_density import DensityCorrelations @@ -76,15 +75,44 @@ def h2_isolated(): - return ase.io.read(os.path.join(DATA_ROOT, "h2_isolated.xyz"), ":") + return [ + ase.Atoms( + symbols=["H", "H"], + positions=[ + [1.97361700, 1.73067300, 2.47063400], + [1.97361700, 3.26932700, 2.47063400], + ], + ) + ] def h2o_isolated(): - return ase.io.read(os.path.join(DATA_ROOT, "h2o_isolated.xyz"), ":") + return [ + ase.Atoms( + symbols=["O", "H", "H"], + positions=[ + [2.56633400, 2.50000000, 2.50370100], + [1.97361700, 1.73067300, 2.47063400], + [1.97361700, 3.26932700, 2.47063400], + ], + ) + ] def h2o_periodic(): - return ase.io.read(os.path.join(DATA_ROOT, "h2o_periodic.xyz"), ":") + + return [ + ase.Atoms( + symbols=["O", "H", "H"], + positions=[ + [2.56633400, 2.50000000, 2.50370100], + [1.97361700, 1.73067300, 2.47063400], + [1.97361700, 3.26932700, 2.47063400], + ], + cell=[5, 5, 5], + pbc=[True, True, True], + ) + ] def wigner_d_matrices(lmax: int): @@ -350,7 +378,7 @@ def test_clebsch_gordan_orthogonality(l1, l2, arrays_backend): """ cg_coeffs = ClebschGordanReal( lambda_max=5, sparse=False, use_torch=arrays_backend == "torch" - ).coeffs + )._cg_coeffs lam_min = abs(l1 - l2) lam_max = l1 + l2 diff --git a/python/rascaline/tests/utils/data/h2_isolated.xyz b/python/rascaline/tests/utils/data/h2_isolated.xyz deleted file mode 100644 index ec5f59680..000000000 --- a/python/rascaline/tests/utils/data/h2_isolated.xyz +++ /dev/null @@ -1,4 +0,0 @@ -2 -pbc="F F F" -H 1.97361700 1.73067300 2.47063400 -H 1.97361700 3.26932700 2.47063400 diff --git a/python/rascaline/tests/utils/data/h2o_isolated.xyz b/python/rascaline/tests/utils/data/h2o_isolated.xyz deleted file mode 100644 index fc876d2ba..000000000 --- a/python/rascaline/tests/utils/data/h2o_isolated.xyz +++ /dev/null @@ 
-1,5 +0,0 @@ -3 -pbc="F F F" -O 2.56633400 2.50000000 2.50370100 -H 1.97361700 1.73067300 2.47063400 -H 1.97361700 3.26932700 2.47063400 diff --git a/python/rascaline/tests/utils/data/h2o_periodic.xyz b/python/rascaline/tests/utils/data/h2o_periodic.xyz deleted file mode 100644 index 3374566e6..000000000 --- a/python/rascaline/tests/utils/data/h2o_periodic.xyz +++ /dev/null @@ -1,5 +0,0 @@ -3 -Lattice="5.0 0.0 0.0 0.0 5.0 0.0 0.0 0.0 5.0" pbc="T T T" -O 2.56633400 2.50000000 2.50370100 -H 1.97361700 1.73067300 2.47063400 -H 1.97361700 3.26932700 2.47063400 From 7c14c221dd81588162e2ad203875255eb37e755b Mon Sep 17 00:00:00 2001 From: Joseph Abbott Date: Mon, 19 Feb 2024 11:15:38 +0100 Subject: [PATCH 17/23] Get rid of __all__ --- python/rascaline-torch/rascaline/torch/utils/__init__.py | 2 -- python/rascaline/rascaline/utils/clebsch_gordan/__init__.py | 5 ----- python/rascaline/rascaline/utils/power_spectrum/__init__.py | 3 --- 3 files changed, 10 deletions(-) diff --git a/python/rascaline-torch/rascaline/torch/utils/__init__.py b/python/rascaline-torch/rascaline/torch/utils/__init__.py index 6670685e5..d5ed1a6cd 100644 --- a/python/rascaline-torch/rascaline/torch/utils/__init__.py +++ b/python/rascaline-torch/rascaline/torch/utils/__init__.py @@ -10,5 +10,3 @@ """ Path containing the CMake configuration files for the underlying C library """ - -__all__ = ["PowerSpectrum", "DensityCorrelations"] diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py index 1da995612..da3f61f7c 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py @@ -1,6 +1 @@ from .correlate_density import DensityCorrelations - - -__all__ = [ - "DensityCorrelations", -] diff --git a/python/rascaline/rascaline/utils/power_spectrum/__init__.py b/python/rascaline/rascaline/utils/power_spectrum/__init__.py index a6d2d6f3f..feecf6d9e 100644 --- a/python/rascaline/rascaline/utils/power_spectrum/__init__.py +++ b/python/rascaline/rascaline/utils/power_spectrum/__init__.py @@ -1,4 +1 @@ from .calculator import PowerSpectrum - - -__all__ = ["PowerSpectrum"] From be331f1a257811c54e3d676114f97d5274d72a57 Mon Sep 17 00:00:00 2001 From: Guillaume Fraux Date: Mon, 19 Feb 2024 11:46:06 +0100 Subject: [PATCH 18/23] Fix Python import --- .../{utils/clebsch_gordan.py => utils.py} | 35 +++++-- .../rascaline/torch/utils/__init__.py | 12 --- .../rascaline/torch/utils/power_spectrum.py | 99 ------------------- .../tests/utils/correlate_density.py | 9 -- python/rascaline/rascaline/utils/_backend.py | 1 + .../utils/clebsch_gordan/__init__.py | 2 +- .../utils/clebsch_gordan/correlate_density.py | 52 ++++------ .../utils/power_spectrum/calculator.py | 28 +++++- 8 files changed, 71 insertions(+), 167 deletions(-) rename python/rascaline-torch/rascaline/torch/{utils/clebsch_gordan.py => utils.py} (72%) delete mode 100644 python/rascaline-torch/rascaline/torch/utils/__init__.py delete mode 100644 python/rascaline-torch/rascaline/torch/utils/power_spectrum.py diff --git a/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py b/python/rascaline-torch/rascaline/torch/utils.py similarity index 72% rename from python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py rename to python/rascaline-torch/rascaline/torch/utils.py index d3ac6b41e..1625581b4 100644 --- a/python/rascaline-torch/rascaline/torch/utils/clebsch_gordan.py +++ b/python/rascaline-torch/rascaline/torch/utils.py @@ 
-6,14 +6,20 @@ import torch from metatensor.torch import Labels, LabelsEntry, TensorBlock, TensorMap -import rascaline.utils.clebsch_gordan +import rascaline.utils + +from .calculator_base import CalculatorModule +from .system import System + + +_HERE = os.path.dirname(__file__) # For details what is happening here take a look an `rascaline.torch.calculators`. -# Step 1: create the `_classes` module as an empty module +# create the `_backend` module as an empty module spec = importlib.util.spec_from_loader( - "rascaline.torch.utils.clebsch_gordan._classes", + "rascaline.torch.utils._backend", loader=None, ) module = importlib.util.module_from_spec(spec) @@ -31,6 +37,8 @@ module.__dict__["TorchModule"] = torch.nn.Module module.__dict__["TorchScriptClass"] = torch.ScriptClass module.__dict__["Array"] = torch.Tensor +module.__dict__["CalculatorBase"] = CalculatorModule +module.__dict__["IntoSystem"] = System def is_labels(obj: Any): @@ -55,19 +63,26 @@ def check_isinstance(obj, ty): return isinstance(obj, ty) -module.__dict__["check_isinstance"] = check_isinstance - # register the module in sys.modules, so future import find it directly sys.modules[spec.name] = module - -# Step 2: create a module named `rascaline.torch.utils.clebsch_gordan` using code from -# `rascaline.utils.clebsch_gordan` +# create a module named `rascaline.torch.utils` using code from +# `rascaline.utils` spec = importlib.util.spec_from_file_location( - "rascaline.torch.utils.clebsch_gordan", - rascaline.utils.clebsch_gordan.__file__, + "rascaline.torch.utils", rascaline.utils.__file__ ) module = importlib.util.module_from_spec(spec) + + +cmake_prefix_path = os.path.realpath(os.path.join(_HERE, "..", "lib", "cmake")) +""" +Path containing the CMake configuration files for the underlying C library +""" + +module.__dict__["cmake_prefix_path"] = cmake_prefix_path + +# override `rascaline.torch.utils` (the module associated with the current file) +# with the newly created module sys.modules[spec.name] = module spec.loader.exec_module(module) diff --git a/python/rascaline-torch/rascaline/torch/utils/__init__.py b/python/rascaline-torch/rascaline/torch/utils/__init__.py deleted file mode 100644 index d5ed1a6cd..000000000 --- a/python/rascaline-torch/rascaline/torch/utils/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -import os - -from .clebsch_gordan import DensityCorrelations -from .power_spectrum import PowerSpectrum - - -_HERE = os.path.dirname(__file__) - -cmake_prefix_path = os.path.realpath(os.path.join(_HERE, "..", "lib", "cmake")) -""" -Path containing the CMake configuration files for the underlying C library -""" diff --git a/python/rascaline-torch/rascaline/torch/utils/power_spectrum.py b/python/rascaline-torch/rascaline/torch/utils/power_spectrum.py deleted file mode 100644 index 7bf9b9321..000000000 --- a/python/rascaline-torch/rascaline/torch/utils/power_spectrum.py +++ /dev/null @@ -1,99 +0,0 @@ -import importlib -import sys -from typing import List, Optional, Union - -import torch -from metatensor.torch import Labels, TensorBlock, TensorMap - -import rascaline.utils.power_spectrum - -from ..calculator_base import CalculatorModule as CalculatorBase -from ..system import System as IntoSystem - - -# For details what is happening here take a look an `rascaline.torch.calculators`. 
- -# Step 1: create te `_classes` module as an empty module -spec = importlib.util.spec_from_loader( - "rascaline.torch.utils.power_spectrum._classes", - loader=None, -) -module = importlib.util.module_from_spec(spec) -# This module only exposes a handful of things, defined here. Any changes here MUST also -# be made to the `metatensor/operations/_classes.py` file, which is used in non -# TorchScript mode. -module.__dict__["Labels"] = Labels -module.__dict__["TensorBlock"] = TensorBlock -module.__dict__["TensorMap"] = TensorMap -module.__dict__["CalculatorBase"] = CalculatorBase -module.__dict__["IntoSystem"] = IntoSystem - -# register the module in sys.modules, so future import find it directly -sys.modules[spec.name] = module - - -# Step 2: create a module named `rascaline.torch.utils.power_spectrum` using code from -# `rascaline.utils.power_spectrum` -spec = importlib.util.spec_from_file_location( - "rascaline.torch.utils.power_spectrum", - rascaline.utils.power_spectrum.__file__, -) - -module = importlib.util.module_from_spec(spec) -sys.modules[spec.name] = module -spec.loader.exec_module(module) - - -# Store the original class to avoid recursion problems -PowerSpectrumBase = module.PowerSpectrum - - -class PowerSpectrum(torch.nn.Module, PowerSpectrumBase): - """ - Torch version of the general power spectrum of one or of two calculators. - - The class provides :py:meth:`PowerSpectrum.forward` and the integration with - :py:class:`torch.nn.Module`. For more details see - :py:class:`rascaline.utils.PowerSpectrum`. - - :param calculator_1: first calculator - :param calculator_1: second calculator - :param species: List of `species_neighbor` to fill all blocks with. This option - might be useful when joining along the ``sample`` direction after computation. - If :py:obj:`None` blocks are filled with `species_neighbor` from all blocks. - :raises ValueError: If other calculators than - :py:class:`rascaline.SphericalExpansion` or - :py:class:`rascaline.LodeSphericalExpansion` are used. 
- :raises ValueError: If ``'max_angular'`` of both calculators is different - """ - - def __init__( - self, - calculator_1: CalculatorBase, - calculator_2: Optional[CalculatorBase] = None, - species: Optional[List[int]] = None, - ): - torch.nn.Module.__init__(self) - PowerSpectrumBase.__init__( - self, - calculator_1=calculator_1, - calculator_2=calculator_2, - species=species, - ) - - def forward( - self, - systems: Union[IntoSystem, List[IntoSystem]], - gradients: Optional[List[str]] = None, - use_native_system: bool = True, - ) -> TensorMap: - """forward just calls :py:meth:`PowerSpectrum.compute`""" - - return self.compute( - systems=systems, - gradients=gradients, - use_native_system=use_native_system, - ) - - -module.__dict__["PowerSpectrum"] = PowerSpectrum diff --git a/python/rascaline-torch/tests/utils/correlate_density.py b/python/rascaline-torch/tests/utils/correlate_density.py index fd5687c65..47126707d 100644 --- a/python/rascaline-torch/tests/utils/correlate_density.py +++ b/python/rascaline-torch/tests/utils/correlate_density.py @@ -81,15 +81,6 @@ def test_torch_script_correlate_density_angular_selection( scripted_nu_2 = scripted_corr_calculator.compute_metadata(nu_1) assert metatensor.torch.equal_metadata(scripted_nu_2, ref_nu_2) - # Test if properties are accesible - assert isinstance(corr_calculator.correlation_order, int) - assert isinstance(corr_calculator.selected_keys, list) - assert isinstance(corr_calculator.skip_redundant, list) - assert isinstance(corr_calculator.output_selection, list) - assert isinstance(corr_calculator.arrays_backend, str) - assert isinstance(corr_calculator.cg_backend, str) - assert is_tensor_map(corr_calculator.cg_coeffs) - def test_jit_save_load(): corr_calculator = DensityCorrelations( diff --git a/python/rascaline/rascaline/utils/_backend.py b/python/rascaline/rascaline/utils/_backend.py index 7e581153e..8e614ed74 100644 --- a/python/rascaline/rascaline/utils/_backend.py +++ b/python/rascaline/rascaline/utils/_backend.py @@ -44,6 +44,7 @@ class TorchScriptClass: Array = Union[np.ndarray, TorchTensor] __all__ = [ + "Array", "CalculatorBase", "IntoSystem", "Labels", diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py index da3f61f7c..0760f2d36 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py @@ -1 +1 @@ -from .correlate_density import DensityCorrelations +from .correlate_density import DensityCorrelations # noqa: F401 diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py index 9b2995016..7310b6f6e 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py @@ -9,21 +9,12 @@ import numpy as np from .. 
import _dispatch - -# from ._classes import ( -# Labels, -# LabelsEntry, -# TensorBlock, -# TensorMap, -# TorchScriptClass, -# torch_jit_export, -# torch_jit_is_scripting, -# ) from .._backend import ( Labels, LabelsEntry, TensorBlock, TensorMap, + TorchModule, TorchScriptClass, torch_jit_export, torch_jit_is_scripting, @@ -50,7 +41,7 @@ # ====================================================================== -class DensityCorrelations: +class DensityCorrelations(TorchModule): """ Takes iterative Clebsch-Gordan (CG) tensor products of a density descriptor with itself up to the desired correlation order. Returns @@ -141,31 +132,31 @@ def __init__( super().__init__() if arrays_backend is None: if torch_jit_is_scripting(): - self._arrays_backend = "torch" + arrays_backend = "torch" else: if isinstance(Labels, TorchScriptClass): - self._arrays_backend = "torch" + arrays_backend = "torch" else: - self._arrays_backend = "numpy" + arrays_backend = "numpy" elif arrays_backend == "numpy": if torch_jit_is_scripting(): raise ValueError( "Module is torch scripted but 'numpy' was given as `arrays_backend`" ) - self._arrays_backend = "numpy" + arrays_backend = "numpy" elif arrays_backend == "torch": - self._arrays_backend = "torch" + arrays_backend = "torch" else: raise ValueError( - f"Unkown `arrays_backend` {arrays_backend}." + f"Unknown `arrays_backend` {arrays_backend}." "Only 'numpy' and 'torch' are supported." ) # Choosing the optimal cg combine backend if cg_backend is None: - if self._arrays_backend == "torch": + if arrays_backend == "torch": self._cg_backend = "python-dense" - if self._arrays_backend == "numpy" and HAS_MOPS: + if arrays_backend == "numpy" and HAS_MOPS: self._cg_backend = "mops" else: self._cg_backend = "python-sparse" @@ -174,7 +165,7 @@ def __init__( elif cg_backend == "python-sparse": self._cg_backend = "python-sparse" elif cg_backend == "mops": - if self._arrays_backend == "torch": + if arrays_backend == "torch": raise NotImplementedError( "'numpy' was determined or given as `arrays_backend` " "and 'mops' was given as `cg_backend`, " @@ -182,7 +173,7 @@ def __init__( ) else: raise ValueError( - f"Unkown `cg_backend` {cg_backend}." + f"Unknown `cg_backend` {cg_backend}." "Only 'python-dense', 'python-sparse' and 'mops' are supported." 
) @@ -203,11 +194,11 @@ def __init__( sparse = True use_mops = True - self._cg_coeffs = _cg_cache.ClebschGordanReal( + self._cg_coefficients = _cg_cache.ClebschGordanReal( self._max_angular, sparse=sparse, use_mops=use_mops, - use_torch=(self._arrays_backend == "torch"), + use_torch=(arrays_backend == "torch"), )._cg_coeffs # Check inputs @@ -219,9 +210,9 @@ def __init__( # Parse the selected keys self._angular_cutoff = angular_cutoff - if self._arrays_backend == "torch": + if arrays_backend == "torch": array_like = torch.empty(0) - elif self._arrays_backend == "numpy": + elif arrays_backend == "numpy": array_like = np.empty(0) self._selected_keys: List[Union[Labels, None]] = ( @@ -242,11 +233,8 @@ def __init__( ) ) - @property - def cg_coeffs(self) -> TensorMap: - return self._cg_coeffs - def forward(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]: + """TODO""" return self.compute(density) def compute(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]: @@ -273,7 +261,7 @@ def compute_metadata( """ Returns the metadata-only :py:class:`TensorMap`(s) that would be output by the function :py:meth:`compute` for the same calculator under the - same settings, without perfoming the actual Clebsch-Gordan tensor + same settings, without performing the actual Clebsch-Gordan tensor products. :param density: A density descriptor of body order 2 (correlation order @@ -371,7 +359,7 @@ def _correlate_density( density_correlation.block(key_1), density.block(key_2), lambda_out, - self._cg_coeffs, + self._cg_coefficients, cg_backend, ) blocks_out.append(block_out) @@ -389,7 +377,7 @@ def _correlate_density( ) # Drop redundant key names. TODO: these should be part of the global - # matadata associated with the TensorMap. Awaiting this functionality in + # metadata associated with the TensorMap. Awaiting this functionality in # metatensor. for i, tensor in enumerate(density_correlations): keys = tensor.keys diff --git a/python/rascaline/rascaline/utils/power_spectrum/calculator.py b/python/rascaline/rascaline/utils/power_spectrum/calculator.py index f7de5156b..fcee1e2e4 100644 --- a/python/rascaline/rascaline/utils/power_spectrum/calculator.py +++ b/python/rascaline/rascaline/utils/power_spectrum/calculator.py @@ -3,12 +3,17 @@ from typing import List, Optional, Union from .. import _dispatch +from .._backend import ( + CalculatorBase, + IntoSystem, + Labels, + TensorBlock, + TensorMap, + TorchModule, +) -# from ._classes import CalculatorBase, IntoSystem, Labels, TensorBlock, TensorMap -from .._backend import CalculatorBase, IntoSystem, Labels, TensorBlock, TensorMap - -class PowerSpectrum: +class PowerSpectrum(TorchModule): r"""General power spectrum of one or of two calculators. 
If ``calculator_2`` is provided, the invariants :math:`p_{nl}` are generated by @@ -126,6 +131,7 @@ def __init__( calculator_2: Optional[CalculatorBase] = None, species: Optional[List[int]] = None, ): + super().__init__() self.calculator_1 = calculator_1 self.calculator_2 = calculator_2 self.species = species @@ -312,6 +318,20 @@ def compute( return TensorMap(new_keys, new_blocks).keys_to_properties("l") + def forward( + self, + systems: Union[IntoSystem, List[IntoSystem]], + gradients: Optional[List[str]] = None, + use_native_system: bool = True, + ) -> TensorMap: + """TODO""" + + return self.compute( + systems=systems, + gradients=gradients, + use_native_system=use_native_system, + ) + def _positions_gradients( new_block: TensorBlock, block_1: TensorBlock, block_2: TensorBlock, factor: float From 993f0050b9835a55176a020691af37343c02c4db Mon Sep 17 00:00:00 2001 From: Guillaume Fraux Date: Mon, 19 Feb 2024 11:58:39 +0100 Subject: [PATCH 19/23] Add CG to API docs --- .../api/python/utils/clebsch-gordan.rst | 5 + .../src/references/api/python/utils/index.rst | 1 + .../api/torch/utils/clebsch-gordan.rst | 5 + docs/src/references/api/torch/utils/index.rst | 1 + .../utils/clebsch_gordan/correlate_density.py | 162 +++++++++--------- .../utils/power_spectrum/calculator.py | 7 +- 6 files changed, 97 insertions(+), 84 deletions(-) create mode 100644 docs/src/references/api/python/utils/clebsch-gordan.rst create mode 100644 docs/src/references/api/torch/utils/clebsch-gordan.rst diff --git a/docs/src/references/api/python/utils/clebsch-gordan.rst b/docs/src/references/api/python/utils/clebsch-gordan.rst new file mode 100644 index 000000000..7d1258348 --- /dev/null +++ b/docs/src/references/api/python/utils/clebsch-gordan.rst @@ -0,0 +1,5 @@ +Clebsch-Gordan products +======================= + +.. autoclass:: rascaline.utils.DensityCorrelations + :members: diff --git a/docs/src/references/api/python/utils/index.rst b/docs/src/references/api/python/utils/index.rst index 3d88c4da9..d713fdcb9 100644 --- a/docs/src/references/api/python/utils/index.rst +++ b/docs/src/references/api/python/utils/index.rst @@ -11,3 +11,4 @@ Utility functions and classes that extend the core usage of rascaline. radial-basis power-spectrum splines + clebsch-gordan diff --git a/docs/src/references/api/torch/utils/clebsch-gordan.rst b/docs/src/references/api/torch/utils/clebsch-gordan.rst new file mode 100644 index 000000000..a8cb5299a --- /dev/null +++ b/docs/src/references/api/torch/utils/clebsch-gordan.rst @@ -0,0 +1,5 @@ +Clebsch-Gordan products +======================= + +.. 
autoclass:: rascaline.torch.utils.DensityCorrelations + :members: diff --git a/docs/src/references/api/torch/utils/index.rst b/docs/src/references/api/torch/utils/index.rst index 1351283df..a2cf425eb 100644 --- a/docs/src/references/api/torch/utils/index.rst +++ b/docs/src/references/api/torch/utils/index.rst @@ -8,3 +8,4 @@ Utility functions and classes that extend the core usage of rascaline-torch :maxdepth: 1 power-spectrum + clebsch-gordan diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py index 7310b6f6e..d00892988 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py @@ -43,77 +43,69 @@ class DensityCorrelations(TorchModule): """ - Takes iterative Clebsch-Gordan (CG) tensor products of a density descriptor - with itself up to the desired correlation order. Returns - :py:class:`TensorMap`(s) corresponding to the density correlations output - from the specified iteration(s). - - A density descriptor necessarily is body order 2 (i.e. correlation order 1), - but can be single- or multi-center. The output is a :py:class:`list` of - density correlations for each iteration specified in `output_selection`, up - to the target order passed in `correlation_order`. By default only the last - correlation (i.e. the correlation of order ``correlation_order``) is - returned. + Takes iterative Clebsch-Gordan (CG) tensor products of a density descriptor with + itself up to the desired correlation order. Returns :py:class:`TensorMap` + corresponding to the density correlations output from the specified iteration(s). + + A density descriptor necessarily is body order 2 (i.e. correlation order 1), but can + be single- or multi-center. The output is a :py:class:`list` of density correlations + for each iteration specified in `output_selection`, up to the target order passed in + `correlation_order`. By default only the last correlation (i.e. the correlation of + order ``correlation_order``) is returned. This function is an iterative special case of the more general - :py:func:`correlate_tensors`. As a density is being correlated with itself, - some redundant CG tensor products can be skipped with the `skip_redundant` - keyword. + :py:func:`correlate_tensors`. As a density is being correlated with itself, some + redundant CG tensor products can be skipped with the `skip_redundant` keyword. Selections on the angular and parity channels at each iteration can also be controlled with arguments `angular_cutoff`, `angular_selection` and `parity_selection`. - :param max_angular: The maximum angular order for which CG coefficients - should be computed and stored. This must be large enough to cover the - maximum angular order reached in the CG iterations on a density input to - the :py:meth:`compute` method. - :param correlation_order: The desired correlation order of the output - descriptor. Must be >= 1. - :param angular_cutoff: The maximum angular channel to compute at any given - CG iteration, applied globally to all iterations until the target - correlation order is reached. - :param selected_keys: :py:class:`Labels` or `List[:py:class:`Labels`]` - specifying the angular and/or parity channels to output at each - iteration. All :py:class:`Labels` objects passed here must only contain - key names "spherical_harmonics_l" and "inversion_sigma". 
If a single - :py:class:`Labels` object is passed, this is applied to the final - iteration only. If a :py:class:`list` of :py:class:`Labels` objects is - passed, each is applied to its corresponding iteration. If None is - passed, all angular and parity channels are output at each iteration, - with the global `angular_cutoff` applied if specified. - :param skip_redundant: Whether to skip redundant CG combinations. Defaults - to False, which means all combinations are performed. If a - :py:class:`list` of :py:class:`bool` is passed, this is applied to each - iteration. If a single :py:class:`bool` is passed, this is applied to - all iterations. - :param output_selection: A :py:class:`list` of :py:class:`bool` specifying - whether to output a :py:class:`TensorMap` for each iteration. If a - single :py:class:`bool` is passed as True, outputs from all iterations - will be returned. If a :py:class:`list` of :py:class:`bool` is passed, - this controls the output at each corresponding iteration. If None is - passed, only the final iteration is output. - :param arrays_backend: Determines the array backend, either "numpy" or - "torch". - :param cg_backend: Determines the backend for the CG combination. It can be - even "python-sparse", "python-dense" or "mops". If the CG combination - performs on the sparse coefficients, it means that for each (l1, l2, - lambda) block the (m1, m2, mu) coefficients are stored in a sparse - format only storing the nonzero coefficients. If the parameter are None, - the most optimal choice is determined given available packages and - ``arrays_backend``. - - "python-dense": Uses the python implementation performing the - combinations with the dense CG coefficients. - - "python-sparse": Uses the python implementation performing - the combinations with the sparse CG coefficients. - - "mops": Uses the package ``mops`` that optimized the sparse - combinations. At the moment it is only available with "numpy" as - ``arrays_backend`` - - :return: A :py:class:`list` of :py:class:`TensorMap` corresponding to the - density correlations output from the specified iterations. If the output - from a single iteration is requested, a :py:class:`TensorMap` is - returned instead. + :param max_angular: The maximum angular order for which CG coefficients should be + computed and stored. This must be large enough to cover the maximum angular + order reached in the CG iterations on a density input to the :py:meth:`compute` + method. + :param correlation_order: The desired correlation order of the output descriptor. + Must be >= 1. + :param angular_cutoff: The maximum angular channel to compute at any given CG + iteration, applied globally to all iterations until the target correlation order + is reached. + :param selected_keys: :py:class:`Labels` or `List[:py:class:`Labels`]` specifying + the angular and/or parity channels to output at each iteration. All + :py:class:`Labels` objects passed here must only contain key names + "spherical_harmonics_l" and "inversion_sigma". If a single :py:class:`Labels` + object is passed, this is applied to the final iteration only. If a + :py:class:`list` of :py:class:`Labels` objects is passed, each is applied to its + corresponding iteration. If None is passed, all angular and parity channels are + output at each iteration, with the global `angular_cutoff` applied if specified. + :param skip_redundant: Whether to skip redundant CG combinations. Defaults to False, + which means all combinations are performed. 
If a :py:class:`list` of
+        :py:class:`bool` is passed, this is applied to each iteration. If a single
+        :py:class:`bool` is passed, this is applied to all iterations.
+    :param output_selection: A :py:class:`list` of :py:class:`bool` specifying whether
+        to output a :py:class:`TensorMap` for each iteration. If a single
+        :py:class:`bool` is passed as True, outputs from all iterations will be
+        returned. If a :py:class:`list` of :py:class:`bool` is passed, this controls the
+        output at each corresponding iteration. If None is passed, only the final
+        iteration is output.
+    :param arrays_backend: Determines the array backend, either "numpy" or "torch".
+    :param cg_backend: Determines the backend for the CG combination. It can be either
+        "python-sparse", "python-dense" or "mops". If the CG combination is performed
+        with the sparse coefficients, then for each (l1, l2, lambda) block the (m1, m2,
+        mu) coefficients are stored in a sparse format only storing the nonzero
+        coefficients. If the parameter is None, the optimal choice is determined
+        given available packages and ``arrays_backend``.
+
+        - "python-dense": Uses the python implementation performing the combinations
+          with the dense CG coefficients.
+        - "python-sparse": Uses the python implementation performing the
+          combinations with the sparse CG coefficients.
+        - "mops": Uses the ``mops`` package to optimize the sparse combinations. At
+          the moment it is only available with ``arrays_backend="numpy"``.
+
+    :return: A :py:class:`list` of :py:class:`TensorMap` corresponding to the density
+        correlations output from the specified iterations. If the output from a single
+        iteration is requested, a :py:class:`TensorMap` is returned instead.
     """

     _selected_keys: List[Union[Labels, None]]
@@ -234,19 +226,24 @@ def __init__(
             )

     def forward(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]:
-        """TODO"""
+        """
+        Calls the :py:meth:`DensityCorrelations.compute` function.
+
+        This is intended for :py:class:`torch.nn.Module` compatibility, and should be
+        ignored in pure Python mode.
+        """
         return self.compute(density)

     def compute(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]:
         """
-        Computes the density correlations by taking iterative Clebsch-Gordan
-        (CG) tensor products of the input `density` descriptor with itself.
-
-        :param density: A density descriptor of body order 2 (correlation order
-            1), in :py:class:`TensorMap` format. This may be, for example, a
-            rascaline :py:class:`SphericalExpansion` or
-            :py:class:`LodeSphericalExpansion`. Alternatively, this could be
-            multi-center descriptor, such as a pair density.
+        Computes the density correlations by taking iterative Clebsch-Gordan (CG) tensor
+        products of the input `density` descriptor with itself.
+
+        :param density: A density descriptor of body order 2 (correlation order 1), in
+            :py:class:`TensorMap` format. This may be, for example, a rascaline
+            :py:class:`SphericalExpansion` or :py:class:`LodeSphericalExpansion`.
+            Alternatively, this could be multi-center descriptor, such as a pair
+            density.
         """
         return self._correlate_density(
             density,
@@ -259,16 +256,15 @@ def compute_metadata(
         density: TensorMap,
     ) -> Union[TensorMap, List[TensorMap]]:
         """
-        Returns the metadata-only :py:class:`TensorMap`(s) that would be output
-        by the function :py:meth:`compute` for the same calculator under the
-        same settings, without performing the actual Clebsch-Gordan tensor
-        products.
- - :param density: A density descriptor of body order 2 (correlation order - 1), in :py:class:`TensorMap` format. This may be, for example, a - rascaline :py:class:`SphericalExpansion` or - :py:class:`LodeSphericalExpansion`. Alternatively, this could be - multi-center descriptor, such as a pair density. + Returns the metadata-only :py:class:`TensorMap` that would be output by the + function :py:meth:`compute` for the same calculator under the same settings, + without performing the actual Clebsch-Gordan tensor products. + + :param density: A density descriptor of body order 2 (correlation order 1), in + :py:class:`TensorMap` format. This may be, for example, a rascaline + :py:class:`SphericalExpansion` or :py:class:`LodeSphericalExpansion`. + Alternatively, this could be multi-center descriptor, such as a pair + density. """ return self._correlate_density( density, diff --git a/python/rascaline/rascaline/utils/power_spectrum/calculator.py b/python/rascaline/rascaline/utils/power_spectrum/calculator.py index fcee1e2e4..f64bbb7c1 100644 --- a/python/rascaline/rascaline/utils/power_spectrum/calculator.py +++ b/python/rascaline/rascaline/utils/power_spectrum/calculator.py @@ -324,7 +324,12 @@ def forward( gradients: Optional[List[str]] = None, use_native_system: bool = True, ) -> TensorMap: - """TODO""" + """ + Calls the :py:meth:`PowerSpectrum.compute` function. + + This is intended for :py:class:`torch.nn.Module` compatibility, and should be + ignored in pure Python mode. + """ return self.compute( systems=systems, From f4337f4f93e044ab9be6a5c63e481d13367c8403 Mon Sep 17 00:00:00 2001 From: Joseph Abbott Date: Mon, 19 Feb 2024 14:35:47 +0100 Subject: [PATCH 20/23] linter noqa --- python/rascaline-torch/rascaline/torch/__init__.py | 8 ++++---- .../rascaline/rascaline/utils/power_spectrum/__init__.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/rascaline-torch/rascaline/torch/__init__.py b/python/rascaline-torch/rascaline/torch/__init__.py index 226cde60b..0d63d3242 100644 --- a/python/rascaline-torch/rascaline/torch/__init__.py +++ b/python/rascaline-torch/rascaline/torch/__init__.py @@ -9,12 +9,12 @@ _load_library() -from . import utils # noqa -from .calculator_base import CalculatorModule, register_autograd # noqa +from . import utils # noqa: E402, F401 +from .calculator_base import CalculatorModule, register_autograd # noqa: E402, F401 # don't forget to also update `rascaline/__init__.py` and # `rascaline/torch/calculators.py` when modifying this file -from .calculators import ( # noqa +from .calculators import ( # noqa: E402, F401 AtomicComposition, LodeSphericalExpansion, NeighborList, @@ -24,7 +24,7 @@ SphericalExpansion, SphericalExpansionByPair, ) -from .system import systems_to_torch # noqa +from .system import systems_to_torch # noqa: E402, F401 __all__ = [ diff --git a/python/rascaline/rascaline/utils/power_spectrum/__init__.py b/python/rascaline/rascaline/utils/power_spectrum/__init__.py index feecf6d9e..bf9f8f5d7 100644 --- a/python/rascaline/rascaline/utils/power_spectrum/__init__.py +++ b/python/rascaline/rascaline/utils/power_spectrum/__init__.py @@ -1 +1 @@ -from .calculator import PowerSpectrum +from .calculator import PowerSpectrum # noqa: F401 From 96cee2a34f8b091fbd7be539ace3ed529216f0ef Mon Sep 17 00:00:00 2001 From: Joseph Abbott Date: Mon, 19 Feb 2024 17:58:25 +0100 Subject: [PATCH 21/23] Make the CG cache a function not a class. Fix the mops CG cache and include a test. 
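After this change the cache is built by calling `calculate_cg_coefficients` directly
instead of constructing a `ClebschGordanReal` object. A minimal usage sketch, assuming
the numpy backend (the "l1"/"l2"/"lambda" key names and the dense/sparse block layouts
follow the formats documented in the new docstring in the diff below):

    from rascaline.utils.clebsch_gordan._cg_cache import calculate_cg_coefficients

    # Dense format: one block per (l1, l2, lambda) key, with dense CG values
    cg_dense = calculate_cg_coefficients(lambda_max=2, sparse=False)

    # Sparse format: samples index the non-zero (m1, m2, mu) triples
    cg_sparse = calculate_cg_coefficients(lambda_max=2, sparse=True)
    block = cg_sparse.block({"l1": 1, "l2": 1, "lambda": 0})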
--- .../utils/clebsch_gordan/_cg_cache.py | 540 ++++++++---------- .../utils/clebsch_gordan/correlate_density.py | 16 +- .../tests/utils/correlate_density.py | 42 +- 3 files changed, 299 insertions(+), 299 deletions(-) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py index e2b43d81d..d4d5bd9e7 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py @@ -4,20 +4,13 @@ """ import math -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Tuple import numpy as np import wigners from .. import _dispatch -from .._backend import ( - Array, - Labels, - TensorBlock, - TensorMap, - TorchModule, - torch_jit_is_scripting, -) +from .._backend import Array, Labels, TensorBlock, TensorMap, torch_jit_is_scripting try: @@ -54,128 +47,147 @@ class torch_device: ) -# ================================= -# ===== ClebschGordanReal class -# ================================= +def calculate_cg_coefficients( + lambda_max: int, + sparse: bool = True, + use_mops: Optional[bool] = None, + use_torch: bool = False, +) -> TensorMap: + """ + Calculates the Clebsch-Gordan coefficients for all possible combination of l1 and + l2, up to ``lambda_max``. Returns them in :py:class:`TensorMap` format. + The output data structure of the output :py:class:`TensorMap` depends on the backend + used to perform CG tensor products, currently: ["python-dense", "python-sparse", + "mops"]. -class ClebschGordanReal(TorchModule): - """ - Class for computing Clebsch-Gordan coefficients for real spherical - harmonics. + ``cg_backend="python-dense"``: + - samples: `sample`, i.e. a dummy sample. + - components: `[(m1,), (m2,), (mu,)]`, i.e. on separate components axes, + where `m1` and `m2` are the m component values for the two arrays + being combined and `mu` is the m component value for the resulting + array. + - properties: `property`, i.e. a dummy property. + + ``cg_backend="python-sparse"`` or ``cg_backend="mops"`` (i.e. sparse with MOPS):: + - samples: `(m1, m2, mu)`, where `m1` and `m2` are the m component + values for the two arrays being combined and `mu` is the m component + value for the resulting array. + - components: `[]`, i.e. no components axis. + - properties: `property`, i.e. a dummy property. - Stores the coefficients in the `self._cg_coeffs` attribute in TensorMap - format, which is built at initialization. :param lambda_max: maximum lambda value to compute CG coefficients for. :param sparse: whether to store the CG coefficients in sparse format. - :param use_mops: whether to store the CG coefficients in MOPS sparse format. - This is recommended as the default for sparse accumulation, but can only - be used if Mops is installed. + :param use_mops: whether to store the CG coefficients in MOPS sparse format. This is + recommended as the default for sparse accumulation, but can only be used if Mops + is installed. 
:param use_torch: whether torch tensor or numpy arrays should be used for the cg coeffs - """ - def __init__( - self, - lambda_max: int, - sparse: bool = True, - use_mops: Optional[bool] = None, - use_torch: bool = False, - ): - super().__init__() - self._lambda_max = lambda_max - self._sparse = sparse - - # For TorchScript we declare type - self._use_mops: bool = False - if sparse: - if use_mops is None: - self._use_mops = HAS_MOPS - # TODO: provide a warning once Mops is fully ready - # import warnings - # warnings.warn( - # "It is recommended to use MOPS for sparse accumulation. " - # " This can be installed with ``pip install" - # " git+https://github.com/lab-cosmo/mops`." - # " Falling back to numpy for now." - # ) - else: - if use_mops and not HAS_MOPS: - raise ImportError("Specified to use MOPS, but it is not installed.") - else: - self._use_mops = use_mops + :returns: :py:class:`TensorMap` of the Clebsch-Gordan coefficients. + """ + # Parse the various boolean options for array and CG combination backends + sparse, use_mops, use_torch = _parse_backend_options(sparse, use_mops, use_torch) + # Build some 'like' arrays for the backend dispatch + if use_torch: + complex_like = torch.empty(0, dtype=torch.complex128) + double_like = torch.empty(0, dtype=torch.double) + if isinstance(Labels, torch.ScriptClass): + labels_values_like = torch.empty(0, dtype=torch.double) else: - # The logic is a bit complicated so TorchScript can understand that it is - # not None - if use_mops is None: - self._use_mops = False - # TODO: provide a warning once Mops is fully ready - # if HAS_MOPS: - # import warnings - # warnings.warn( - # "Mops is installed, but not being used" - # " as dense operations chosen." - # ) - elif use_mops: - raise ImportError("MOPS is not available for non sparse operations.") - else: - self._use_mops = False + labels_values_like = np.empty(0, dtype=np.double) + else: + complex_like = np.empty(0, dtype=np.complex128) + double_like = np.empty(0, dtype=np.double) + labels_values_like = np.empty(0, dtype=np.double) - if torch_jit_is_scripting(): - if not use_torch: - raise ValueError( - "use_torch is False, but this option is not supported when torch" - " scripted." - ) - self._use_torch = True - else: - self._use_torch = use_torch + # Calculate the CG coefficients, stored as a dict of dense arrays. This is the + # starting point for the conversion to a TensorMap of different formats depending on + # the backend options. + cg_coeff_dict = _build_dense_cg_coeff_dict( + lambda_max, complex_like, double_like, labels_values_like + ) - self._cg_coeffs = _build_cg_coeff_dict( - self._lambda_max, - sparse, - self._use_mops, - self._use_torch, + # Build the CG cache depending on whether the CG backend is sparse or dense. The + # dispatching of the arrays backends are accounted for by `double_like` and + # `labels_values_like`. + if sparse: + return _cg_coeff_dict_to_tensormap_sparse( + cg_coeff_dict, double_like, labels_values_like ) + return _cg_coeff_dict_to_tensormap_dense( + cg_coeff_dict, double_like, labels_values_like + ) + -def _build_cg_coeff_dict( - lambda_max: int, sparse: bool, use_mops: bool, use_torch: bool -): +def _parse_backend_options( + sparse: bool, use_mops: bool, use_torch: bool +) -> Tuple[bool]: """ - Builds a dictionary of Clebsch-Gordan coefficients for all possible - combination of l1 and l2, up to lambda_max. + Parses the boolean arguments for controlling the backend array and CG coefficient + calculation options. 
- This is an intermediate data structure, as the dictionary is converted to a - TensorMap by calling the :py:func:`_cg_coeff_dict_to_tensormap` function. - For transparency, the intermediate dict data structure is described here. + Raises an error for invalid options, or returns them as a tuple of booleans. + """ + _use_mops: bool = False # declare type for TorchScript + if sparse: + if use_mops is None: + _use_mops = HAS_MOPS + # TODO: provide a warning once Mops is fully ready + # import warnings + # warnings.warn( + # "It is recommended to use MOPS for sparse accumulation. " + # " This can be installed with ``pip install" + # " git+https://github.com/lab-cosmo/mops`." + # " Falling back to numpy for now." + # ) + else: + if use_mops and not HAS_MOPS: + raise ImportError("Specified to use MOPS, but it is not installed.") + else: + _use_mops = use_mops - There are 3 current use cases for the format of these coefficients, and for - each the intermediate dict has a different data structure. + else: + # The logic is a bit complicated so TorchScript can understand that it is + # not None + if use_mops is None: + _use_mops = False + # TODO: provide a warning once Mops is fully ready + # if HAS_MOPS: + # import warnings + # warnings.warn( + # "Mops is installed, but not being used" + # " as dense operations chosen." + # ) + elif use_mops: + raise ImportError("MOPS is not available for non sparse operations.") + else: + _use_mops = False - Case 1: standard sparse format. + if torch_jit_is_scripting(): + if not use_torch: + raise ValueError( + "use_torch is False, but this option is not supported when torch" + " scripted." + ) + _use_torch = True + else: + _use_torch = use_torch - Each dictionary entry is a dictionary with entries for each (m1, m2, mu) - combination. + return sparse, _use_mops, _use_torch - { - (l1, l2, lambda): { - (m1, m2, mu) : cg_{m1, m2, mu}^{l1, l2, lambda} - for m1 in range(-l1, l1 + 1), - for m2 in range(-l2, l2 + 1), - }, - ... - for l1 in range(0, l1_list) - for l2 in range(0, l2_list) - for lambda in range(0, range(|l1 - l2|, ..., |l1 + l2|)) - } - Case 2: standard dense format. +def _build_dense_cg_coeff_dict( + lambda_max: int, complex_like: Array, double_like: Array, labels_values_like: Array +) -> Dict[int, Array]: + """ + Calculates the CG coefficients and stores them as dense arrays in a dictionary. - Each dictionary entry is a dense array with shape (2 * l1 + 1, 2 * l2 + 1, 2 - * lambda + 1). + Each dictionary entry is a dense array with shape + (2 * l1 + 1, 2 * l2 + 1, 2 * lambda + 1). { (l1, l2, lambda): @@ -194,63 +206,28 @@ def _build_cg_coeff_dict( for lambda in range(0, range(|l1 - l2|, ..., |l1 + l2|)) } - Case 3: MOPS sparse format. - - Each dictionary entry contains a tuple with four 1D arrays, corresponding to - the CG coeffs and m1, m2, mu indices respectively. All of these arrays are - sorted according to the mu index. This format is used for Sparse - Accumulation of Products (SAP) as implemented in MOPS. See - https://github.com/lab-cosmo/mops . - - { - (l1, l2, lambda): - ( - [ - cg_{m1, m2, mu}^{l1, l2, lambda} - ... 
- for m1 in range(-l1, l1 + 1), - for m2 in range(-l2, l2 + 1), - for mu in range(-lambda, lambda + 1) - ], - [ - m1 for m1 in range(-l1, l1 + 1), - ], - [ - m2 for m2 in range(-l2, l2 + 1), - ], - [ - mu for mu in range(-lambda, lambda + 1), - ], - ) - - - } - where `cg_{m1, m2, mu}^{l1, l2, lambda}` is the Clebsch-Gordan coefficient that describes the combination of the `m1` irreducible component of the `l1` angular channel and the `m2` irreducible component of the `l2` angular channel into the irreducible tensor of order `lambda`. In all cases, these correspond to the non-zero CG coefficients, i.e. those in the range |-l, ..., +l| for each angular order l in {l1, l2, lambda}. + + :param lambda_max: maximum lambda value to compute CG coefficients for. + :param complex_like: an empty array of dtype complex, used for dispatching + operations + :param double_like: an empty array of dtype double, used for dispatching + operations + :param labels_values_like: an empty array of dtype double, used for dispatching + operations + + :returns: dictionary of dense CG coefficients. """ # real-to-complex and complex-to-real transformations as matrices r2c: Dict[int, Array] = {} c2r: Dict[int, Array] = {} - coeff_dict = {} - if use_torch: - complex_like = torch.empty(0, dtype=torch.complex128) - double_like = torch.empty(0, dtype=torch.double) - if isinstance(Labels, torch.ScriptClass): - labels_values_like = torch.empty(0, dtype=torch.double) - else: - labels_values_like = np.empty(0, dtype=np.double) - else: - complex_like = np.empty(0, dtype=np.complex128) - double_like = np.empty(0, dtype=np.double) - labels_values_like = np.empty(0, dtype=np.double) - for lambda_ in range(0, lambda_max + 1): c2r[lambda_] = _complex2real(lambda_, like=complex_like) r2c[lambda_] = _real2complex(lambda_, like=complex_like) @@ -281,153 +258,112 @@ def _build_cg_coeff_dict( else: cg_l1l2lam_dense = _dispatch.imag(real_cg) - if sparse: - # Find the m1, m2, mu idxs of the nonzero CG coeffs - nonzeros_cg_coeffs_idx = _dispatch.where( - _dispatch.abs(cg_l1l2lam_dense) > 1e-15 - ) - if use_mops: - # Store CG coeffs in a specific format for use in - # MOPS. Here we need the m1, m2, mu, and CG coeffs - # to be stored as separate 1D arrays. - m1_arr: List[int] = [] - m2_arr: List[int] = [] - mu_arr: List[int] = [] - C_arr: List[float] = [] - for i in range(len(nonzeros_cg_coeffs_idx[0])): - m1 = int(nonzeros_cg_coeffs_idx[0][i]) - m2 = int(nonzeros_cg_coeffs_idx[1][i]) - mu = int(nonzeros_cg_coeffs_idx[2][i]) - m1_arr.append(m1) - m2_arr.append(m2) - mu_arr.append(mu) - C_arr.append(float(cg_l1l2lam_dense[m1, m2, mu])) - - # Reorder the arrays based on sorted mu values - mu_idxs = _dispatch.argsort( - _dispatch.int_array_like(mu_arr, double_like) - ) - m1_arr = _dispatch.int_array_like(m1_arr, double_like)[mu_idxs] - m2_arr = _dispatch.int_array_like(m2_arr, double_like)[mu_idxs] - mu_arr = _dispatch.int_array_like(mu_arr, double_like)[mu_idxs] - C_arr = _dispatch.double_array_like(C_arr, double_like)[mu_idxs] - cg_l1l2lam_sparse = (C_arr, m1_arr, m2_arr, mu_arr) - coeff_dict[(l1, l2, lambda_)] = cg_l1l2lam_sparse - else: - # Otherwise fall back to torch/numpy and store as - # sparse dicts. 
-                        cg_l1l2lam_sparse = {}
-                        for i in range(len(nonzeros_cg_coeffs_idx[0])):
-                            m1 = nonzeros_cg_coeffs_idx[0][i]
-                            m2 = nonzeros_cg_coeffs_idx[1][i]
-                            mu = nonzeros_cg_coeffs_idx[2][i]
-                            cg_l1l2lam_sparse[(m1, m2, mu)] = cg_l1l2lam_dense[
-                                m1, m2, mu
-                            ]
-                        coeff_dict[(l1, l2, lambda_)] = cg_l1l2lam_sparse
-                else:
-                    # Store
-                    coeff_dict[(l1, l2, lambda_)] = cg_l1l2lam_dense
+                coeff_dict[(l1, l2, lambda_)] = cg_l1l2lam_dense
 
-    return _cg_coeff_dict_to_tensormap(
-        coeff_dict, sparse, double_like, labels_values_like
-    )
+    return coeff_dict
 
 
-def _cg_coeff_dict_to_tensormap(
-    coeff_dict: Dict, sparse: bool, double_like, labels_values_like
-):
+def _cg_coeff_dict_to_tensormap_dense(
+    coeff_dict: Dict, double_like: Array, labels_values_like: Array
+) -> TensorMap:
+    """
+    Converts the dictionary of dense CG coefficients to
+    :py:class:`TensorMap` format, specifically for performing CG tensor products with
+    the "python-dense" backend.
     """
-    Converts the dictionary of Clebsch-Gordan coefficients to
-    :py:class:`TensorMap` format, whose data structure depends on whether they
-    will be used for sparse or dense operations.
+    keys = Labels(
+        ["l1", "l2", "lambda"],
+        _dispatch.int_array_like(list(coeff_dict.keys()), labels_values_like),
+    )
+    blocks = []
 
-    For both, keys are indexed by `(l1, l2, lambda)`, which stores CG
-    coefficients for the combination of two blocks (of order where `l1` and `l2`
-    respectively) to angular order `lambda`.
+    for l1l2lam_values in coeff_dict.values():
+        # extending shape by samples and properties
+        block_value_shape = (1,) + l1l2lam_values.shape + (1,)
+        blocks.append(
+            TensorBlock(
+                values=_dispatch.contiguous(l1l2lam_values.reshape(block_value_shape)),
+                samples=Labels.range("sample", 1),
+                components=[
+                    Labels(
+                        ["m1"],
+                        _dispatch.int_range_like(
+                            0, l1l2lam_values.shape[0], labels_values_like
+                        ).reshape(-1, 1),
+                    ),
+                    Labels(
+                        ["m2"],
+                        _dispatch.int_range_like(
+                            0, l1l2lam_values.shape[1], labels_values_like
+                        ).reshape(-1, 1),
+                    ),
+                    Labels(
+                        ["mu"],
+                        _dispatch.int_range_like(
+                            0, l1l2lam_values.shape[2], labels_values_like
+                        ).reshape(-1, 1),
+                    ),
+                ],
+                properties=Labels.range("property", 1),
+            )
+        )
 
-    Each block then has a different structure for performing sparse and dense
-    combinations
+    return TensorMap(keys, blocks)
 
-    Sparse:
-        - samples: `(m1, m2, mu)`, where `m1` and `m2` are the m component
-          values for the two arrays being combined and `mu` is the m component
-          value for the resulting array.
-        - components: `[]`, i.e. no components axis.
-        - properties: `property`, i.e. a dummy property.
 
-    Dense:
-        - samples: `sample`, i.e. a dummy sample.
-        - components: `[(m1,), (m2,), (mu,)]`, i.e. on separate components axes,
-          where `m1` and `m2` are the m component values for the two arrays
-          being combined and `mu` is the m component value for the resulting
-          array.
-        - properties: `property`, i.e. a dummy property.
+def _cg_coeff_dict_to_tensormap_sparse(
+    coeff_dict: Dict, double_like: Array, labels_values_like: Array
+) -> TensorMap:
+    """
+    Converts the dictionary of dense CG coefficients to
+    :py:class:`TensorMap` format, specifically for performing CG tensor products with
+    the "python-sparse" backend.
""" + dict_keys = list(coeff_dict.keys()) + keys = Labels( + ["l1", "l2", "lambda"], + _dispatch.int_array_like(list(dict_keys), labels_values_like), + ) blocks = [] - if sparse: - for l1l2lam_dict in coeff_dict.values(): - l1l2lam_sample_values = [] - for m1m2mu_key in l1l2lam_dict.keys(): - l1l2lam_sample_values.append(m1m2mu_key) - # extending shape by samples and properties - values = _dispatch.double_array_like( - [*l1l2lam_dict.values()], double_like - ).reshape(-1, 1) - l1l2lam_sample_values = _dispatch.int_array_like( - l1l2lam_sample_values, labels_values_like - ) - # we have to move put the m1 m2 m3 inside a block so we can access it easier - # inside cg combine function, - blocks.append( - TensorBlock( - values=_dispatch.contiguous(values), - samples=Labels(["m1", "m2", "mu"], l1l2lam_sample_values), - components=[], - properties=Labels.range("property", 1), - ) - ) - keys = Labels( - ["l1", "l2", "lambda"], - _dispatch.int_array_like(list(coeff_dict.keys()), labels_values_like), + + # For each (l1, l2, lambda) combination, build a TensorBlock of non-zero CG coeffs + for l1, l2, lambda_ in dict_keys: + cg_l1l2lam_dense = coeff_dict[(l1, l2, lambda_)] + + # Find the dense indices of the non-zero CG coeffs + nonzeros_cg_coeffs_idx = _dispatch.where( + _dispatch.abs(cg_l1l2lam_dense) > 1e-15 ) - else: - keys = Labels( - ["l1", "l2", "lambda"], - _dispatch.int_array_like(list(coeff_dict.keys()), labels_values_like), + + # Create a sparse dictionary indexed by of the non-zero CG coeffs + cg_l1l2lam_sparse = {} + for i in range(len(nonzeros_cg_coeffs_idx[0])): + m1 = nonzeros_cg_coeffs_idx[0][i] + m2 = nonzeros_cg_coeffs_idx[1][i] + mu = nonzeros_cg_coeffs_idx[2][i] + cg_l1l2lam_sparse[(m1, m2, mu)] = cg_l1l2lam_dense[m1, m2, mu] + + l1l2lam_sample_values = [] + for m1m2mu_key in cg_l1l2lam_sparse.keys(): + l1l2lam_sample_values.append(m1m2mu_key) + # extending shape by samples and properties + values = _dispatch.double_array_like( + [*cg_l1l2lam_sparse.values()], double_like + ).reshape(-1, 1) + l1l2lam_sample_values = _dispatch.int_array_like( + l1l2lam_sample_values, labels_values_like ) - for l1l2lam_values in coeff_dict.values(): - # extending shape by samples and properties - block_value_shape = (1,) + l1l2lam_values.shape + (1,) - blocks.append( - TensorBlock( - values=_dispatch.contiguous( - l1l2lam_values.reshape(block_value_shape) - ), - samples=Labels.range("sample", 1), - components=[ - Labels( - ["m1"], - _dispatch.int_range_like( - 0, l1l2lam_values.shape[0], labels_values_like - ).reshape(-1, 1), - ), - Labels( - ["m2"], - _dispatch.int_range_like( - 0, l1l2lam_values.shape[1], labels_values_like - ).reshape(-1, 1), - ), - Labels( - ["mu"], - _dispatch.int_range_like( - 0, l1l2lam_values.shape[2], labels_values_like - ).reshape(-1, 1), - ), - ], - properties=Labels.range("property", 1), - ) + # we have to move put the m1 m2 m3 inside a block so we can access it easier + # inside cg combine function, + blocks.append( + TensorBlock( + values=_dispatch.contiguous(values), + samples=Labels(["m1", "m2", "mu"], l1l2lam_sample_values), + components=[], + properties=Labels.range("property", 1), ) + ) + return TensorMap(keys, blocks) @@ -612,7 +548,7 @@ def empty_combine( lambda_: int, ) -> Array: """ - Returns a Clebsch-Gordan combination step on two arrays using sparse + Returns a Clebsch-Gordan combination step on two arrays using sparse operations """ # Samples dimensions must be the same assert array_1.shape[0] == array_2.shape[0] @@ -693,7 +629,9 @@ def sparse_combine( 
return array_out elif isinstance(array_1, np.ndarray) and cg_backend == "mops": - # Reshape + # MOPS sparse accumulation requires some reshaping of the input arrays. See + # https://github.com/lab-cosmo/mops . Currently only supported for a numpy array + # backend. array_1 = np.repeat(array_1[:, :, :, None], n_q, axis=3).reshape( n_i, 2 * l1 + 1, n_p * n_q ) @@ -704,11 +642,31 @@ def sparse_combine( array_1 = _dispatch.swapaxes(array_1, 1, 2).reshape(n_i * n_p * n_q, 2 * l1 + 1) array_2 = _dispatch.swapaxes(array_2, 1, 2).reshape(n_i * n_p * n_q, 2 * l2 + 1) + # We also need to pass SAP the CG coefficients and m1, m2, and mu indices as 1D + # arrays. Extract these from the corresponding TensorBlock in the TensorMap CG + # cache. + block = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lambda_}) + samples = block.samples + + m1_arr: List[int] = [] + m2_arr: List[int] = [] + mu_arr: List[int] = [] + C_arr: List[float] = [] + for sample_i, (m1, m2, mu) in enumerate(samples): + + m1_arr.append(int(m1)) + m2_arr.append(int(m2)) + mu_arr.append(int(mu)) + C_arr.append(float(block.values[sample_i, 0])) + # Do SAP array_out = sap( - array_1, - array_2, - *cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lambda_}).values.flatten(), + A=array_1, + B=array_2, + C=C_arr, + indices_A=m1_arr, + indices_B=m2_arr, + indices_output=mu_arr, output_size=2 * lambda_ + 1, ) assert array_out.shape == (n_i * n_p * n_q, 2 * lambda_ + 1) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py index d00892988..a601473f9 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py @@ -159,10 +159,18 @@ def __init__( elif cg_backend == "mops": if arrays_backend == "torch": raise NotImplementedError( - "'numpy' was determined or given as `arrays_backend` " + "'torch' was determined or given as `arrays_backend` " "and 'mops' was given as `cg_backend`, " "but mops does not support torch backend yet" ) + else: + assert arrays_backend == "numpy" + if not HAS_MOPS: + raise ImportError( + "mops is not installed, but 'mops' was given as `cg_backend`" + ) + self._cg_backend = "mops" + else: raise ValueError( f"Unknown `cg_backend` {cg_backend}." 
@@ -186,12 +194,12 @@ def __init__( sparse = True use_mops = True - self._cg_coefficients = _cg_cache.ClebschGordanReal( - self._max_angular, + self._cg_coefficients = _cg_cache.calculate_cg_coefficients( + lambda_max=self._max_angular, sparse=sparse, use_mops=use_mops, use_torch=(arrays_backend == "torch"), - )._cg_coeffs + ) # Check inputs if correlation_order <= 1: diff --git a/python/rascaline/tests/utils/correlate_density.py b/python/rascaline/tests/utils/correlate_density.py index f2473b1cf..59da6ba02 100644 --- a/python/rascaline/tests/utils/correlate_density.py +++ b/python/rascaline/tests/utils/correlate_density.py @@ -9,7 +9,7 @@ import rascaline from rascaline.utils import PowerSpectrum, _dispatch -from rascaline.utils.clebsch_gordan._cg_cache import ClebschGordanReal +from rascaline.utils.clebsch_gordan._cg_cache import calculate_cg_coefficients from rascaline.utils.clebsch_gordan._clebsch_gordan import _standardize_keys from rascaline.utils.clebsch_gordan.correlate_density import DensityCorrelations @@ -31,6 +31,12 @@ HAS_SYMPY = True except ImportError: HAS_SYMPY = False +try: + from mops import sparse_accumulation_of_products as sap # noqa F401 + + HAS_MOPS = True +except ImportError: + HAS_MOPS = False if HAS_SYMPY: from .rotations import WignerDReal, transform_frame_o3, transform_frame_so3 @@ -376,9 +382,9 @@ def test_clebsch_gordan_orthogonality(l1, l2, arrays_backend): https://en.wikipedia.org/wiki/Clebsch%E2%80%93Gordan_coefficients#Orthogonality_relations for details. """ - cg_coeffs = ClebschGordanReal( + cg_coeffs = calculate_cg_coefficients( lambda_max=5, sparse=False, use_torch=arrays_backend == "torch" - )._cg_coeffs + ) lam_min = abs(l1 - l2) lam_max = l1 + l2 @@ -450,7 +456,7 @@ def test_clebsch_gordan_orthogonality(l1, l2, arrays_backend): ) def test_correlate_density_dense_sparse_agree(): """ - Tests for agreement between nu=3 tensors built using both sparse and dense + Tests for agreement between nu=2 tensors built using both sparse and dense CG coefficient caches. """ frames = h2o_periodic() @@ -475,6 +481,34 @@ def test_correlate_density_dense_sparse_agree(): assert metatensor.allclose(n_body_sparse, n_body_dense, atol=1e-8, rtol=1e-8) +@pytest.mark.skipif(not HAS_MOPS, reason="mops is not installed") +def test_correlate_density_mops_python_sparse_agree(): + """ + Tests for agreement between nu=2 tensors built using both "python-sparse" + and "mops" CG backend. 
+ """ + frames = h2o_periodic() + density = spherical_expansion_small(frames) + + correlation_order = 2 + corr_calculator_python = DensityCorrelations( + max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order, + correlation_order=correlation_order, + cg_backend="python-sparse", + ) + corr_calculator_mops = DensityCorrelations( + max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order, + correlation_order=correlation_order, + cg_backend="mops", + ) + # NOTE: testing the private function here so we can control the use of + # sparse v dense CG cache + n_body_python = corr_calculator_python.compute(density) + n_body_mops = corr_calculator_mops.compute(density) + + assert metatensor.allclose(n_body_python, n_body_mops, atol=1e-8, rtol=1e-8) + + # ============ Test metadata ============ From 7c77adc6515c62768ce1b166a383b2d16f7e9409 Mon Sep 17 00:00:00 2001 From: Joseph Abbott Date: Wed, 21 Feb 2024 14:39:14 +0100 Subject: [PATCH 22/23] Review round 2 --- .../tests/utils/correlate_density.py | 14 +- .../tests/utils/data/h2o_isolated.xyz | 5 - python/rascaline/rascaline/utils/_dispatch.py | 89 ---- .../utils/clebsch_gordan/_cg_cache.py | 390 ++++++++---------- .../utils/clebsch_gordan/_clebsch_gordan.py | 10 - .../utils/clebsch_gordan/correlate_density.py | 14 +- 6 files changed, 174 insertions(+), 348 deletions(-) delete mode 100644 python/rascaline-torch/tests/utils/data/h2o_isolated.xyz diff --git a/python/rascaline-torch/tests/utils/correlate_density.py b/python/rascaline-torch/tests/utils/correlate_density.py index 47126707d..9db042bd7 100644 --- a/python/rascaline-torch/tests/utils/correlate_density.py +++ b/python/rascaline-torch/tests/utils/correlate_density.py @@ -16,6 +16,7 @@ DATA_ROOT = os.path.join(os.path.dirname(__file__), "data") +@torch.jit.script def is_tensor_map(obj: Any): return isinstance(obj, TensorMap) @@ -38,7 +39,16 @@ def is_tensor_map(obj: Any): def h2o_isolated(): - return ase.io.read(os.path.join(DATA_ROOT, "h2o_isolated.xyz"), ":") + return [ + ase.Atoms( + symbols=["O", "H", "H"], + positions=[ + [2.56633400, 2.50000000, 2.50370100], + [1.97361700, 1.73067300, 2.47063400], + [1.97361700, 3.26932700, 2.47063400], + ], + ) + ] def spherical_expansion(frames: List[ase.Atoms]): @@ -93,7 +103,6 @@ def test_jit_save_load(): torch.jit.save(scripted_correlate_density, buffer) buffer.seek(0) torch.jit.load(buffer) - buffer.close() def test_save_load(): @@ -110,4 +119,3 @@ def test_save_load(): torch.save(corr_calculator, buffer) buffer.seek(0) torch.load(buffer) - buffer.close() diff --git a/python/rascaline-torch/tests/utils/data/h2o_isolated.xyz b/python/rascaline-torch/tests/utils/data/h2o_isolated.xyz deleted file mode 100644 index fc876d2ba..000000000 --- a/python/rascaline-torch/tests/utils/data/h2o_isolated.xyz +++ /dev/null @@ -1,5 +0,0 @@ -3 -pbc="F F F" -O 2.56633400 2.50000000 2.50370100 -H 1.97361700 1.73067300 2.47063400 -H 1.97361700 3.26932700 2.47063400 diff --git a/python/rascaline/rascaline/utils/_dispatch.py b/python/rascaline/rascaline/utils/_dispatch.py index 97aaf293f..f1254b4fa 100644 --- a/python/rascaline/rascaline/utils/_dispatch.py +++ b/python/rascaline/rascaline/utils/_dispatch.py @@ -32,15 +32,6 @@ def _check_all_torch_tensor(arrays: List[TorchTensor]): ) -# def _check_all_torch_tensor(arrays: List[TorchTensor]): -# for array in arrays: -# if not isinstance(array, TorchTensor): -# raise TypeError( -# f"expected argument to be a torch.Tensor, but got -# {type(array)}" -# ) - - def _check_all_np_ndarray(arrays): for array in 
arrays: if not isinstance(array, np.ndarray): @@ -49,14 +40,6 @@ def _check_all_np_ndarray(arrays): ) -# def _check_all_np_ndarray(arrays): -# for array in arrays: -# if not isinstance(array, np.ndarray): -# raise TypeError( -# f"expected argument to be a np.ndarray, but got {type(array)}" -# ) - - def concatenate(arrays: List[TorchTensor], axis: int): """ Concatenate a group of arrays along a given axis. @@ -78,16 +61,6 @@ def concatenate(arrays: List[TorchTensor], axis: int): raise TypeError(UNKNOWN_ARRAY_TYPE) -# def concatenate(arrays, axis: Optional[int] = 0): -# """Concatenate arrays along an axis.""" -# if isinstance(arrays[0], TorchTensor): -# return torch.cat(arrays, dim=axis) -# elif isinstance(arrays[0], np.ndarray): -# return np.concatenate(arrays, axis=axis) -# else: -# raise TypeError(UNKNOWN_ARRAY_TYPE) - - def empty_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): """ Create an uninitialized array, with the given ``shape``, and similar dtype, @@ -114,33 +87,6 @@ def empty_like(array, shape: Optional[List[int]] = None, requires_grad: bool = F raise TypeError(UNKNOWN_ARRAY_TYPE) -# def empty_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): -# """ -# Create an empty array, with the given ``shape``, and similar dtype, device -# and other options as ``array``. - -# If ``shape`` is :py:obj:`None`, the array shape is used instead. -# ``requires_grad`` is only used for torch tensors, and set the corresponding -# value on the returned array. - -# This is the equivalent to ``np.empty_like(array, shape=shape)``. -# """ -# if isinstance(array, TorchTensor): -# if shape is None: -# shape = array.size() - -# return torch.empty( -# shape, -# dtype=array.dtype, -# layout=array.layout, -# device=array.device, -# ).requires_grad_(requires_grad) -# elif isinstance(array, np.ndarray): -# return np.empty_like(array, shape=shape, subok=False) -# else: -# raise TypeError(UNKNOWN_ARRAY_TYPE) - - def list_to_array(array, data: List[List[int]]): """Create an object from data with the same type as ``array``.""" if isinstance(array, TorchTensor): @@ -195,14 +141,6 @@ def unique(array, axis: Optional[int] = None): raise TypeError(UNKNOWN_ARRAY_TYPE) -# def unique(array, axis: Optional[int] = None): -# """Find the unique elements of an array.""" -# if isinstance(array, TorchTensor): -# return torch.unique(array, dim=axis) -# elif isinstance(array, np.ndarray): -# return np.unique(array, axis=axis) - - def zeros_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): """ Create an array filled with zeros, with the given ``shape``, and similar @@ -232,33 +170,6 @@ def zeros_like(array, shape: Optional[List[int]] = None, requires_grad: bool = F raise TypeError(UNKNOWN_ARRAY_TYPE) -# def zeros_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): -# """ -# Create an array filled with zeros, with the given ``shape``, and similar -# dtype, device and other options as ``array``. - -# If ``shape`` is :py:obj:`None`, the array shape is used instead. -# ``requires_grad`` is only used for torch tensors, and set the corresponding -# value on the returned array. - -# This is the equivalent to ``np.zeros_like(array, shape=shape)``. 
-#     """
-#     if isinstance(array, TorchTensor):
-#         if shape is None:
-#             shape = array.size()
-
-#         return torch.zeros(
-#             shape,
-#             dtype=array.dtype,
-#             layout=array.layout,
-#             device=array.device,
-#         ).requires_grad_(requires_grad)
-#     elif isinstance(array, np.ndarray):
-#         return np.zeros_like(array, shape=shape, subok=False)
-#     else:
-#         raise TypeError(UNKNOWN_ARRAY_TYPE)
-
-
 def where(array):
     """Return the indices where `array` is True.
 
diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py
index d4d5bd9e7..1ab14bace 100644
--- a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py
+++ b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py
@@ -4,13 +4,13 @@
 """
 
 import math
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List
 
 import numpy as np
 import wigners
 
 from .. import _dispatch
-from .._backend import Array, Labels, TensorBlock, TensorMap, torch_jit_is_scripting
+from .._backend import Array, Labels, TensorBlock, TensorMap
 
 
 try:
@@ -50,46 +50,38 @@ def calculate_cg_coefficients(
     lambda_max: int,
     sparse: bool = True,
-    use_mops: Optional[bool] = None,
     use_torch: bool = False,
 ) -> TensorMap:
     """
     Calculates the Clebsch-Gordan coefficients for all possible combinations of l1 and
     l2, up to ``lambda_max``. Returns them in :py:class:`TensorMap` format.
 
-    The data structure of the output :py:class:`TensorMap` depends on the backend
-    used to perform CG tensor products, currently: ["python-dense", "python-sparse",
-    "mops"].
+    The data structure of the output :py:class:`TensorMap` depends on whether the
+    backend used to perform CG tensor products uses sparse or dense operations.
 
-    ``cg_backend="python-dense"``:
-        - samples: `sample`, i.e. a dummy sample.
+    Dense:
+        - samples: `_`, i.e. a dummy sample.
         - components: `[(m1,), (m2,), (mu,)]`, i.e. on separate components axes,
           where `m1` and `m2` are the m component values for the two arrays
           being combined and `mu` is the m component value for the resulting
          array.
-        - properties: `property`, i.e. a dummy property.
+        - properties: `cg_coefficient`
 
-    ``cg_backend="python-sparse"`` or ``cg_backend="mops"`` (i.e. sparse with MOPS):
+    Sparse:
         - samples: `(m1, m2, mu)`, where `m1` and `m2` are the m component
          values for the two arrays being combined and `mu` is the m component
          value for the resulting array.
        - components: `[]`, i.e. no components axis.
-        - properties: `property`, i.e. a dummy property.
+        - properties: `cg_coefficient`
 
-    :param lambda_max: maximum lambda value to compute CG coefficients for.
+    :param lambda_max: maximum angular momentum value to compute CG coefficients for.
     :param sparse: whether to store the CG coefficients in sparse format.
-    :param use_mops: whether to store the CG coefficients in MOPS sparse format. This is
-        recommended as the default for sparse accumulation, but can only be used if Mops
-        is installed.
     :param use_torch: whether torch tensor or numpy arrays should be used for the cg
        coeffs
 
     :returns: :py:class:`TensorMap` of the Clebsch-Gordan coefficients.
""" - # Parse the various boolean options for array and CG combination backends - sparse, use_mops, use_torch = _parse_backend_options(sparse, use_mops, use_torch) - # Build some 'like' arrays for the backend dispatch if use_torch: complex_like = torch.empty(0, dtype=torch.complex128) @@ -123,71 +115,14 @@ def calculate_cg_coefficients( ) -def _parse_backend_options( - sparse: bool, use_mops: bool, use_torch: bool -) -> Tuple[bool]: - """ - Parses the boolean arguments for controlling the backend array and CG coefficient - calculation options. - - Raises an error for invalid options, or returns them as a tuple of booleans. - """ - _use_mops: bool = False # declare type for TorchScript - if sparse: - if use_mops is None: - _use_mops = HAS_MOPS - # TODO: provide a warning once Mops is fully ready - # import warnings - # warnings.warn( - # "It is recommended to use MOPS for sparse accumulation. " - # " This can be installed with ``pip install" - # " git+https://github.com/lab-cosmo/mops`." - # " Falling back to numpy for now." - # ) - else: - if use_mops and not HAS_MOPS: - raise ImportError("Specified to use MOPS, but it is not installed.") - else: - _use_mops = use_mops - - else: - # The logic is a bit complicated so TorchScript can understand that it is - # not None - if use_mops is None: - _use_mops = False - # TODO: provide a warning once Mops is fully ready - # if HAS_MOPS: - # import warnings - # warnings.warn( - # "Mops is installed, but not being used" - # " as dense operations chosen." - # ) - elif use_mops: - raise ImportError("MOPS is not available for non sparse operations.") - else: - _use_mops = False - - if torch_jit_is_scripting(): - if not use_torch: - raise ValueError( - "use_torch is False, but this option is not supported when torch" - " scripted." - ) - _use_torch = True - else: - _use_torch = use_torch - - return sparse, _use_mops, _use_torch - - def _build_dense_cg_coeff_dict( lambda_max: int, complex_like: Array, double_like: Array, labels_values_like: Array ) -> Dict[int, Array]: """ Calculates the CG coefficients and stores them as dense arrays in a dictionary. - Each dictionary entry is a dense array with shape - (2 * l1 + 1, 2 * l2 + 1, 2 * lambda + 1). + Each dictionary entry is a dense array with shape (2 * l1 + 1, 2 * l2 + 1, 2 * + lambda + 1). { (l1, l2, lambda): @@ -206,19 +141,18 @@ def _build_dense_cg_coeff_dict( for lambda in range(0, range(|l1 - l2|, ..., |l1 + l2|)) } - where `cg_{m1, m2, mu}^{l1, l2, lambda}` is the Clebsch-Gordan coefficient - that describes the combination of the `m1` irreducible component of the `l1` - angular channel and the `m2` irreducible component of the `l2` angular - channel into the irreducible tensor of order `lambda`. In all cases, these - correspond to the non-zero CG coefficients, i.e. those in the range |-l, - ..., +l| for each angular order l in {l1, l2, lambda}. + where `cg_{m1, m2, mu}^{l1, l2, lambda}` is the Clebsch-Gordan coefficient that + describes the combination of the `m1` irreducible component of the `l1` angular + channel and the `m2` irreducible component of the `l2` angular channel into the + irreducible tensor of order `lambda`. In all cases, these correspond to the non-zero + CG coefficients, i.e. those in the range |-l, ..., +l| for each angular order l in + {l1, l2, lambda}. - :param lambda_max: maximum lambda value to compute CG coefficients for. + :param lambda_max: maximum angular momentum value to compute CG coefficients for. 
:param complex_like: an empty array of dtype complex, used for dispatching operations - :param double_like: an empty array of dtype double, used for dispatching - operations - :param labels_values_like: an empty array of dtype double, used for dispatching + :param double_like: an empty array of dtype double, used for dispatching operations + :param labels_values_like: an empty array of dtype int32, used for dispatching operations :returns: dictionary of dense CG coefficients. @@ -228,17 +162,17 @@ def _build_dense_cg_coeff_dict( c2r: Dict[int, Array] = {} coeff_dict = {} - for lambda_ in range(0, lambda_max + 1): - c2r[lambda_] = _complex2real(lambda_, like=complex_like) - r2c[lambda_] = _real2complex(lambda_, like=complex_like) + for o3_lambda in range(0, lambda_max + 1): + c2r[o3_lambda] = _complex2real(o3_lambda, like=complex_like) + r2c[o3_lambda] = _real2complex(o3_lambda, like=complex_like) for l1 in range(lambda_max + 1): for l2 in range(lambda_max + 1): - for lambda_ in range( + for o3_lambda in range( max(l1, l2) - min(l1, l2), min(lambda_max, (l1 + l2)) + 1 ): complex_cg = _complex_clebsch_gordan_matrix( - l1, l2, lambda_, complex_like + l1, l2, o3_lambda, complex_like ) real_cg = (r2c[l1].T @ complex_cg.reshape(2 * l1 + 1, -1)).reshape( @@ -251,14 +185,14 @@ def _build_dense_cg_coeff_dict( ) real_cg = real_cg.swapaxes(0, 1) - real_cg = real_cg @ c2r[lambda_].T + real_cg = real_cg @ c2r[o3_lambda].T - if (l1 + l2 + lambda_) % 2 == 0: + if (l1 + l2 + o3_lambda) % 2 == 0: cg_l1l2lam_dense = _dispatch.real(real_cg) else: cg_l1l2lam_dense = _dispatch.imag(real_cg) - coeff_dict[(l1, l2, lambda_)] = cg_l1l2lam_dense + coeff_dict[(l1, l2, o3_lambda)] = cg_l1l2lam_dense return coeff_dict @@ -283,7 +217,7 @@ def _cg_coeff_dict_to_tensormap_dense( blocks.append( TensorBlock( values=_dispatch.contiguous(l1l2lam_values.reshape(block_value_shape)), - samples=Labels.range("sample", 1), + samples=Labels.range("_", 1), components=[ Labels( ["m1"], @@ -304,7 +238,7 @@ def _cg_coeff_dict_to_tensormap_dense( ).reshape(-1, 1), ), ], - properties=Labels.range("property", 1), + properties=Labels.range("cg_coefficient", 1), ) ) @@ -327,8 +261,8 @@ def _cg_coeff_dict_to_tensormap_sparse( blocks = [] # For each (l1, l2, lambda) combination, build a TensorBlock of non-zero CG coeffs - for l1, l2, lambda_ in dict_keys: - cg_l1l2lam_dense = coeff_dict[(l1, l2, lambda_)] + for l1, l2, o3_lambda in dict_keys: + cg_l1l2lam_dense = coeff_dict[(l1, l2, o3_lambda)] # Find the dense indices of the non-zero CG coeffs nonzeros_cg_coeffs_idx = _dispatch.where( @@ -360,7 +294,7 @@ def _cg_coeff_dict_to_tensormap_sparse( values=_dispatch.contiguous(values), samples=Labels(["m1", "m2", "mu"], l1l2lam_sample_values), components=[], - properties=Labels.range("property", 1), + properties=Labels.range("cg_coefficient", 1), ) ) @@ -372,73 +306,73 @@ def _cg_coeff_dict_to_tensormap_sparse( # ============================ -def _real2complex(lambda_: int, like: Array) -> Array: +def _real2complex(o3_lambda: int, like: Array) -> Array: """ - Computes a matrix that can be used to convert from real to complex-valued - spherical harmonics(coefficients) of order ``lambda_``. + Computes a matrix that can be used to convert from real to complex-valued spherical + harmonics(coefficients) of order ``o3_lambda``. - This is meant to be applied to the left: ``real2complex @ [-lambda_, ..., - +lambda_]``. + This is meant to be applied to the left: + ``real2complex @ [-o3_lambda, ..., +o3_lambda]``. 
-    See https://en.wikipedia.org/wiki/Spherical_harmonics#Real_form for details
-    on the convention for how these tranformations are defined.
+    See https://en.wikipedia.org/wiki/Spherical_harmonics#Real_form for details on the
+    convention for how these transformations are defined.
 
     Operations are dispatched to the corresponding array type given by ``like``
     """
-    result = _dispatch.zeros_like(like, shape=(2 * lambda_ + 1, 2 * lambda_ + 1))
+    result = _dispatch.zeros_like(like, shape=(2 * o3_lambda + 1, 2 * o3_lambda + 1))
     inv_sqrt_2 = 1.0 / math.sqrt(2.0)
     i_sqrt_2 = 1.0j / complex(math.sqrt(2.0))
-    for m in range(-lambda_, lambda_ + 1):
+    for m in range(-o3_lambda, o3_lambda + 1):
         if m < 0:
             # Positve part
-            result[lambda_ + m, lambda_ + m] = i_sqrt_2
+            result[o3_lambda + m, o3_lambda + m] = i_sqrt_2
             # Negative part
-            result[lambda_ - m, lambda_ + m] = -i_sqrt_2 * ((-1) ** m)
+            result[o3_lambda - m, o3_lambda + m] = -i_sqrt_2 * ((-1) ** m)
 
         if m == 0:
-            result[lambda_, lambda_] = 1.0
+            result[o3_lambda, o3_lambda] = 1.0
 
         if m > 0:
             # Negative part
-            result[lambda_ - m, lambda_ + m] = inv_sqrt_2
+            result[o3_lambda - m, o3_lambda + m] = inv_sqrt_2
             # Positive part
-            result[lambda_ + m, lambda_ + m] = inv_sqrt_2 * ((-1) ** m)
+            result[o3_lambda + m, o3_lambda + m] = inv_sqrt_2 * ((-1) ** m)
 
     return result
 
 
-def _complex2real(lambda_: int, like) -> Array:
+def _complex2real(o3_lambda: int, like) -> Array:
     """
     Converts from complex to real spherical harmonics. This is just given by the
     conjugate tranpose of the real->complex transformation matrices.
 
     Operations are dispatched to the corresponding array type given by ``like``
     """
-    return _dispatch.conjugate(_real2complex(lambda_, like)).T
+    return _dispatch.conjugate(_real2complex(o3_lambda, like)).T
 
 
-def _complex_clebsch_gordan_matrix(l1: int, l2: int, lambda_: int, like: Array):
+def _complex_clebsch_gordan_matrix(l1: int, l2: int, o3_lambda: int, like: Array):
     r"""clebsch-gordan matrix
     Computes the Clebsch-Gordan (CG) matrix for
     transforming complex-valued spherical harmonics.
     The CG matrix is computed as a 3D array of elements
-        < l1 m1 l2 m2 | lambda_ mu >
+        < l1 m1 l2 m2 | o3_lambda mu >
     where the first axis loops over m1, the second loops
     over m2, and the third one loops over mu. The matrix is real.
     For example, using the relation:
-        | l1 l2 lambda_ mu > =
+        | l1 l2 o3_lambda mu > =
            \sum_{m1, m2}
-            <l1 m1 l2 m2|l1 l2 lambda_ mu> | l1 m1 > | l2 m2 >
+            <l1 m1 l2 m2|l1 l2 o3_lambda mu> | l1 m1 > | l2 m2 >
     (https://en.wikipedia.org/wiki/Clebsch–Gordan_coefficients, section
     "Formal definition of Clebsch-Gordan coefficients", eq 2)
-    one can obtain the spherical harmonics lambda_ from two sets of
+    one can obtain the spherical harmonics o3_lambda from two sets of
     spherical harmonics with l1 and l2 (up to a normalization factor).
E.g.: Args: l1: l number for the first set of spherical harmonics l2: l number for the second set of spherical harmonics - lambda_: l number For the third set of spherical harmonics + o3_lambda: l number For the third set of spherical harmonics like: Operations are dispatched to the corresponding this arguments array type Returns: cg: CG matrix for transforming complex-valued spherical harmonics @@ -458,10 +392,10 @@ def _complex_clebsch_gordan_matrix(l1: int, l2: int, lambda_: int, like: Array): >>> np.allclose(ratio[0], ratio) True """ - if abs(l1 - l2) > lambda_ or abs(l1 + l2) < lambda_: - return _dispatch.zeros_like(like, (2 * l1 + 1, 2 * l2 + 1, 2 * lambda_ + 1)) + if abs(l1 - l2) > o3_lambda or abs(l1 + l2) < o3_lambda: + return _dispatch.zeros_like(like, (2 * l1 + 1, 2 * l2 + 1, 2 * o3_lambda + 1)) else: - return wigners.clebsch_gordan_array(l1, l2, lambda_) + return wigners.clebsch_gordan_array(l1, l2, o3_lambda) # ================================================= @@ -472,69 +406,66 @@ def _complex_clebsch_gordan_matrix(l1: int, l2: int, lambda_: int, like: Array): def combine_arrays( array_1: Array, array_2: Array, - lambda_: int, + o3_lambda: int, cg_coeffs: TensorMap, cg_backend: str, ) -> Array: """ - Couples arrays `array_1` and `array_2` corresponding to the irreducible - spherical components of 2 angular channels l1 and l2 using the appropriate - Clebsch-Gordan coefficients. As l1 and l2 can be combined to form multiple - lambda channels, this function returns the coupling to a single specified - channel `lambda`. The angular channels l1 and l2 are inferred from the size - of the components axis (axis 1) of the input arrays. - - `array_1` has shape (n_i, 2 * l1 + 1, n_p) and `array_2` has shape (n_i, 2 * - l2 + 1, n_q). n_i is the number of samples, n_p and n_q are the number of - properties in each array. The number of samples in each array must be the - same. - - The ouput array has shape (n_i, 2 * lambda + 1, n_p * n_q), where lambda is - the input parameter `lambda_`. - - The Clebsch-Gordan coefficients are cached in `cg_coeffs`. Currently, these - must be produced by the ClebschGordanReal class in this module. These - coefficients can be stored in either sparse dictionaries or dense arrays. - - The combination operation is dispatched such that numpy arrays or torch - tensors are automatically handled. - - `return_empty_array` can be used to return an empty array of the correct - shape, without performing the CG combination step. This can be useful for - probing the outputs of CG iterations in terms of metadata without the - computational cost of performing the CG combinations - i.e. using the - function :py:func:`combine_single_center_to_body_order_metadata_only`. - - :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 - + 1, n_q_properties] - :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 - + 1, n_p_properties] - :param lambda_: int value of the resulting coupled channel - :param cg_coeffs: either a sparse dictionary with keys (m1, m2, mu) and - array values being sparse blocks of shape , or a dense - array of shape [(2 * l1 +1) * (2 * l2 +1), (2 * lambda_ + 1)]. If it is - None we only return an empty array of the shape. - :param cg_backend: specifies the combine backend with sparse CG - coefficients. It can have the values "python-dense", "python-sparse", - "mops" and "metadata". 
If "python-dense" or "python-sparse" is chosen, a - dense or sparse combination (respectively) of the arrays is performed - using either numpy or torch, depending on the backend. If "mops" is - chosen, a sparse combination of the arrays is performed if the external - package MOPS is installed. If "metadata" is chosen, no combination is - perfomed, and an empty array of the correct shape is returned. - - - :returns: array of shape [n_samples, (2*lambda_+1), q_properties * - p_properties] + Couples arrays `array_1` and `array_2` corresponding to the irreducible spherical + components of 2 angular channels l1 and l2 using the appropriate Clebsch-Gordan + coefficients. As l1 and l2 can be combined to form multiple lambda channels, this + function returns the coupling to a single specified channel `lambda`. The angular + channels l1 and l2 are inferred from the size of the components axis (axis 1) of the + input arrays. + + `array_1` has shape (n_i, 2 * l1 + 1, n_p) and `array_2` has shape (n_i, 2 * l2 + 1, + n_q). n_i is the number of samples, n_p and n_q are the number of properties in each + array. The number of samples in each array must be the same. + + The ouput array has shape (n_i, 2 * lambda + 1, n_p * n_q), where lambda is the + input parameter `o3_lambda`. + + The Clebsch-Gordan coefficients are cached in `cg_coeffs`. Currently, these must be + produced by the ClebschGordanReal class in this module. These coefficients can be + stored in either sparse dictionaries or dense arrays. + + The combination operation is dispatched such that numpy arrays or torch tensors are + automatically handled. + + `return_empty_array` can be used to return an empty array of the correct shape, + without performing the CG combination step. This can be useful for probing the + outputs of CG iterations in terms of metadata without the computational cost of + performing the CG combinations - i.e. using the function + :py:func:`combine_single_center_to_body_order_metadata_only`. + + :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 + 1, + n_q_properties] + :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 + 1, + n_p_properties] + :param o3_lambda: int value of the resulting coupled channel + :param cg_coeffs: a :py:class:`TensorMap` containing CG coefficients in a format for + either sparse or dense CG tensor products, as returned by + :py:func:`calculate_cg_coefficients`. See the function docstring for details on + the data structure. Only used if ``cg_backend`` is not ``"metadata"``. + :param cg_backend: specifies the combine backend with sparse CG coefficients. It can + have the values "python-dense", "python-sparse", "mops" and "metadata". If + "python-dense" or "python-sparse" is chosen, a dense or sparse combination + (respectively) of the arrays is performed using either numpy or torch, depending + on the backend. If "mops" is chosen, a sparse combination of the arrays is + performed if the external package MOPS is installed. If "metadata" is chosen, no + combination is perfomed, and an empty array of the correct shape is returned. 
+ + + :returns: array of shape [n_samples, (2*o3_lambda+1), q_properties * p_properties] """ # If just precomputing metadata, return an empty array if cg_backend == "metadata": - return empty_combine(array_1, array_2, lambda_) + return empty_combine(array_1, array_2, o3_lambda) if cg_backend == "python-sparse" or cg_backend == "mops": - return sparse_combine(array_1, array_2, lambda_, cg_coeffs, cg_backend) + return sparse_combine(array_1, array_2, o3_lambda, cg_coeffs, cg_backend) elif cg_backend == "python-dense": - return dense_combine(array_1, array_2, lambda_, cg_coeffs) + return dense_combine(array_1, array_2, o3_lambda, cg_coeffs) else: raise ValueError( f"Wrong cg_backend, got '{cg_backend}'," @@ -545,10 +476,11 @@ def combine_arrays( def empty_combine( array_1: Array, array_2: Array, - lambda_: int, + o3_lambda: int, ) -> Array: """ - Returns a Clebsch-Gordan combination step on two arrays using sparse operations + Returns an empty array of the correct shape, imitating the output array shape + produced by a CG combination of ``array_1`` and ``array_2``. """ # Samples dimensions must be the same assert array_1.shape[0] == array_2.shape[0] @@ -558,40 +490,40 @@ def empty_combine( n_p = array_1.shape[2] # number of properties in array_1 n_q = array_2.shape[2] # number of properties in array_2 - return _dispatch.empty_like(array_1, (n_i, 2 * lambda_ + 1, n_p * n_q)) + return _dispatch.empty_like(array_1, (n_i, 2 * o3_lambda + 1, n_p * n_q)) def sparse_combine( array_1: Array, array_2: Array, - lambda_: int, + o3_lambda: int, cg_coeffs: TensorMap, cg_backend: str, ) -> Array: """ - Performs a Clebsch-Gordan combination step on 2 arrays using sparse - operations. The angular channel of each block is inferred from the size of - its component axis, and the blocks are combined to the desired output - angular channel `lambda_` using the appropriate Clebsch-Gordan coefficients. - - :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 - + 1, n_q_properties] - :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 - + 1, n_p_properties] - :param lambda_: int value of the resulting coupled channel - :param cg_coeffs: sparse dictionary with keys (m1, m2, mu) and array values - being sparse blocks of shape - :param cg_backend: specifies the combine backend with sparse CG - coefficients. It can have the values "python-dense", "python-sparse", - "mops" and "metadata". If "python-dense" or "python-sparse" is chosen, a - dense or sparse combination (respectively) of the arrays is performed - using either numpy or torch, depending on the backend. If "mops" is - chosen, a sparse combination of the arrays is performed if the external - package MOPS is installed. If "metadata" is chosen, no combination is - perfomed, and an empty array of the correct shape is returned. - - :returns: array of shape [n_samples, (2*lambda_+1), q_properties * - p_properties] + Performs a Clebsch-Gordan combination step on 2 arrays using sparse operations. The + angular channel of each block is inferred from the size of its component axis, and + the blocks are combined to the desired output angular channel `o3_lambda` using the + appropriate Clebsch-Gordan coefficients. 
+
+    :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 + 1,
+        n_q_properties]
+    :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 + 1,
+        n_p_properties]
+    :param o3_lambda: int value of the resulting coupled channel
+    :param cg_coeffs: a :py:class:`TensorMap` containing CG coefficients in a format for
+        either sparse or dense CG tensor products, as returned by
+        :py:func:`calculate_cg_coefficients`. See the function docstring for details on
+        the data structure. Only used if ``cg_backend`` is not ``"metadata"``.
+    :param cg_backend: specifies the combine backend with sparse CG coefficients. It can
+        have the values "python-dense", "python-sparse", "mops" and "metadata". If
+        "python-dense" or "python-sparse" is chosen, a dense or sparse combination
+        (respectively) of the arrays is performed using either numpy or torch, depending
+        on the backend. If "mops" is chosen, a sparse combination of the arrays is
+        performed if the external package MOPS is installed. If "metadata" is chosen, no
+        combination is performed, and an empty array of the correct shape is returned.
+
+    :returns: array of shape [n_samples, (2*o3_lambda+1), q_properties * p_properties]
     """
     # Samples dimensions must be the same
     assert array_1.shape[0] == array_2.shape[0]
@@ -610,11 +542,11 @@ def sparse_combine(
     # can be made more straightforward once MOPS support TorchScript
     if isinstance(array_1, TorchTensor) or cg_backend == "python-sparse":
         # Initialise output array
-        array_out = _dispatch.zeros_like(array_1, (n_i, 2 * lambda_ + 1, n_p * n_q))
+        array_out = _dispatch.zeros_like(array_1, (n_i, 2 * o3_lambda + 1, n_p * n_q))
 
         # Get the corresponding Clebsch-Gordan coefficients
         # Fill in each mu component of the output array in turn
-        cg_l1l2lam = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lambda_})
+        cg_l1l2lam = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": o3_lambda})
         for i in range(len(cg_l1l2lam.samples)):
             m1m2mu_key = cg_l1l2lam.samples.entry(i)
             m1 = m1m2mu_key[0]
@@ -645,7 +577,7 @@ def sparse_combine(
         # We also need to pass SAP the CG coefficients and m1, m2, and mu indices as 1D
         # arrays. Extract these from the corresponding TensorBlock in the TensorMap CG
         # cache.
-        block = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lambda_})
+        block = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": o3_lambda})
         samples = block.samples
 
         m1_arr: List[int] = []
@@ -667,12 +599,12 @@ def sparse_combine(
             indices_output=mu_arr,
-            output_size=2 * lambda_ + 1,
+            output_size=2 * o3_lambda + 1,
         )
-        assert array_out.shape == (n_i * n_p * n_q, 2 * lambda_ + 1)
+        assert array_out.shape == (n_i * n_p * n_q, 2 * o3_lambda + 1)
 
         # Reshape back
-        array_out = array_out.reshape(n_i, n_p * n_q, 2 * lambda_ + 1)
+        array_out = array_out.reshape(n_i, n_p * n_q, 2 * o3_lambda + 1)
         array_out = _dispatch.swapaxes(array_out, 1, 2)
 
         return array_out
@@ -688,31 +620,33 @@ def sparse_combine(
 def dense_combine(
     array_1: Array,
     array_2: Array,
-    lambda_: int,
+    o3_lambda: int,
     cg_coeffs: TensorMap,
 ) -> Array:
     """
-    Performs a Clebsch-Gordan combination step on 2 arrays using a dense
-    operation. The angular channel of each block is inferred from the size of
-    its component axis, and the blocks are combined to the desired output
-    angular channel `lambda_` using the appropriate Clebsch-Gordan coefficients.
- - :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 - + 1, n_q_properties] - :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 - + 1, n_p_properties] - :param lambda_: int value of the resulting coupled channel - :param cg_coeffs: dense array of shape [(2 * l1 +1) * (2 * l2 +1), (2 * - lambda_ + 1)] - - :returns: array of shape [n_samples, (2*lambda_+1), q_properties * + Performs a Clebsch-Gordan combination step on 2 arrays using a dense operation. The + angular channel of each block is inferred from the size of its component axis, and + the blocks are combined to the desired output angular channel `o3_lambda` using the + appropriate Clebsch-Gordan coefficients. + + :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 + 1, + n_q_properties] + :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 + 1, + n_p_properties] + :param o3_lambda: int value of the resulting coupled channel + :param cg_coeffs: a :py:class:`TensorMap` containing CG coefficients in a format for + either sparse or dense CG tensor products, as returned by + :py:func:`calculate_cg_coefficients`. See the function docstring for details on + the data structure. Only used if ``cg_backend`` is not ``"metadata"``. + + :returns: array of shape [n_samples, (2 * o3_lambda + 1), q_properties * p_properties] """ # Infer l1 and l2 from the len of the length of axis 1 of each tensor l1 = (array_1.shape[1] - 1) // 2 l2 = (array_2.shape[1] - 1) // 2 - cg_l1l2lam = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lambda_}).values + cg_l1l2lam = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": o3_lambda}).values # (samples None None l1_mu q) * (samples l2_mu p None None) # -> (samples l2_mu p l1_mu q) we broadcast it in this way @@ -730,7 +664,7 @@ def dense_combine( ) # (l1_mu l2_mu lam_mu) -> ((l1_mu l2_mu) lam_mu) - cg_l1l2lam = cg_l1l2lam.reshape(-1, 2 * lambda_ + 1) + cg_l1l2lam = cg_l1l2lam.reshape(-1, 2 * o3_lambda + 1) # (samples (q p) (l1_mu l2_mu)) @ ((l1_mu l2_mu) lam_mu) # -> samples (q p) lam_mu diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py index 924ae3f7d..86eb3e5ba 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py @@ -7,16 +7,6 @@ from typing import List, Optional, Tuple, Union from .. import _dispatch - -# from ._classes import ( -# Array, -# Labels, -# LabelsEntry, -# TensorBlock, -# TensorMap, -# is_labels, -# torch_jit_annotate, -# ) from .._backend import ( Array, Labels, diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py index a601473f9..71519f944 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py @@ -183,21 +183,9 @@ def __init__( "Must be greater equal 0." 
) self._max_angular = max_angular - - if self._cg_backend == "python-dense": - sparse = False - use_mops = False - elif self._cg_backend == "python-sparse": - sparse = True - use_mops = False - elif self._cg_backend == "mops": - sparse = True - use_mops = True - self._cg_coefficients = _cg_cache.calculate_cg_coefficients( lambda_max=self._max_angular, - sparse=sparse, - use_mops=use_mops, + sparse=(self._cg_backend == "python-sparse" or self._cg_backend == "mops"), use_torch=(arrays_backend == "torch"), ) From 44ded8cf4c20bcbc8a1519c935de462aaa91d1db Mon Sep 17 00:00:00 2001 From: Joseph Abbott Date: Wed, 21 Feb 2024 17:53:20 +0100 Subject: [PATCH 23/23] Final review comment --- .../rascaline/utils/clebsch_gordan/correlate_density.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py index 71519f944..a2181186b 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py @@ -185,7 +185,7 @@ def __init__( self._max_angular = max_angular self._cg_coefficients = _cg_cache.calculate_cg_coefficients( lambda_max=self._max_angular, - sparse=(self._cg_backend == "python-sparse" or self._cg_backend == "mops"), + sparse=(self._cg_backend in ["python-sparse", "mops"]), use_torch=(arrays_backend == "torch"), )
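
For reference, a minimal end-to-end sketch of how the `cg_backend` option touched
by this final commit is exercised, mirroring the tests earlier in this series
(the `frames` and the `SPHEX_HYPERS_SMALL` hyperparameters are stand-ins borrowed
from those tests):

    import rascaline
    from rascaline.utils.clebsch_gordan.correlate_density import DensityCorrelations

    # Body-order 2 density descriptor for some ase.Atoms frames
    density = rascaline.SphericalExpansion(**SPHEX_HYPERS_SMALL).compute(frames)

    # One CG tensor product of the density with itself; "python-sparse" and
    # "python-dense" are the pure Python backends, while "mops" additionally
    # requires the optional MOPS package
    calculator = DensityCorrelations(
        max_angular=SPHEX_HYPERS_SMALL["max_angular"] * 2,
        correlation_order=2,
        cg_backend="python-sparse",
    )
    nu_2 = calculator.compute(density)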