diff --git a/REFERENCES.md b/REFERENCES.md index ddbda90a..0fa27e7e 100644 --- a/REFERENCES.md +++ b/REFERENCES.md @@ -8,3 +8,5 @@ * Danby, J. M. A. (1992). Fundamentals of Celestial Mechanics. 2nd ed., William-Bell, Inc. ISBN-13: 978-0943396200 Notes: of particular interest is Danby's fantastic chapter on universal variables (6.9) +* Wan, E. A; Van Der Merwe, R. (2000). The unscented Kalman filter for nonlinear estimation. + Proceedings of the IEEE 2000 Adaptive Systems for Signal Processing, Communications, and Control Symposium, 153-158. https://doi.org/10.1109/ASSPCC.2000.882463 diff --git a/adam_core/coordinates/__init__.py b/adam_core/coordinates/__init__.py index e4b481cb..c11849f8 100644 --- a/adam_core/coordinates/__init__.py +++ b/adam_core/coordinates/__init__.py @@ -8,6 +8,7 @@ covariances_to_df, covariances_to_table, sample_covariance, + sample_covariance_sigma_points, sigmas_from_df, sigmas_to_df, transform_covariances_jacobian, diff --git a/adam_core/coordinates/covariances.py b/adam_core/coordinates/covariances.py index 9ca1693a..8c89e812 100644 --- a/adam_core/coordinates/covariances.py +++ b/adam_core/coordinates/covariances.py @@ -1,5 +1,5 @@ import logging -from typing import Callable, List +from typing import Callable, List, Tuple import numpy as np import pandas as pd @@ -7,6 +7,7 @@ from astropy import units as u from astropy.table import Table as AstropyTable from quivr import FixedSizeListColumn, Table +from scipy.linalg import sqrtm from scipy.stats import multivariate_normal from .jacobian import calc_jacobian @@ -238,6 +239,85 @@ def sample_covariance( return samples +def sample_covariance_sigma_points( + mean: np.ndarray, cov: np.ndarray, alpha: float = 1, kappa: float = 0.0 +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Create sigma-point samples of a multivariate Gaussian distribution + with given mean and covariances. + + Parameters + ---------- + mean : `~numpy.ndarray` (D) + Multivariate mean of the Gaussian distribution. + cov : `~numpy.ndarray` (D, D) + Multivariate variance-covariance matrix of the Gaussian distribution. + alpha : float, optional + Spread of the sigma points between 1e^-2 and 1. + kappa : float, optional + Secondary scaling parameter usually set to 0. + + Returns + ------- + sigma_points : `~numpy.ndarray` (2 * D + 1, D) + Sigma points drawn from the distribution. + W: `~numpy.ndarray` (2 * D + 1) + Weights of the sigma points. + W_cov: `~numpy.ndarray` (2 * D + 1) + Weights of the sigma points to reconstruct covariance matrix. + + References + ---------- + [1] Wan, E. A; Van Der Merwe, R. (2000). The unscented Kalman filter for nonlinear estimation. + Proceedings of the IEEE 2000 Adaptive Systems for Signal Processing, + Communications, and Control Symposium, 153-158. + https://doi.org/10.1109/ASSPCC.2000.882463 + """ + # Calculate the dimensionality of the distribution + D = mean.shape[0] + + # See equation 15 in Wan & Van Der Merwe (2000) [1] + N = 2 * D + 1 + sigma_points = np.empty((N, D)) + W = np.empty(N) + W_cov = np.empty(N) + + # Calculate the scaling parameter lambda + lambd = alpha**2 * (D + kappa) - D + + # First sigma point is the mean + sigma_points[0] = mean + + # Beta is used to encode prior knowledge about the distribution. + # If the distribution is a well-constrained Gaussian, beta = 2 is optimal + # but lets set beta to 0 for now which has the effect of not weighting the mean state + # with 0 for the covariance matrix. This is generally better for more distributions. + beta = 0 + # Calculate the weights for mean and the covariance matrix + # Weight are used to reconstruct the mean and covariance matrix from the sigma points + W[0] = lambd / (D + lambd) + W_cov[0] = lambd / (D + lambd) + (1 - alpha**2 + beta) + + # Take the matrix square root of the scaled covariance matrix. + # Sometimes you'll see this done with a Cholesky decomposition for speed + # but sqrtm is sufficiently optimized for this use case and typically provides + # better results + L = sqrtm((D + lambd) * cov) + + # Calculate the remaining sigma points + for i in range(D): + offset = L[i] + sigma_points[i + 1] = mean + offset + sigma_points[i + 1 + D] = mean - offset + + # The weights for the remaining sigma points are the same + # for the mean and the covariance matrix + W[1:] = 1 / (2 * (D + lambd)) + W_cov[1:] = 1 / (2 * (D + lambd)) + + return sigma_points, W, W_cov + + def transform_covariances_sampling( coords: np.ndarray, covariances: np.ndarray, diff --git a/adam_core/coordinates/tests/test_covariances.py b/adam_core/coordinates/tests/test_covariances.py index 511361f2..455a9ac5 100644 --- a/adam_core/coordinates/tests/test_covariances.py +++ b/adam_core/coordinates/tests/test_covariances.py @@ -1,6 +1,36 @@ import numpy as np -from ..covariances import CoordinateCovariances +from ...utils.helpers.orbits import make_real_orbits +from ..covariances import CoordinateCovariances, sample_covariance_sigma_points + + +def test_sample_covariance_sigma_points(): + # Get a sample of real orbits and test that sigma point sampling + # allows the state vector and its covariance to be reconstructed + orbits = make_real_orbits() + + for orbit in orbits: + mean = orbit.coordinates.values[0] + covariance = orbit.coordinates.covariance.to_matrix()[0] + + samples, W, W_cov = sample_covariance_sigma_points(mean, covariance) + + # In a 6 dimensional space we expect 13 sigma point samples + assert len(samples) == 13 + # The first sample should be the mean + np.testing.assert_equal(samples[0], mean) + # The first weight should be 0.0 + assert W[0] == 0.0 + # The first weight for the covariance should be 0 + # since beta = 0 internally + assert W_cov[0] == 0.0 + + # Reconstruct the mean and covariance + mean_sg = np.dot(W, samples) + np.testing.assert_allclose(mean_sg, mean, rtol=0, atol=1e-14) + + cov_sg = np.cov(samples, aweights=W_cov, rowvar=False, bias=True) + np.testing.assert_allclose(cov_sg, covariance, rtol=0, atol=1e-14) def test_CoordinateCovariances_from_sigmas():