diff --git a/markovflow/kalman_filter.py b/markovflow/kalman_filter.py index e26f2e6..9fb4d1d 100644 --- a/markovflow/kalman_filter.py +++ b/markovflow/kalman_filter.py @@ -96,9 +96,8 @@ def _k_inv_post(self): # The emission matrix is tiled across the time_points, so for a time invariant matrix # this is equivalent to Gᵀ Σ⁻¹ G = (I_N ⊗ HᵀR⁻¹H), likelihood_precision = SymmetricBlockTriDiagonal(h_t_r_h) - _k_inv_prior = self.prior_ssm.precision # K⁻¹ + GᵀΣ⁻¹G - return _k_inv_prior + likelihood_precision + return self._k_inv_prior + likelihood_precision @property def _log_det_observation_precision(self): @@ -495,3 +494,121 @@ def _log_det_observation_precision(self): def observations(self): """ Observation vector """ return self.sites.means + + +@tf_scope_class_decorator +class KalmanFilterWithSparseSites(BaseKalmanFilter): + r""" + Performs a Kalman filter on a :class:`~markovflow.state_space_model.StateSpaceModel` + and :class:`~markovflow.emission_model.EmissionModel`, with Gaussian sites, over a time grid. + """ + + def __init__(self, state_space_model: StateSpaceModel, emission_model: EmissionModel, sites: GaussianSites, + num_grid_points: int, observations_index: tf.Tensor, observations: tf.Tensor): + """ + :param state_space_model: Parameterises the latent chain. + :param emission_model: Maps the latent chain to the observations. + :param sites: Gaussian sites over the observations. + :param num_grid_points: number of grid points. + :param observations_index: Index of the observations in the time grid with shape (N,). + :param observations: Sparse observations with shape (N, output_dim). + """ + self.sites = sites + self.observations_index = observations_index + self.sparse_observations = observations + self.grid_shape = tf.TensorShape((num_grid_points, 1)) + super().__init__(state_space_model, emission_model) + + @property + def _r_inv(self): + """ + Precisions of the observation model over the time grid. + """ + data_sites_precision = self.sites.precisions + return self.sparse_to_dense(data_sites_precision, output_shape=self.grid_shape + (1,)) + + @property + def _log_det_observation_precision(self): + """ + Sum of log determinant of the precisions of the observation model. It only calculates for the data_sites as + other sites precision is anyways zero. + """ + return tf.reduce_sum(tf.linalg.logdet(self._r_inv_data), axis=-1) + + @property + def observations(self): + """ Sparse observation vector """ + return self.sparse_observations + + @property + def _r_inv_data(self): + """ + Precisions of the observation model for only the data sites. + """ + return self.sites.precisions + + def sparse_to_dense(self, tensor: tf.Tensor, output_shape: tf.TensorShape) -> tf.Tensor: + """ + Convert a sparse tensor to a dense one on the basis of observations index, output tensor is of the output_shape. + """ + return tf.scatter_nd(self.observations_index, tensor, output_shape) + + def dense_to_sparse(self, tensor: tf.Tensor) -> tf.Tensor: + """ + Convert a dense tensor to a sparse one on the basis of observations index. + """ + tensor_shape = tensor.shape + expand_dims = len(tensor_shape) == 3 + + tensor = tf.gather_nd(tf.reshape(tensor, (-1, 1)), self.observations_index) + if expand_dims: + tensor = tf.expand_dims(tensor, axis=-1) + return tensor + + def log_likelihood(self) -> tf.Tensor: + r""" + Construct a TensorFlow function to compute the likelihood. + + For more mathematical details, look at the log_likelihood function of the parent class. + The main difference from the parent class are that the vector of observations is now sparse. + + :return: The likelihood as a scalar tensor (we sum over the `batch_shape`). + """ + # K⁻¹ + GᵀΣ⁻¹G = LLᵀ. + l_post = self._k_inv_post.cholesky + num_data = self.observations_index.shape[0] + + # Hμ [..., num_transitions + 1, output_dim] + marginal = self.emission.project_state_to_f(self.prior_ssm.marginal_means) + + # y = obs - Hμ [..., num_transitions + 1, output_dim] + disp = self.sparse_to_dense(self.observations, marginal.shape) - marginal + disp_data = self.sparse_observations - self.dense_to_sparse(marginal) + + # cst is the constant term for a gaussian log likelihood + cst = ( + -0.5 * np.log(2 * np.pi) * tf.cast(self.emission.output_dim * num_data, default_float()) + ) + + term1 = -0.5 * tf.reduce_sum( + input_tensor=tf.einsum("...op,...p,...o->...o", self._r_inv_data, disp_data, disp_data), axis=[-1, -2] + ) + + # term 2 is: ½|L⁻¹(GᵀΣ⁻¹)y|² + # (GᵀΣ⁻¹)y [..., num_transitions + 1, state_dim] + obs_proj = self._back_project_y_to_state(disp) + + # ½|L⁻¹(GᵀΣ⁻¹)y|² [...] + term2 = 0.5 * tf.reduce_sum( + input_tensor=tf.square(l_post.solve(obs_proj, transpose_left=False)), axis=[-1, -2] + ) + + ## term 3 is: ½log |K⁻¹| - log |L| + ½ log |Σ⁻¹| + # where log |Σ⁻¹| = num_data * log|R⁻¹| + term3 = ( + 0.5 * self.prior_ssm.log_det_precision() + - l_post.abs_log_det() + + 0.5 * self._log_det_observation_precision + ) + + return tf.reduce_sum(cst + term1 + term2 + term3) diff --git a/tests/integration/test_kalman_filter_with_sparse_sites.py b/tests/integration/test_kalman_filter_with_sparse_sites.py new file mode 100644 index 0000000..d88407e --- /dev/null +++ b/tests/integration/test_kalman_filter_with_sparse_sites.py @@ -0,0 +1,69 @@ +import numpy as np +import tensorflow as tf +import pytest +from gpflow.config import default_float + +from markovflow.kernels.matern import Matern12 +from markovflow.mean_function import LinearMeanFunction +from markovflow.models.gaussian_process_regression import GaussianProcessRegression +from markovflow.kalman_filter import KalmanFilterWithSparseSites, UnivariateGaussianSitesNat +from markovflow.likelihoods import MultivariateGaussian + +@pytest.fixture( + name="time_step_homogeneous", params=[(0.01, True), (0.01, False), (0.001, True), (0.001, False)], +) +def _time_step_homogeneous_fixture(request): + return request.param + + +@pytest.fixture(name="kalman_gpr_setup") +def _setup(batch_shape, time_step_homogeneous): + """ + Create a Gaussian Process model and an equivalent kalman filter model + with more latent states than observations. + FIXME: Currently batch_shape isn't used. + """ + dt, homogeneous = time_step_homogeneous + + time_grid = np.arange(0.0, 1.0, dt) + if not homogeneous: + time_grid = np.sort(np.random.choice(time_grid, 50, replace=False)) + + time_points = time_grid[::10] + observations = np.sin(12 * time_points[..., None]) + np.random.randn(len(time_points), 1) * 0.1 + + input_data = ( + tf.constant(time_points, dtype=default_float()), + tf.constant(observations, dtype=default_float()), + ) + + observation_covariance = 1.0 # Same as GPFlow default + kernel = Matern12(lengthscale=1.0, variance=1.0, output_dim=observations.shape[-1]) + kernel.set_state_mean(tf.random.normal((1,), dtype=default_float())) + gpr_model = GaussianProcessRegression( + input_data=input_data, + kernel=kernel, + mean_function=LinearMeanFunction(1.1), + chol_obs_covariance=tf.constant([[np.sqrt(observation_covariance)]], dtype=default_float()), + ) + + prior_ssm = kernel.state_space_model(time_grid) + emission_model = kernel.generate_emission_model(time_grid) + observations_index = tf.where(tf.equal(time_grid[..., None], time_points))[:, 0][..., None] + + observations -= gpr_model.mean_function(time_points) + + nat1 = observations / observation_covariance + nat2 = (-0.5 / observation_covariance) * tf.ones_like(nat1)[..., None] + lognorm = tf.zeros_like(nat1) + sites = UnivariateGaussianSitesNat(nat1=nat1, nat2=nat2, log_norm=lognorm) + + kf_sparse_sites = KalmanFilterWithSparseSites(prior_ssm, emission_model, sites, time_grid.shape[0], + observations_index, observations) + + return gpr_model, kf_sparse_sites + +def test_kalman_loglikelihood(with_tf_random_seed, kalman_gpr_setup): + gpr_model, kf_sparse_sites = kalman_gpr_setup + + np.testing.assert_allclose(gpr_model.log_likelihood(), kf_sparse_sites.log_likelihood())