Implement whitened representation of q(u) (#8)
- new feature: add a `whiten` keyword argument to the `VBPP` constructor, which turns on the whitened representation of q(u) (see the usage sketch below).
- bugfix: add a `full_output_cov` keyword argument to `predict_f` (only `full_output_cov=False` is implemented) so that the `predict_f_samples` method can be used.
- docs: add a note on matrix inversion issues (solution: fix the inducing point locations).
- tests: improved coverage.
st-- authored Apr 21, 2022
1 parent c91892e commit 85ad11d
Showing 4 changed files with 139 additions and 15 deletions.
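
For orientation, here is a minimal usage sketch assembled from the demo and tests in this commit. The class names, keyword arguments, and methods are taken from the diff below; the data, grid sizes, and optimizer call are illustrative only, not part of the commit.

import numpy as np
import gpflow
from vbpp import VBPP

domain = np.array([[0.0, 10.0]])
events = np.random.RandomState(0).uniform(0, 10, size=20)[:, None]

kernel = gpflow.kernels.SquaredExponential()
Z = np.linspace(0, 10, 17)[:, None]  # inducing points on a grid
feature = gpflow.inducing_variables.InducingPoints(Z)
gpflow.set_trainable(feature, False)  # keep them fixed (see the docs note in vbpp/model.py)

M = feature.num_inducing
# with whiten=True, q_mu and q_S parametrise the whitened q(v); N(0, I) matches the prior
model = VBPP(feature, kernel, domain, np.zeros(M), np.eye(M), whiten=True)

gpflow.optimizers.Scipy().minimize(lambda: -model.elbo(events), model.trainable_variables)

Xnew = np.linspace(0, 10, 101)[:, None]
samples = model.predict_f_samples(Xnew)  # works now that predict_f accepts full_output_cov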
demo/demo_coal_1d.py (4 additions, 1 deletion)
@@ -32,11 +32,14 @@ def build_model(events, domain, M=20):
     kernel = gpflow.kernels.SquaredExponential()
     Z = domain_grid(domain, M)
     feature = gpflow.inducing_variables.InducingPoints(Z)
+    gpflow.set_trainable(feature, False)
     q_mu = np.zeros(M)
     q_S = np.eye(M)
     num_events = len(events)
     beta0 = np.sqrt(num_events / domain_area(domain))
-    model = VBPP(feature, kernel, domain, q_mu, q_S, beta0=beta0, num_events=num_events)
+    model = VBPP(
+        feature, kernel, domain, q_mu, q_S, beta0=beta0, num_events=num_events, whiten=True
+    )
     return model


tests/test_model.py (93 additions, 0 deletions; new file)
@@ -0,0 +1,93 @@
# Copyright (C) 2022 ST John
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pytest
import tensorflow as tf
from gpflow.inducing_variables import InducingPoints
from gpflow.kernels import SquaredExponential
import gpflow
from vbpp import VBPP

rng = np.random.RandomState(0)


class Data:
    domain = np.array([[0.0, 10.0]])
    events = rng.uniform(0, 10, size=20)[:, None]
    Z = np.linspace(0, 10, 17)[:, None]
    Xtest = np.linspace(-2, 12, 37)[:, None]


@pytest.mark.parametrize("whiten", [True, False])
def test_elbo_terms_at_initialization(whiten):
    kernel = SquaredExponential()
    feature = InducingPoints(Data.Z)
    M = feature.num_inducing
    m_init = np.zeros(M)
    S_init = np.eye(M) if whiten else kernel(Data.Z, full_cov=True)
    m = VBPP(feature, kernel, Data.domain, m_init, S_init, whiten=whiten)

    Kuu = m.compute_Kuu()
    assert np.allclose(m.prior_kl(Kuu).numpy(), 0.0)
    assert np.allclose(m._elbo_integral_term(Kuu).numpy(), -m.total_area)


def test_equivalence_of_whitening():
    kernel = SquaredExponential()
    feature = InducingPoints(Data.Z)

    M = feature.num_inducing
    np.random.seed(42)
    m_init = np.random.randn(M)
    S_init = (lambda A: A @ A.T)(np.random.randn(M, M))

    Kuu = kernel(Data.Z)
    L = np.linalg.cholesky(Kuu.numpy())

    beta0 = 1.234
    m_whitened = VBPP(feature, kernel, Data.domain, m_init, S_init, whiten=True, beta0=beta0)
    m_unwhitened = VBPP(
        feature, kernel, Data.domain, L @ m_init, L @ S_init @ L.T, whiten=False, beta0=beta0
    )

    Xnew = np.linspace(-3, 13, 17)[:, None]
    f_mean_whitened, f_var_whitened = m_whitened.predict_f(Xnew)
    f_mean_unwhitened, f_var_unwhitened = m_unwhitened.predict_f(Xnew)
    np.testing.assert_allclose(f_mean_whitened, f_mean_unwhitened, rtol=1e-3)
    np.testing.assert_allclose(f_var_whitened, f_var_unwhitened, rtol=2e-3)

    np.testing.assert_allclose(
        m_whitened.elbo(Data.events), m_unwhitened.elbo(Data.events), rtol=1e-6
    )
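
# The identity exercised above: if q(v) = N(m, S) and u = L v with L Lᵀ = Kuu,
# then q(u) = N(L m, L S Lᵀ), and KL[q(v) ‖ N(0, I)] = KL[q(u) ‖ N(0, Kuu)],
# so both parametrisations define the same posterior and the same ELBO.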


@pytest.mark.parametrize("whiten", [True, False])
def test_lambda_predictions(whiten):
    kernel = SquaredExponential()
    feature = InducingPoints(Data.Z)

    M = feature.num_inducing
    np.random.seed(42)
    m_init = np.random.randn(M)
    S_init = (lambda A: A @ A.T)(np.random.randn(M, M))
    beta0 = 1.234

    m = VBPP(feature, kernel, Data.domain, m_init, S_init, whiten=whiten, beta0=beta0)

    mean, lower, upper = m.predict_lambda_and_percentiles(Data.Xtest)
    mean_again = m.predict_lambda(Data.Xtest)
    np.testing.assert_allclose(mean, mean_again)
    np.testing.assert_array_less(lower, mean)
    np.testing.assert_array_less(mean, upper)
tests/test_smoke.py (12 additions, 9 deletions)
@@ -1,3 +1,4 @@
+# Copyright (C) 2022 ST John
 # Copyright (C) Secondmind Ltd 2017
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,8 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import pytest
 import numpy as np
-import tensorflow as tf
 from gpflow.inducing_variables import InducingPoints
 from gpflow.kernels import SquaredExponential
 import gpflow
@@ -22,20 +23,22 @@
 rng = np.random.RandomState(0)


-def test_smoke():
+@pytest.mark.parametrize("whiten", [True, False])
+def test_smoke_optimize_and_predict(whiten):
     domain = np.array([[0.0, 10.0]])
-    kernel = SquaredExponential()
     events = rng.uniform(0, 10, size=20)[:, None]
-    feature = InducingPoints(np.linspace(0, 10, 20)[:, None])
-    M = len(feature)
-    m = VBPP(feature, kernel, domain, np.zeros(M), np.eye(M))
-
-    Kuu = m.compute_Kuu()
-    m.q_sqrt.assign(np.linalg.cholesky(Kuu))
-    assert np.allclose(m.prior_kl(tf.identity(Kuu)).numpy(), 0.0)
+    kernel = SquaredExponential()
+    Z = np.linspace(0, 10, 17)[:, None]
+    feature = InducingPoints(Z)
+    M = feature.num_inducing
+    m = VBPP(feature, kernel, domain, np.zeros(M), np.eye(M), whiten=whiten)

     def objective_closure():
         return -m.elbo(events)

     opt = gpflow.optimizers.Scipy()
     opt.minimize(objective_closure, m.trainable_variables, options=dict(maxiter=2))
+
+    X = np.linspace(-1, 11, 19)[:, None]
+    _ = m.predict_f_samples(X)
vbpp/model.py (30 additions, 5 deletions)
@@ -1,3 +1,4 @@
+# Copyright (C) 2022 ST John
 # Copyright (C) Secondmind Ltd 2017-2020
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -61,6 +62,11 @@ class VBPP(gpflow.models.GPModel, gpflow.models.ExternalDataTrainingLossMixin):
     Implementation of the "Variational Bayes for Point Processes" model by
     Lloyd et al. (2015), with capability for multiple observations and the
     constant offset `beta0` from John and Hensman (2018).
+
+    Note: if you encounter "Input matrix is not invertible." errors during
+    training, this may be caused by inducing points moving too close to each
+    other, especially in 1D. Consider fixing the inducing point locations,
+    e.g. on a grid, as the demo above does via gpflow.set_trainable(feature, False).
     """

     def __init__(
@@ -74,6 +80,7 @@ def __init__(
         beta0: float = 1e-6,
         num_observations: int = 1,
         num_events: Optional[int] = None,
+        whiten: bool = False,
     ):
         """
         D = number of dimensions
@@ -100,6 +107,10 @@
         :param num_events: total number of events, defaults to events.shape[0]
             (relevant when feeding in minibatches)
+        :param whiten: whether to use the whitened representation of q(u).
+            When whiten=True, we parametrise q(v) = N(q_mu, q_S) instead,
+            and u = L v, where L is the lower-triangular Cholesky factor
+            of the kernel matrix Kuu.
         """
         super().__init__(
             kernel,
@@ -138,6 +149,8 @@ def __init__(

         self.psi_jitter = 0.0

+        self.whiten = whiten
+
     def _Psi_matrix(self):
         Ψ = tf_calc_Psi_matrix(self.kernel, self.inducing_variable, self.domain)
         psi_jitter_matrix = self.psi_jitter * tf.eye(
@@ -149,18 +162,21 @@ def _Psi_matrix(self):
     def total_area(self):
         return np.prod(self.domain[:, 1] - self.domain[:, 0])

-    def predict_f(self, Xnew, full_cov=False, *, Kuu=None):
+    def predict_f(self, Xnew, full_cov=False, *, full_output_cov=False, Kuu=None):
         """
         VBPP-specific conditional on the approximate posterior q(u), including a
         constant mean function.
         """
+        if full_output_cov:
+            raise NotImplementedError("only supports single-output models")
         mean, var = conditional(
             Xnew,
             self.inducing_variable,
             self.kernel,
             self.q_mu[:, None],
             full_cov=full_cov,
             q_sqrt=self.q_sqrt[None, :, :],
+            white=self.whiten,
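+            # gpflow's conditional() handles both parametrisations via the `white` flag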
         )
         # TODO make conditional() use Kuu if available

@@ -201,7 +217,10 @@ def _elbo_integral_term(self, Kuu):

         # Kzz⁻¹ m = R⁻ᵀ R⁻¹ m
         # Rinv_m = R⁻¹ m
-        Rinv_m = tf.linalg.triangular_solve(R, self.q_mu[:, None], lower=True)
+        if self.whiten:
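+            # whitened: q_mu parametrises v = R⁻¹ u directly, so R⁻¹ m is q_mu itself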
+            Rinv_m = self.q_mu[:, None]
+        else:
+            Rinv_m = tf.linalg.triangular_solve(R, self.q_mu[:, None], lower=True)

         # R⁻¹ Ψ R⁻ᵀ
         # = (R⁻¹ Ψ) R⁻ᵀ
@@ -211,8 +230,11 @@

         int_mean_f_sqr = tf_vec_mat_vec_mul(Rinv_m, Rinv_Ψ_RinvT, Rinv_m)

-        Rinv_L = tf.linalg.triangular_solve(R, self.q_sqrt, lower=True)
-        Rinv_L_LT_RinvT = tf.matmul(Rinv_L, Rinv_L, transpose_b=True)
+        if self.whiten:
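+            # whitened: q_sqrt is already the Cholesky factor of cov(v) = R⁻¹ S R⁻ᵀ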
+            Rinv_L_LT_RinvT = tf.matmul(self.q_sqrt, self.q_sqrt, transpose_b=True)
+        else:
+            Rinv_L = tf.linalg.triangular_solve(R, self.q_sqrt, lower=True)
+            Rinv_L_LT_RinvT = tf.matmul(Rinv_L, Rinv_L, transpose_b=True)

         # int_var_fx = γ |T| + trace_terms
         # trace_terms = - Tr(Kzz⁻¹ Ψ) + Tr(Kzz⁻¹ S Kzz⁻¹ Ψ)
@@ -244,7 +266,10 @@ def prior_kl(self, Kuu):
"""
KL divergence between p(u) = N(0, Kuu) and q(u) = N(μ, S)
"""
return kullback_leiblers.gauss_kl(self.q_mu[:, None], self.q_sqrt[None, :, :], Kuu)
+        if self.whiten:
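+            # whitened: the prior on v is N(0, I), so gauss_kl needs no Kuu; it computes
+            # KL[N(m, S) || N(0, I)] = 0.5 * (tr(S) + mᵀ m - M - log det S)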
+            return kullback_leiblers.gauss_kl(self.q_mu[:, None], self.q_sqrt[None, :, :])
+        else:
+            return kullback_leiblers.gauss_kl(self.q_mu[:, None], self.q_sqrt[None, :, :], Kuu)

     def maximum_log_likelihood_objective(self, events):
         return self.elbo(events)
