Popular two-sample tests often measure the difference in means, or the conditional difference in means.
Tests such as MMD instead test for a difference in distribution, generalizing beyond just the first moment (the mean).
The relatively recent paper here presents a Kernel Conditional Discrepancy (KCD) test for a difference between two conditional distributions, Y_1 | X_1 vs. Y_2 | X_2. Like MMD, it generalizes the conditional test beyond the first moment, and it provides interpretable ways to analyze where the discrepancy comes from.
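As I understand it (and as the `t` function in the code below computes), the statistic is the squared RKHS distance between kernel-ridge estimates of the two conditional mean embeddings, averaged over the observed covariates:

$$
\widehat{t} \;=\; \frac{1}{n}\sum_{i=1}^{n}\Bigl\lVert \hat{\mu}_{Y_t \mid X = x_i} - \hat{\mu}_{Y_c \mid X = x_i}\Bigr\rVert_{\mathcal{H}_Y}^{2},
\qquad
\hat{\mu}_{Y_g \mid X = x} \;=\; \sum_{j}\bigl[(K_g + \lambda I)^{-1} k_g(x)\bigr]_j \, l\bigl(y_j^{(g)}, \cdot\bigr),
$$

where, for group $g \in \{c, t\}$, $K_g$ is the Gram matrix of that group's covariates, $k_g(x)$ is the vector of kernel evaluations between $x$ and those covariates, $l$ is the kernel on outcomes, and $\lambda$ is the regularisation parameter (the `w_c`/`w_t` matrices below are the $(K_g + \lambda I)^{-1}$ factors).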
The authors' code is below; it relies on the falkon package for efficient kernel computation and kernel regression. I have what I believe is correct code that relies on sklearn rather than falkon, and it replicates the results from the paper; a rough sketch of that substitution is included after the authors' code below.
import numpy as np
from falkon import kernels, models, FalkonOptions, gsc_losses
import torch
# The following function computes the kernel matrix using the Gaussian kernel between vectors x and y, with bandwidth
# sigma. If y=None, the Gram matrix is computed based on x. sigma can be a vector of bandwidths, one for each
# dimension.
def kernel_matrix(x, y=None, sigma=None):
    if y is None:
        y = x
    p = x.shape[1]
    if isinstance(sigma, float):
        sigma = sigma * np.ones(p)
    each = [0] * p
    for i in range(p):
        xi = x[:, i]
        yi = y[:, i]
        each[i] = np.exp(-0.5 * (np.reshape(xi ** 2, [-1, 1]) + np.reshape(yi ** 2, [1, -1])
                                 - 2 * np.matmul(np.reshape(xi, [-1, 1]), np.reshape(yi, [1, -1]))) / (sigma[i] ** 2))
    return np.prod(each, axis=0)
# The following function divides x and y into control and treatment groups, x_c, x_t, y_c and y_t respectively,
# based on the treatment assignment vector z. It also computes the matrix W for each group, which is the inverse
# of the Gram matrix plus the identity matrix times the regularisation parameter.
def data_prep(x, y, z, sigma_x, reg):
    x_c = x[np.array(1 - z, dtype=bool), :]
    x_t = x[np.array(z, dtype=bool), :]
    y_c = y[np.array(1 - z, dtype=bool), :]
    y_t = y[np.array(z, dtype=bool), :]
    k_c = kernel_matrix(x_c, sigma=sigma_x)
    k_t = kernel_matrix(x_t, sigma=sigma_x)
    w_c = np.linalg.inv(k_c + reg * np.identity(int(np.sum(1 - z))))
    w_t = np.linalg.inv(k_t + reg * np.identity(int(np.sum(z))))
    return x_c, x_t, y_c, y_t, w_c, w_t
# This function computes the witness function between control and treatment groups at arg_x and arg_y.
def witness(arg_x, arg_y, x_c, x_t, y_c, y_t, w_c, w_t, sigma_x, sigma_y):
    l_c = kernel_matrix(y_c, arg_y, sigma_y)
    l_t = kernel_matrix(y_t, arg_y, sigma_y)
    k_c = kernel_matrix(x_c, arg_x, sigma_x)
    k_t = kernel_matrix(x_t, arg_x, sigma_x)
    return (k_t.T @ w_t @ l_t - k_c.T @ w_c @ l_c).T
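# (My own illustration, not part of the authors' code.) The witness function is what makes the
# discrepancy interpretable: evaluated on a grid of (x, y) values it shows where the two
# conditional distributions differ. A rough usage sketch, with grid and inputs chosen by me:
#   grid_x = np.linspace(-2, 2, 50).reshape(-1, 1)
#   grid_y = np.linspace(-2, 2, 50).reshape(-1, 1)
#   w_vals = witness(grid_x, grid_y, x_c, x_t, y_c, y_t, w_c, w_t, sigma_x, sigma_y)
#   # w_vals[i, j] is the treatment-minus-control difference of the estimated conditional mean
#   # embeddings, evaluated at y = grid_y[i] given x = grid_x[j]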
# This function carries out generalised kernel ridge regression for U-statistic regression, at arg_x.
def var_regression(arg_x, x, y, sigma_x, reg):
    arg_x = np.tile(arg_x, 2)
    n = x.shape[0]
    p = x.shape[1]
    x_pairs = np.zeros((n ** 2, 2 * p))
    x_pairs[:, range(p)] = np.repeat(x, n, axis=0)
    x_pairs[:, range(p, 2 * p)] = np.tile(x, (n, 1))
    h = np.reshape(0.5 * (np.power(y, 2) + np.power(y, 2).T - 2 * np.matmul(y, y.T)), -1)
    kernel = kernels.GaussianKernel(sigma=torch.tensor(sigma_x))
    options = FalkonOptions(use_cpu=True, keops_active='no')
    model = models.Falkon(kernel=kernel, penalty=reg, M=n, options=options)
    model.fit(torch.from_numpy(x_pairs), torch.from_numpy(h))
    var = model.predict(torch.from_numpy(arg_x)).numpy()
    return var
# This function computes the test statistic, t
def t(x, x_c, x_t, y_c, y_t, w_c, w_t, sigma_x, sigma_y, size):
    k_c_tilde = kernel_matrix(x, x_c, sigma=sigma_x)
    k_t_tilde = kernel_matrix(x, x_t, sigma=sigma_x)
    l_c = kernel_matrix(y_c, sigma=sigma_y)
    l_t = kernel_matrix(y_t, sigma=sigma_y)
    l_ct = kernel_matrix(y_c, y_t, sigma=sigma_y)
    first = np.trace(k_c_tilde @ w_c @ l_c @ w_c @ k_c_tilde.T)
    second = np.trace(k_c_tilde @ w_c @ l_ct @ w_t @ k_t_tilde.T)
    third = np.trace(k_t_tilde @ w_t @ l_t @ w_t @ k_t_tilde.T)
    return (first - 2 * second + third) / size
# This function conducts a hypothesis test of equality between the control and treatment conditional distributions,
# using no_perm number of permutations.
def test(x, y, z, x_c, x_t, y_c, y_t, w_c, w_t, sigma_x, sigma_y, size, reg, no_perm, propensity=None):
    t_hat = t(x, x_c, x_t, y_c, y_t, w_c, w_t, sigma_x, sigma_y, size)
    if propensity is not None:
        e_hat = propensity
    else:
        kernel = kernels.GaussianKernel(sigma=torch.tensor(sigma_x))
        options = FalkonOptions(use_cpu=True, keops_active='no')
        loss = gsc_losses.LogisticLoss(kernel)
        logistic_model = models.LogisticFalkon(kernel=kernel, M=size, loss=loss, options=options,
                                               penalty_list=[1e-2, 1e-4, 1e-6, 1e-6, 1e-8],
                                               iter_list=[3, 3, 3, 8, 8])
        if len(x.shape) == 1:
            x = np.reshape(x, [-1, 1])
        zz = np.ones(size)
        zz[z < 0.5] = -1
        logistic_model.fit(torch.from_numpy(x), torch.from_numpy(zz))
        predictions = np.reshape(logistic_model.predict(torch.from_numpy(x)).numpy(), -1)
        e_hat = np.exp(predictions) / (1 + np.exp(predictions))
    t_k = np.zeros(no_perm)
    for k in range(no_perm):
        z_tilde = np.random.binomial(1, e_hat)
        x_tilde_c, x_tilde_t, y_tilde_c, y_tilde_t, w_tilde_c, w_tilde_t = data_prep(x, y, z_tilde, sigma_x, reg)
        t_k[k] = t(x, x_tilde_c, x_tilde_t, y_tilde_c, y_tilde_t, w_tilde_c, w_tilde_t, sigma_x, sigma_y, size)
    p = (1 + np.sum(t_k > t_hat)) / (1 + no_perm)
    return p
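For reference, here is a minimal sketch of the kind of sklearn substitution I mean (not necessarily identical to my actual port). The only falkon-dependent pieces are the propensity fit inside `test` and the kernel ridge regression in `var_regression`; the propensity can be estimated with `sklearn.linear_model.LogisticRegression` and passed in via the `propensity` argument so the falkon logistic model is never touched. The synthetic data, bandwidths, and regularisation below are my own choices for illustration, not from the paper.

```python
import numpy as np
from sklearn.linear_model import LogisticRegression

# Synthetic illustration (my own choices, not from the paper):
# under H0 the conditional law of Y given X is the same in both groups,
# so the returned p-value should be roughly uniform over repeated runs.
rng = np.random.default_rng(0)
n = 200
x = rng.normal(size=(n, 1))
z = rng.binomial(1, 1 / (1 + np.exp(-x[:, 0])))          # covariate-dependent assignment
y = x + rng.normal(scale=0.5, size=(n, 1))               # same conditional law in both groups

sigma_x, sigma_y, reg = 1.0, 1.0, 1e-3
x_c, x_t, y_c, y_t, w_c, w_t = data_prep(x, y, z, sigma_x, reg)

# Propensity e(x) = P(Z = 1 | X = x) via sklearn instead of LogisticFalkon.
e_hat = LogisticRegression().fit(x, z).predict_proba(x)[:, 1]

p_value = test(x, y, z, x_c, x_t, y_c, y_t, w_c, w_t,
               sigma_x, sigma_y, size=n, reg=reg, no_perm=100,
               propensity=e_hat)
print(p_value)
```

Similarly, the regression inside `var_regression` can be reproduced with `sklearn.kernel_ridge.KernelRidge(alpha=reg, kernel='rbf')` in place of `models.Falkon`, at the cost of losing the Nyström speed-up.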