Implementation of photoz uncertainties #128

Status: Open. Wants to merge 2 commits into base: dev-clusters-both
soliket/clusters/clusters.py (204 changes: 168 additions & 36 deletions)
@@ -158,6 +158,10 @@ def _get_integrated(self, pk_intp, **kwargs):
test = np.abs(zz - nzarr[i+1])
i2 = np.argmin(test)

# if kwargs['scatter_z']:
# dndzz[:, i1:i2+1] *= 0.5*(special.erf((nzarr[i+1]-zz[i1:i2+1])/(np.sqrt(2)*sigma_z))
# - special.erf((nzarr[i]-zz[i1:i2+1])/(np.sqrt(2)*sigma_z)))

delN2D[i,:] = np.trapz(dndzz[:, i1:i2+1], x=zz[i1:i2+1]).T

self.log.info("\r Total predicted 2D N = {}".format(delN2D.sum()))
@@ -264,15 +268,15 @@ def _get_completeness(self, marr, zarr, y0, qbin, marr_500c=None, **params):

class UnbinnedClusterLikelihood(PoissonLikelihood):
name = "Unbinned Clusters"
columns = ["z", "tsz_signal", "tsz_signal_err", "tile_name"]
columns = []

verbose: bool = False
data: dict = {}
theorypred: dict = {}
selfunc: dict = {}
binning: dict = {}
YM: dict = {}
params = {"tenToA0":None, "B0":None, "C0":None, "scatter_sz":None, "bias_sz":None}
params = {"tenToA0":None, "B0":None, "C0":None, "scatter_sz":None, "bias_sz":None, "bias_wl":None}

def initialize(self):

@@ -283,18 +287,36 @@ def initialize(self):
self.zz = np.arange(0, zmax, 0.01) # redshift bounds should correspond to catalogue
if self.zz[0] == 0: self.zz[0] = 1e-5

# Quantities for photo-z scatter
self.nbins_sigmaz = 20
self.zobs_arr = np.logspace(-5, np.log10(2), 1000)

self.log.info('Number of redshift points for theory calculation = {}.'.format(len(self.zz)))
self.log.info('Number of mass points for theory calculation = {}.'.format(len(self.lnmarr)))
self.log.info('Number of y0 points for theory calculation = {}.'.format(len(self.lny)))

self.catalog = pd.DataFrame(
{
"z": self.z_cat.byteswap().newbyteorder(),
"tsz_signal": self.cat_tsz_signal.byteswap().newbyteorder(),
"tsz_signal_err": self.cat_tsz_signal_err.byteswap().newbyteorder(),
"tile_name": self.cat_tile_name.byteswap().newbyteorder()
}
)
if not self.theorypred['masscal_indiv']:
self.catalog = pd.DataFrame(
{
"z": self.z_cat.byteswap().newbyteorder(),
"z_err": self.cat_zerr.byteswap().newbyteorder(),
"tsz_signal": self.cat_tsz_signal.byteswap().newbyteorder(),
"tsz_signal_err": self.cat_tsz_signal_err.byteswap().newbyteorder(),
"tile_name": self.cat_tile_name.byteswap().newbyteorder()
}
)
else:
self.catalog = pd.DataFrame(
{
"z": self.z_cat.byteswap().newbyteorder(),
"z_err": self.cat_zerr.byteswap().newbyteorder(),
"tsz_signal": self.cat_tsz_signal.byteswap().newbyteorder(),
"tsz_signal_err": self.cat_tsz_signal_err.byteswap().newbyteorder(),
"Mobs": self.cat_Mobs.byteswap().newbyteorder(),
"Mobs_err": self.cat_Mobs_err.byteswap().newbyteorder(),
"tile_name": self.cat_tile_name.byteswap().newbyteorder()
}
)

# this is for likelihood computation
self.zcut = self.binning['exclude_zbin']
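As an aside, the two DataFrame branches in initialize() above differ only by the optional mass-calibration columns; an equivalent, tighter construction could look like the sketch below (helper name hypothetical; the optional columns are inserted before tile_name so the column order matches the original else branch):

```python
import numpy as np
import pandas as pd

def build_catalog(z, z_err, y, y_err, tile, Mobs=None, Mobs_err=None):
    """Sketch: same columns as the if/else above, built once."""
    def native(a):
        # match the .byteswap().newbyteorder() calls used above for FITS data
        return np.asarray(a).byteswap().newbyteorder()

    cols = {"z": native(z), "z_err": native(z_err),
            "tsz_signal": native(y), "tsz_signal_err": native(y_err)}
    if Mobs is not None:
        cols["Mobs"], cols["Mobs_err"] = native(Mobs), native(Mobs_err)
    cols["tile_name"] = native(tile)  # kept last, matching the original order
    return pd.DataFrame(cols)
```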
@@ -305,7 +327,7 @@ def get_requirements(self):
return get_requirements(self)

def _get_catalog(self):
return self.catalog, self.columns
return self.catalog, self.catalog.columns

# # checking rate_densities
# def get_dndlnm(self, z, pk_intp):
@@ -353,15 +375,35 @@ def _get_n_expected(self, pk_intp, **kwargs):
else:
Pfunc = self.PfuncY(Ynoise, marr, zz, kwargs)

if self.theorypred['scatter_z']:
if not hasattr(self, 'pztr'):
cat, cols = self._get_catalog()
zerr = cat['z_err']
assert np.any(zerr != 0.), 'scatter_z = True but all redshift errors in catalog are zero. Aborting.'
pzerr, zerr_binedges = np.histogram(zerr, bins=np.linspace(np.amin(zerr), np.amax(zerr),
self.nbins_sigmaz), density=True)
zerr_bins = 0.5*(zerr_binedges[:-1]+zerr_binedges[1:])
intg = np.trapz(pzerr, zerr_bins)

pzztrsig = gaussian(self.zz[:, None, None], self.zobs_arr[None, :, None], zerr_bins[None, None, :])
pzztr = np.trapz(pzztrsig*(pzerr/intg)[None, None, :], zerr_bins, axis=-1)
self.pztr = np.trapz(pzztr, self.zobs_arr, axis=-1)
pztr = self.pztr
else:
pztr = None

Ntot = 0
for index, frac in enumerate(self.skyfracs):
if zcut > 0:
Nz = self._get_n_expected_zbinned(zz, dVdz, dndlnm, Pfunc)
Nz = self._get_n_expected_zbinned(zz, dVdz, dndlnm, Pfunc, self.theorypred['scatter_z'], pztr)
zcut_arr = np.arange(zcut)
Ntot = np.sum(np.delete(Nz, zcut_arr, 0))
else:
Nz = np.trapz(dndlnm * Pfunc[:,:,index], dx=np.diff(self.lnmarr[:, None], axis=0), axis=0)
Ntot += np.trapz(Nz * dVdz, x=zz) * frac
Nz = np.trapz(dndlnm * Pfunc[:,:,index], self.lnmarr, axis=0)
if not self.theorypred['scatter_z']:
Ntot += np.trapz(Nz * dVdz, x=zz) * frac
else:
Ntot += np.trapz(Nz * dVdz * pztr, x=zz) * frac

self.log.info("Total predicted N = {}".format(Ntot))

@@ -370,14 +412,20 @@ def _get_n_expected(self, pk_intp, **kwargs):

return Ntot

def _get_n_expected_zbinned(self, zz, dVdz, dndlnm, Pfunc):
def _get_n_expected_zbinned(self, zz, dVdz, dndlnm, Pfunc, scatter_z=False, pztr=None):

zarr = self.zarr
nzarr = self.zbins

Nz = 0
for index, frac in enumerate(self.skyfracs):
Nz += np.trapz(dndlnm * dVdz * Pfunc[:,:,index], x=self.lnmarr[:, None], axis=0) * frac
if not scatter_z:
Nz += np.trapz(dndlnm * dVdz * Pfunc[:,:,index], x=self.lnmarr[:, None], axis=0) * frac
else:
Nz += np.trapz(dndlnm * dVdz * pztr * Pfunc[:, :, index], x=self.lnmarr[:, None], axis=0) * frac

Nzz = np.zeros(len(zarr))
for i in range(len(zarr)):
@@ -407,17 +455,27 @@ def _get_rate_fn(self, pk_intp, **kwargs):
dn_dzdm_intp = interp2d(zarr, self.lnmarr, np.log(dn_dzdm), kind='cubic', fill_value=0)
#dn_dzdm_intp = interp2d(zarr, marr, np.log(dn_dzdm), kind='cubic', fill_value=0)

def Prob_per_cluster(z, tsz_signal, tsz_signal_err, tile_name):
def Prob_per_cluster(z, z_err, tsz_signal, tsz_signal_err, tile_name, Mobs=None, Mobs_err=None):

c_z = z
c_zerr = z_err
c_y = tsz_signal * 1e-4
c_yerr = tsz_signal_err * 1e-4
c_tile = tile_name

tile_index = [self.tiles_dwnsmpld[c] for c in c_tile]

c_z, c_y, c_yerr, tile_index = zip(*sorted(zip(c_z, c_y, c_yerr, tile_index)))
c_z, c_y, c_yerr, tile_index = np.array(c_z), np.array(c_y), np.array(c_yerr), np.array(tile_index)
if Mobs is not None:
c_Mobs = Mobs
c_Mobs_err = Mobs_err

# Need to sort arrays because of interp2d
tile_index = np.array([self.tiles_dwnsmpld[c] for c in c_tile])
if Mobs is None:
sort_ind = np.argsort(c_z)
c_z, c_zerr, c_y, c_yerr, tile_index = c_z[sort_ind], c_zerr[sort_ind], c_y[sort_ind], \
c_yerr[sort_ind], tile_index[sort_ind]
else:
sort_ind = np.argsort(c_z)
c_z, c_zerr, c_y, c_yerr, tile_index, c_Mobs, c_Mobs_err = c_z[sort_ind], c_zerr[sort_ind], \
c_y[sort_ind], c_yerr[sort_ind], tile_index[sort_ind], c_Mobs[sort_ind], c_Mobs_err[sort_ind]

zcut = self.zcut
if zcut > 0:
@@ -426,11 +484,50 @@ def Prob_per_cluster(z, tsz_signal, tsz_signal_err, tile_name):
print("::: Excluding clusters of z < {} in likelihood.".format(self.zbins[zcut]))
print("Total observed N = {}".format(len(c_z)))

Pfunc_ind = self.Pfunc_per(tile_index, marr, c_z, c_y, c_yerr, kwargs)
dn_dzdm = np.exp(np.squeeze(dn_dzdm_intp(c_z, self.lnmarr)))
dVdz = get_dVdz(self, c_z, dVdz_interp=True)
ans = np.trapz(dn_dzdm * Pfunc_ind.T * dVdz[None,:], dx=np.diff(marr[:,None], axis=0), axis=0)
if not self.theorypred['scatter_z']:
P_Mobs = np.ones((self.lnmarr.shape[0], c_z.shape[0]))
if Mobs is not None:
mask_Mobs = c_Mobs != 0.
P_Mobs[:, mask_Mobs] = gaussian(kwargs['bias_wl']*np.exp(self.lnmarr)[:, None], c_Mobs[None, mask_Mobs],
c_Mobs_err[None, mask_Mobs])
Pfunc_ind = self.Pfunc_per(tile_index, marr, c_z, c_y, c_yerr, kwargs)
dn_dzdm = np.exp(np.squeeze(dn_dzdm_intp(c_z, self.lnmarr)))
dVdz = get_dVdz(self, c_z, dVdz_interp=True)
ans = np.trapz(dn_dzdm * Pfunc_ind.T * dVdz[None,:] * P_Mobs, marr, axis=0)

else:
assert np.any(c_zerr != 0.), 'scatter_z = True but all redshift errors in catalog are zero. Aborting.'
P_Mobs = np.ones((self.lnmarr.shape[0], c_z.shape[0]))
if Mobs is not None:
mask_Mobs = c_Mobs != 0.
P_Mobs[:, mask_Mobs] = gaussian(kwargs['bias_wl']*np.exp(self.lnmarr)[:, None], c_Mobs[None, mask_Mobs],
c_Mobs_err[None, mask_Mobs])
# Clusters with speczs
mask_nozerr = c_zerr == 0.
tile_index_nozerr = tile_index[mask_nozerr]
c_z_nozerr = c_z[mask_nozerr]
c_y_nozerr = c_y[mask_nozerr]
c_yerr_nozerr = c_yerr[mask_nozerr]
Pfunc_ind = self.Pfunc_per(tile_index_nozerr, marr, c_z_nozerr, c_y_nozerr, c_yerr_nozerr, kwargs)
dn_dzdm = np.exp(dn_dzdm_intp(c_z_nozerr, self.lnmarr))
dVdz = get_dVdz(self, c_z_nozerr, dVdz_interp=True)
rates_nozerr = np.trapz(dn_dzdm * Pfunc_ind.T * dVdz[None,:] * P_Mobs[:, mask_nozerr], marr, axis=0)

# Clusters with photozs
mask_zerr = c_zerr != 0.
tile_index_zerr = tile_index[mask_zerr]
c_z_zerr = c_z[mask_zerr]
c_zerr_zerr = c_zerr[mask_zerr]
c_y_zerr = c_y[mask_zerr]
c_yerr_zerr = c_yerr[mask_zerr]
P_z = gaussian(self.zz[None, :], c_z_zerr[:, None], c_zerr_zerr[:, None])
dn_dzdm = np.exp(dn_dzdm_intp(self.zz, self.lnmarr))
dVdz = get_dVdz(self, self.zz, dVdz_interp=True)
Pfunc_ind = self.Pfunc_per(tile_index_zerr, marr, self.zz, c_y_zerr, c_yerr_zerr, kwargs,
use_zscatter=True)
rates_zerr = np.trapz(dn_dzdm[None, :, :] * np.transpose(Pfunc_ind, axes=(0, 2, 1)) *
P_Mobs.T[mask_zerr, :, None], marr, axis=1)
rates_zerr = np.trapz(rates_zerr * P_z * dVdz[None, :], self.zz, axis=1)

ans = np.concatenate((rates_nozerr, rates_zerr))
return ans

return Prob_per_cluster
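Schematically, the photo-z branch above evaluates, for each cluster i, rate_i = ∫ dz N(z; z_i, σ_z,i) dV/dz ∫ dlnM dn/dlnM P(y_i | M, z) P(M_obs,i | M), where the mass-calibration term is the Gaussian N(bias_wl · M; M_obs, σ_Mobs) when an individual mass is available. A compact sketch under assumed array shapes (function name hypothetical):

```python
import numpy as np

def per_cluster_rate_photoz(zz, dVdz, dndlnm, P_y, P_z, P_Mobs, lnmarr):
    """rate_i = int dz P_z(i,z) dV/dz int dlnM dn/dlnM(M,z) P_y(i,M,z) P_Mobs(i,M).

    Assumed shapes: zz (Nz,), dVdz (Nz,), dndlnm (Nm, Nz), P_y (Nc, Nm, Nz),
    P_z (Nc, Nz), P_Mobs (Nc, Nm), lnmarr (Nm,).
    """
    inner = np.trapz(dndlnm[None, :, :] * P_y * P_Mobs[:, :, None], lnmarr, axis=1)  # (Nc, Nz)
    return np.trapz(inner * P_z * dVdz[None, :], zz, axis=1)                          # (Nc,)
```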
@@ -537,7 +634,7 @@ def P_of_gt_SN(self, LgY, marr, z, Ynoise, params):

def PfuncY(self, Ynoise, marr, z, params):
LgY = self.lny
P_func = np.outer(marr, np.zeros([len(z)]))
# P_func = np.outer(marr, np.zeros([len(z)]))
P_func = self.P_of_gt_SN(LgY, marr, z, Ynoise, params)
return P_func

@@ -546,7 +643,7 @@ def Y_prob(self, Y_c, LgY, Ynoise):
ans = gaussian(Y, Y_c, Ynoise)
return ans

def Pfunc_per(self, tile_index, marr, z, Y_c, Y_err, params):
def Pfunc_per(self, tile_index, marr, z, Y_c, Y_err, params, use_zscatter=False):

if params["scatter_sz"] == 0:
if self.theorypred['md_hmf'] != self.theorypred['md_ym']:
@@ -558,10 +655,21 @@ def Pfunc_per(self, tile_index, marr, z, Y_c, Y_err, params, use_zscatter=False):
else:
marr_500c = marr_ymmd

Ytilde = get_y0(self, marr_ymmd, z, marr_500c, use_Q=True, Ez_interp=True, tile_index=tile_index, **params)
LgYtilde = np.log(Ytilde)
P_Y_sig = np.nan_to_num(self.Y_prob(Y_c[:, None], LgYtilde, Y_err[:, None]))
ans = P_Y_sig
if not use_zscatter:
Ytilde = get_y0(self, marr_ymmd, z, marr_500c, use_Q=True, Ez_interp=True, tile_index=tile_index, **params)
LgYtilde = np.log(Ytilde)
P_Y_sig = np.nan_to_num(self.Y_prob(Y_c[:, None], LgYtilde, Y_err[:, None]))
ans = P_Y_sig
else:
Ytilde = get_y0(self, marr_ymmd, z, marr_500c, use_Q=True, Ez_interp=True, tile_index=tile_index, **params)
LgYtilde = np.log(Ytilde)
P_Y_sig = np.nan_to_num(self.Y_prob(Y_c[:, None, None], LgYtilde, Y_err[:, None, None]))
ans = P_Y_sig
# else:
# Ytilde = get_y0(self, marr_ymmd, z, marr_500c, use_Q=True, Ez_interp=True, tile_index=tile_index, **params)
# LgYtilde = np.log(Ytilde)
# P_Y_sig = np.nan_to_num(self.Y_prob(Y_c, LgYtilde, Y_err))
# ans = P_Y_sig

else:
LgY = self.lny
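The use_zscatter branch in Pfunc_per above differs from the default one only in broadcast rank: Ytilde keeps the full redshift grid, so the observed Y_c and Y_err take two trailing singleton axes instead of one. A toy illustration of the broadcasting pattern (shapes assumed, not taken from the code):

```python
import numpy as np

def gaussian(x, mu, sigma):
    # normalized 1D Gaussian; assumed form of soliket's helper
    return np.exp(-0.5 * ((x - mu) / sigma) ** 2) / (np.sqrt(2 * np.pi) * sigma)

Nc, Nz, Nm = 3, 5, 4
Y_c, Y_err = np.full(Nc, 1e-4), np.full(Nc, 2e-5)

# default branch: one redshift per cluster, Ytilde is (Nc, Nm)
Ytilde_2d = np.full((Nc, Nm), 1e-4)
P_2d = gaussian(Ytilde_2d, Y_c[:, None], Y_err[:, None])               # (Nc, Nm)

# use_zscatter branch: Ytilde carries the redshift grid, (Nc, Nz, Nm)
Ytilde_3d = np.full((Nc, Nz, Nm), 1e-4)
P_3d = gaussian(Ytilde_3d, Y_c[:, None, None], Y_err[:, None, None])   # (Nc, Nz, Nm)
```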
@@ -613,13 +721,27 @@ def initialize_common(self):
assert self.selfunc['dwnsmpl_bins'] is not None, 'resolution = downsample but no bin number given. Aborting.'
self.log.info('Running completeness with down-sampled selection function inputs.')

# Photoz error settings
if 'scatter_z' not in self.theorypred:
self.log.info('Not considering photoz uncertainties.')
self.theorypred['scatter_z'] = False
# Mass-calibration settings
if 'masscal_indiv' not in self.theorypred:
self.log.info('Not considering cluster-based mass-calibration.')
self.theorypred['masscal_indiv'] = False

catf = fits.open(os.path.join(self.data_directory, self.datafile))
data = catf[1].data
zcat = data.field("redshift")
qcat = data.field("fixed_SNR") #NB note that there are another SNR in the catalogue
cat_tsz_signal = data.field("fixed_y_c")
cat_tsz_signal_err = data.field("fixed_err_y_c")
cat_tile_name = data.field("tileName")
cat_zerr = data.field('redshiftErr')
if self.theorypred['masscal_indiv']:
assert 'Mobs' in data.columns.names, 'Cluster-based mass calibration requested but no individual masses provided. Aborting.'
cat_Mobs = data.field('Mobs')
cat_Mobs_err = data.field('Mobs_err')
# to print all columns: print(catf[1].columns)

# SPT-style SNR bias correction, this should be applied before applying SNR cut
@@ -633,6 +755,10 @@
self.cat_tsz_signal = cat_tsz_signal[ind]
self.cat_tsz_signal_err = cat_tsz_signal_err[ind]
self.cat_tile_name = cat_tile_name[ind]
self.cat_zerr = cat_zerr[ind]
if self.theorypred['masscal_indiv']:
self.cat_Mobs = cat_Mobs[ind]
self.cat_Mobs_err = cat_Mobs_err[ind]

self.N_cat = len(self.q_cat)
self.log.info('Total number of clusters in catalogue = {}.'.format(len(zcat)))
@@ -1261,10 +1387,16 @@ def get_splQ(self, theta, tile_index=None):

if tile_index is not None: # for faster rate_fn in unbinned

chosenQ = np.zeros((newQ.shape[1], newQ.shape[2]))
for i in range(len(tile_index)):
chosenQ[i, :] = newQ[tile_index[i], i, :]
newQ = chosenQ
if len(tile_index) == newQ.shape[1]:
chosenQ = np.zeros((newQ.shape[1], newQ.shape[2]))
for i in range(len(tile_index)):
chosenQ[i, :] = newQ[tile_index[i], i, :]
newQ = chosenQ
else:
chosenQ = np.zeros((len(tile_index), newQ.shape[1], newQ.shape[2]))
for i in range(len(tile_index)):
chosenQ[i, :] = newQ[tile_index[i], :]
newQ = chosenQ

else:
newQ = []
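The two loops added to get_splQ can each be collapsed into one advanced-indexing operation; a sketch, assuming newQ has shape (Ntile, Ncluster, Ntheta) when one Q row is selected per cluster and (Ntile, Nz, Ntheta) otherwise:

```python
import numpy as np

def choose_Q(newQ, tile_index):
    """Equivalent of the two loops above via advanced indexing (sketch)."""
    tile_index = np.asarray(tile_index)
    if len(tile_index) == newQ.shape[1]:
        # one Q row per cluster: element i is newQ[tile_index[i], i, :]
        return newQ[tile_index, np.arange(len(tile_index)), :]
    # otherwise one full (Nz, Ntheta) slab per cluster's tile
    return newQ[tile_index, :, :]
```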
soliket/poisson_data.py (2 changes: 1 addition & 1 deletion)
@@ -19,7 +19,7 @@ def poisson_logpdf(n_expected, catalog, columns, rate_fn, name="unbinned"):
elapsed = time.time() - start
print("\r ::: rate density calculation took {:.3f} seconds.".format(elapsed))

loglike = -n_expected + np.nansum(np.log(rate_densities))
loglike = -n_expected + np.nansum(np.log(rate_densities[rate_densities!=0.]))

print("\r ::: 2D ln likelihood = ", loglike)
