Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tempered mcmc #180

Closed
wants to merge 23 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
aca9102
mcmc: tempereture: basic implementation [skip travis]
JesusTorrado May 20, 2021
579269a
Merge branch 'master' into mcmc_tempered [skip travis]
JesusTorrado Aug 25, 2021
937c19d
more on temperatures [skip travis]
JesusTorrado Sep 7, 2021
1606031
collection: some type hinting
JesusTorrado Sep 8, 2021
5a12f05
temperature: fixes
JesusTorrado Sep 8, 2021
4e80362
collection: bugfixes and reworking [skip travis]
JesusTorrado Sep 8, 2021
d524abf
collection: removed old slicing made it unnecessary bc caching
JesusTorrado Sep 8, 2021
3094a3d
Merge branch 'master' into mcmc_tempered
JesusTorrado Sep 8, 2021
3484e27
mcmc: takes temperature into account for bound computation
JesusTorrado Sep 8, 2021
0580398
mcmc_tempered: bugfix in Collection, and some refactoring
JesusTorrado Sep 22, 2021
0f99386
collection: bugfix (new pandas behaviour) and temp in MAP
JesusTorrado Sep 22, 2021
89b3188
mcmc_tempered: documentation
JesusTorrado Sep 22, 2021
ad74149
mcmc_tempered: added test
JesusTorrado Sep 22, 2021
81f75be
typo
JesusTorrado Nov 22, 2021
5dd8be8
docs typo
JesusTorrado Nov 23, 2021
6dc60cd
typos and clarifications
JesusTorrado Nov 23, 2021
4597f50
model and collection: more typing
JesusTorrado Nov 24, 2021
b169ec6
collection: covmat computation robustness
JesusTorrado Nov 25, 2021
48fc2e6
prior: added method set_reference to update reference pdf, MPI-aware
JesusTorrado Dec 5, 2021
d929322
Merge branch 'master' into mcmc_tempered
cmbant Feb 24, 2022
1b42142
Merge branch 'master' into mcmc_tempered
cmbant Feb 24, 2022
6a89357
Merge branch 'master' into mcmc_tempered
cmbant Oct 6, 2022
8e8a5f4
trivial
cmbant Oct 6, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
- Bugfix in decorrelation function #196 (by Caterina Umilta, @umilta)
- Updated to 2021 data release (2018 data) and bugfix, #204 and #209 (by Dominic Beck, @doicbek)


#### Planck

- Fixed segfault in clik when receiving NaN in the Cl's. Partially implements #231 (thanks @lukashergt and @williamjameshandley)
Expand Down
144 changes: 120 additions & 24 deletions cobaya/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ class SampleCollection(BaseCollection):
but slicing can be done on the ``SampleCollection`` itself
(returns a copy, not a view).

When ``temperature`` is different from 1, weights and log-posterior (but not priors
or likelihoods' chi squared's) are those of the tempered sample, obtained assuming a
posterior raised to the power of ``1/temperature``.

Note for developers: when expanding this class or inheriting from it, always access
the underlying DataFrame as ``self.data`` and not ``self._data``, to ensure the cache
has been dumped. If you really need to access the actual attribute ``self._data`` in a
Expand All @@ -102,7 +106,7 @@ class SampleCollection(BaseCollection):

def __init__(self, model, output=None, cache_size=_default_cache_size, name=None,
extension=None, file_name=None, resuming=False, load=False,
onload_skip=0, onload_thin=1):
temperature=1, onload_skip=0, onload_thin=1):
super().__init__(model, name)
self.cache_size = cache_size
# Create/load the main data frame and the tracking indices
Expand Down Expand Up @@ -167,6 +171,7 @@ def __init__(self, model, output=None, cache_size=_default_cache_size, name=None
self._out_delete()
if not resuming and not load:
self.reset()
self.temperature = temperature if temperature is not None else 1
# Prepare fast numpy cache
self._icol = {col: i for i, col in enumerate(self.columns)}
self._cache_reset()
Expand All @@ -183,6 +188,7 @@ def add(self, values: Union[Sequence[float], np.ndarray],
logpriors: Optional[Sequence[float]] = None,
loglikes: Optional[Sequence[float]] = None,
derived: Optional[Sequence[float]] = None,
temperature: Optional[float] = None,
weight: float = 1):
"""
Adds a point to the collection.
Expand All @@ -191,15 +197,16 @@ def add(self, values: Union[Sequence[float], np.ndarray],
`logpriors`, `loglikes` are both required).
"""
logposterior = self._check_before_adding(
values, logpost=logpost, logpriors=logpriors,
loglikes=loglikes, derived=derived, weight=weight)
values, logpost=logpost, logpriors=logpriors, loglikes=loglikes,
derived=derived, temperature=temperature, weight=weight)
self._cache_add(values, logposterior=logposterior, weight=weight)

def _check_before_adding(self, values: Union[Sequence[float], np.ndarray],
logpost: Optional[Union[LogPosterior, float]] = None,
logpriors: Optional[Sequence[float]] = None,
loglikes: Optional[Sequence[float]] = None,
derived: Optional[Sequence[float]] = None,
temperature: Optional[float] = None,
weight: float = 1
) -> LogPosterior:
"""
Expand Down Expand Up @@ -238,11 +245,26 @@ def _check_before_adding(self, values: Union[Sequence[float], np.ndarray],
self.log,
"derived params not consistent with those of LogPosterior object "
"passed.")
if temperature is not None:
if not np.isclose(temperature, logpost.temperature):
raise LoggedError(
self.log,
"temperature not consistent with that of LogPosterior object "
"passed.")
if not np.isclose(self.temperature, logpost.temperature):
raise LoggedError(
self.log,
"temperature of added point not consistent with that of sample.")
return_logpost = logpost
elif isinstance(logpost, float) or logpost is None:
if temperature is not None:
if not np.isclose(temperature, self.temperature):
raise LoggedError(
self.log, "temperature not consistent with that of sample.")
try:
return_logpost = LogPosterior(
logpriors=logpriors, loglikes=loglikes, derived=derived)
logpost=logpost, logpriors=logpriors, loglikes=loglikes,
derived=derived, temperature=temperature)
except ValueError as valerr:
# missing logpriors/loglikes if logpost is None,
# or inconsistent sum if logpost given
Expand Down Expand Up @@ -410,54 +432,76 @@ def copy(self, empty=False) -> 'SampleCollection':
return self._copy(empty=empty)

def _weights_for_stats(self, first: Optional[int] = None, last: Optional[int] = None,
pweight: bool = False) -> np.ndarray:
pweight: bool = False, ignore_temperature: bool = False
) -> np.ndarray:
"""
Returns weights for computation of statistical quantities such as mean and
covariance.

If ``ignore_temperature=True`` (default ``False``), the sample is weighted as
if the tempered logposterior were the real one (e.g. variances would be larger
for hot samples).

If ``pweight=True`` (default ``False``) every point is weighted with its
probability, and sample weights are ignored (may lead to very inaccurate
estimates!).
"""
if pweight:
logps = -self[OutPar.minuslogpost][first:last]\
.to_numpy(dtype=np.float64, copy=True)
if ignore_temperature:
logps = -self[OutPar.minuslogpost][first:last] \
.to_numpy(dtype=np.float64, copy=True)
else:
logps = -self.detempered_minuslogpost(first, last)
logps -= max(logps)
weights = np.exp(logps)
else:
weights = self[OutPar.weight][first:last]\
weights = self[OutPar.weight][first:last] \
.to_numpy(dtype=np.float64, copy=True)
if not ignore_temperature:
weights *= self.detempering_reweight_factor(first, last)
return weights

def mean(self, first: Optional[int] = None, last: Optional[int] = None,
pweight: bool = False, derived: bool = False) -> np.ndarray:
pweight: bool = False, ignore_temperature: bool = False,
derived: bool = False) -> np.ndarray:
"""
Returns the (weighted) mean of the parameters in the chain,
between `first` (default 0) and `last` (default last obtained),
optionally including derived parameters if `derived=True` (default `False`).

If ``ignore_temperature=True`` (default ``False``), the sample is weighted as
if the tempered logposterior were the real one (e.g. variances would be larger
for hot samples).

If ``pweight=True`` (default ``False``) every point is weighted with its
probability, and sample weights are ignored (may lead to inaccurate
probability, and sample weights are ignored (may lead to very inaccurate
estimates!).
"""
weights = self._weights_for_stats(first, last, pweight=pweight)
weights = self._weights_for_stats(first, last, pweight=pweight,
ignore_temperature=ignore_temperature)
return np.average(self[list(self.sampled_params) +
(list(self.derived_params) if derived else [])]
[first:last].to_numpy(dtype=np.float64).T, weights=weights,
axis=-1)

def cov(self, first: Optional[int] = None, last: Optional[int] = None,
pweight: bool = False, derived: bool = False) -> np.ndarray:
pweight: bool = False, ignore_temperature: bool = False,
derived: bool = False) -> np.ndarray:
"""
Returns the (weighted) covariance matrix of the parameters in the chain,
between `first` (default 0) and `last` (default last obtained),
optionally including derived parameters if `derived=True` (default `False`).

If ``ignore_temperature=True`` (default ``False``), the sample is weighted as
if the tempered logposterior were the real one (e.g. variances would be larger
for hot samples).

If ``pweight=True`` (default ``False``) every point is weighted with its
probability, and sample weights are ignored (may lead to very inaccurate
estimates!).
"""
weights = self._weights_for_stats(first, last, pweight=pweight)
weights = self._weights_for_stats(first, last, pweight=pweight,
ignore_temperature=ignore_temperature)
if pweight:
weight_type_kwarg = "aweights"
elif np.allclose(np.round(weights), weights):
Expand All @@ -472,17 +516,52 @@ def cov(self, first: Optional[int] = None, last: Optional[int] = None,
**{weight_type_kwarg: weights}))

def reweight(self, importance_weights):
"""
Reweights the sample with the given ``importance_weights``.

This cannot be fully undone (e.g. recovering original integer weights).
You may want to call this method on a copy (see :func:`SampleCollection.copy`).
"""
self._cache_dump()
self._data[OutPar.weight] *= importance_weights
self._data = self.data[self._data.weight > 0].reset_index(drop=True)
self._n = self._data.last_valid_index() + 1

def detempering_reweight_factor(self, first: Optional[int] = None,
last: Optional[int] = None):
"""
Returns the reweighting factor necessary to remove the effect of the sampling
temperature.
"""
if self.temperature != 1:
logp_temp = -self[OutPar.minuslogpost][first:last].to_numpy(dtype=np.float64)
log_ratio = logp_temp * (self.temperature - 1)
return np.exp(log_ratio - np.max(log_ratio))
else:
return 1

def detempered_minuslogpost(self, first: Optional[int] = None,
last: Optional[int] = None):
"""
Returns the minus log-posterior of the original (temperature 1) pdf at the
samples, as a numpy array.
"""
if self.temperature != 1:
return (
self.data[OutPar.minuslogprior][first:last].to_numpy(
dtype=np.float64) +
self.data[OutPar.chi2][first:last].to_numpy(dtype=np.float64) / 2)
else:
return self[OutPar.minuslogpost][first:last].to_numpy(
dtype=np.float64, copy=True)

def detemper(self):
"""
Removes the effect of sampling temperature.

This cannot be fully undone (e.g. recovering the original integer weights).
You may want to call this method on a copy (see :func:`~SampleCollection.copy`).
"""
self._cache_dump()
self.reweight(self.detempering_reweight_factor())
# log-post and temperature must be changed *after* computing reweighting factors!
self._data[OutPar.minuslogpost] = self.detempered_minuslogpost()
self.temperature = 1

def filtered_copy(self, where) -> 'SampleCollection':
return self._copy(self.data[where].reset_index(drop=True))

Expand Down Expand Up @@ -516,14 +595,25 @@ def thin_samples(self, thin, inplace=False) -> 'SampleCollection':

def bestfit(self):
"""Best fit (maximum likelihood) sample. Returns a copy."""
return self.data.loc[self.data[OutPar.chi2].idxmin()].copy()
return self.data.loc[self.data[OutPar.chi2].astype(np.float64).idxmin()].copy()

def MAP(self, ignore_temperature: bool = False):
"""
Maximum-a-posteriori (MAP) sample. Returns a copy.

def MAP(self):
"""Maximum-a-posteriori (MAP) sample. Returns a copy."""
return self.data.loc[self.data[OutPar.minuslogpost].idxmin()].copy()
If ``ignore_temperature=True`` (default ``False``), it returns the maximum
tempered posterior, instead of the original one.
"""
i_max_logpost = self.data[OutPar.minuslogpost].astype(np.float64).idxmin()
MAP = self.data.loc[i_max_logpost].copy()
if not ignore_temperature:
MAP[OutPar.minuslogpost] = self.detempered_minuslogpost(
first=i_max_logpost, last=i_max_logpost + 1)[0]
return MAP

def sampled_to_getdist_mcsamples(
self, first: Optional[int] = None, last: Optional[int] = None) -> MCSamples:
self, first: Optional[int] = None, last: Optional[int] = None,
ignore_temperature: bool = False) -> MCSamples:
"""
Basic interface with getdist -- internal use only!
(For analysis and plotting use `getdist.mcsamples.MCSamplesFromCobaya
Expand All @@ -540,6 +630,8 @@ def sampled_to_getdist_mcsamples(
loglikes=self.data[OutPar.minuslogpost].to_numpy(
dtype=np.float64)[first:last],
names=names)
if not ignore_temperature:
mcsamples.cool(self.temperature)
return mcsamples

# Saving and updating
Expand Down Expand Up @@ -655,6 +747,10 @@ def add(self, values, logpost: LogPosterior):
def logpost(self):
return self.results.logpost

@property
def temperature(self):
return self.results.temperature

def add_to_collection(self, collection: SampleCollection):
"""
Adds this point at the end of a given collection.
Expand Down
Loading