CobayaSampler · JesusTorrado · May 20, 2021 · Aug 25, 2021 · Sep 7, 2021 · Sep 8, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -60,6 +60,7 @@
 - Bugfix in decorrelation function #196 (by Caterina Umilta, @umilta)
 - Updated to 2021 data release (2018 data) and bugfix, #204 and #209 (by Dominic Beck, @doicbek)
 
+
 #### Planck
 
 - Fixed segfault in clik when receiving NaN in the Cl's. Partially implements #231 (thanks @lukashergt and @williamjameshandley)

diff --git a/cobaya/collection.py b/cobaya/collection.py
@@ -94,6 +94,10 @@ class SampleCollection(BaseCollection):
     but slicing can be done on the ``SampleCollection`` itself
     (returns a copy, not a view).
 
+    When ``temperature`` is different from 1, weights and log-posterior (but not priors
+    or likelihoods' chi squared's) are those of the tempered sample, obtained assuming a
+    posterior raised to the power of ``1/temperature``.
+
     Note for developers: when expanding this class or inheriting from it, always access
     the underlying DataFrame as ``self.data`` and not ``self._data``, to ensure the cache
     has been dumped. If you really need to access the actual attribute ``self._data`` in a
@@ -102,7 +106,7 @@ class SampleCollection(BaseCollection):
 
     def __init__(self, model, output=None, cache_size=_default_cache_size, name=None,
                  extension=None, file_name=None, resuming=False, load=False,
-                 onload_skip=0, onload_thin=1):
+                 temperature=1, onload_skip=0, onload_thin=1):
         super().__init__(model, name)
         self.cache_size = cache_size
         # Create/load the main data frame and the tracking indices
@@ -167,6 +171,7 @@ def __init__(self, model, output=None, cache_size=_default_cache_size, name=None
             self._out_delete()
         if not resuming and not load:
             self.reset()
+        self.temperature = temperature if temperature is not None else 1
         # Prepare fast numpy cache
         self._icol = {col: i for i, col in enumerate(self.columns)}
         self._cache_reset()
@@ -183,6 +188,7 @@ def add(self, values: Union[Sequence[float], np.ndarray],
             logpriors: Optional[Sequence[float]] = None,
             loglikes: Optional[Sequence[float]] = None,
             derived: Optional[Sequence[float]] = None,
+            temperature: Optional[float] = None,
             weight: float = 1):
         """
         Adds a point to the collection.
@@ -191,15 +197,16 @@ def add(self, values: Union[Sequence[float], np.ndarray],
         `logpriors`, `loglikes` are both required).
         """
         logposterior = self._check_before_adding(
-            values, logpost=logpost, logpriors=logpriors,
-            loglikes=loglikes, derived=derived, weight=weight)
+            values, logpost=logpost, logpriors=logpriors, loglikes=loglikes,
+            derived=derived, temperature=temperature, weight=weight)
         self._cache_add(values, logposterior=logposterior, weight=weight)
 
     def _check_before_adding(self, values: Union[Sequence[float], np.ndarray],
                              logpost: Optional[Union[LogPosterior, float]] = None,
                              logpriors: Optional[Sequence[float]] = None,
                              loglikes: Optional[Sequence[float]] = None,
                              derived: Optional[Sequence[float]] = None,
+                             temperature: Optional[float] = None,
                              weight: float = 1
                              ) -> LogPosterior:
         """
@@ -238,11 +245,26 @@ def _check_before_adding(self, values: Union[Sequence[float], np.ndarray],
                         self.log,
                         "derived params not consistent with those of LogPosterior object "
                         "passed.")
+            if temperature is not None:
+                if not np.isclose(temperature, logpost.temperature):
+                    raise LoggedError(
+                        self.log,
+                        "temperature not consistent with that of LogPosterior object "
+                        "passed.")
+            if not np.isclose(self.temperature, logpost.temperature):
+                raise LoggedError(
+                    self.log,
+                    "temperature of added point not consistent with that of sample.")
             return_logpost = logpost
         elif isinstance(logpost, float) or logpost is None:
+            if temperature is not None:
+                if not np.isclose(temperature, self.temperature):
+                    raise LoggedError(
+                        self.log, "temperature not consistent with that of sample.")
             try:
                 return_logpost = LogPosterior(
-                    logpriors=logpriors, loglikes=loglikes, derived=derived)
+                    logpost=logpost, logpriors=logpriors, loglikes=loglikes,
+                    derived=derived, temperature=temperature)
             except ValueError as valerr:
                 # missing logpriors/loglikes if logpost is None,
                 # or inconsistent sum if logpost given
@@ -410,54 +432,76 @@ def copy(self, empty=False) -> 'SampleCollection':
         return self._copy(empty=empty)
 
     def _weights_for_stats(self, first: Optional[int] = None, last: Optional[int] = None,
-                           pweight: bool = False) -> np.ndarray:
+                           pweight: bool = False, ignore_temperature: bool = False
+                           ) -> np.ndarray:
         """
         Returns weights for computation of statistical quantities such as mean and
         covariance.
 
+        If ``ignore_temperature=True`` (default ``False``), the sample is weighted as
+        if the tempered logposterior were the real one (e.g. variances would be larger
+        for hot samples).
+
         If ``pweight=True`` (default ``False``) every point is weighted with its
         probability, and sample weights are ignored (may lead to very inaccurate
         estimates!).
         """
         if pweight:
-            logps = -self[OutPar.minuslogpost][first:last]\
-                .to_numpy(dtype=np.float64, copy=True)
+            if ignore_temperature:
+                logps = -self[OutPar.minuslogpost][first:last] \
+                    .to_numpy(dtype=np.float64, copy=True)
+            else:
+                logps = -self.detempered_minuslogpost(first, last)
             logps -= max(logps)
             weights = np.exp(logps)
         else:
-            weights = self[OutPar.weight][first:last]\
+            weights = self[OutPar.weight][first:last] \
                 .to_numpy(dtype=np.float64, copy=True)
+            if not ignore_temperature:
+                weights *= self.detempering_reweight_factor(first, last)
         return weights
 
     def mean(self, first: Optional[int] = None, last: Optional[int] = None,
-             pweight: bool = False, derived: bool = False) -> np.ndarray:
+             pweight: bool = False, ignore_temperature: bool = False,
+             derived: bool = False) -> np.ndarray:
         """
         Returns the (weighted) mean of the parameters in the chain,
         between `first` (default 0) and `last` (default last obtained),
         optionally including derived parameters if `derived=True` (default `False`).
 
+        If ``ignore_temperature=True`` (default ``False``), the sample is weighted as
+        if the tempered logposterior were the real one (e.g. variances would be larger
+        for hot samples).
+
         If ``pweight=True`` (default ``False``) every point is weighted with its
-        probability, and sample weights are ignored (may lead to inaccurate
+        probability, and sample weights are ignored (may lead to very inaccurate
         estimates!).
         """
-        weights = self._weights_for_stats(first, last, pweight=pweight)
+        weights = self._weights_for_stats(first, last, pweight=pweight,
+                                          ignore_temperature=ignore_temperature)
         return np.average(self[list(self.sampled_params) +
                                (list(self.derived_params) if derived else [])]
                           [first:last].to_numpy(dtype=np.float64).T, weights=weights,
                           axis=-1)
 
     def cov(self, first: Optional[int] = None, last: Optional[int] = None,
-            pweight: bool = False, derived: bool = False) -> np.ndarray:
+            pweight: bool = False, ignore_temperature: bool = False,
+            derived: bool = False) -> np.ndarray:
         """
         Returns the (weighted) covariance matrix of the parameters in the chain,
         between `first` (default 0) and `last` (default last obtained),
         optionally including derived parameters if `derived=True` (default `False`).
 
+        If ``ignore_temperature=True`` (default ``False``), the sample is weighted as
+        if the tempered logposterior were the real one (e.g. variances would be larger
+        for hot samples).
+
         If ``pweight=True`` (default ``False``) every point is weighted with its
         probability, and sample weights are ignored (may lead to very inaccurate
         estimates!).
         """
-        weights = self._weights_for_stats(first, last, pweight=pweight)
+        weights = self._weights_for_stats(first, last, pweight=pweight,
+                                          ignore_temperature=ignore_temperature)
         if pweight:
             weight_type_kwarg = "aweights"
         elif np.allclose(np.round(weights), weights):
@@ -472,17 +516,52 @@ def cov(self, first: Optional[int] = None, last: Optional[int] = None,
             **{weight_type_kwarg: weights}))
 
     def reweight(self, importance_weights):
-        """
-        Reweights the sample with the given ``importance_weights``.
-
-        This cannot be fully undone (e.g. recovering original integer weights).
-        You may want to call this method on a copy (see :func:`SampleCollection.copy`).
-        """
         self._cache_dump()
         self._data[OutPar.weight] *= importance_weights
         self._data = self.data[self._data.weight > 0].reset_index(drop=True)
         self._n = self._data.last_valid_index() + 1
 
+    def detempering_reweight_factor(self, first: Optional[int] = None,
+                                    last: Optional[int] = None):
+        """
+        Returns the reweighting factor necessary to remove the effect of the sampling
+        temperature.
+        """
+        if self.temperature != 1:
+            logp_temp = -self[OutPar.minuslogpost][first:last].to_numpy(dtype=np.float64)
+            log_ratio = logp_temp * (self.temperature - 1)
+            return np.exp(log_ratio - np.max(log_ratio))
+        else:
+            return 1
+
+    def detempered_minuslogpost(self, first: Optional[int] = None,
+                                last: Optional[int] = None):
+        """
+        Returns the minus log-posterior of the original (temperature 1) pdf at the
+        samples, as a numpy array.
+        """
+        if self.temperature != 1:
+            return (
+                    self.data[OutPar.minuslogprior][first:last].to_numpy(
+                        dtype=np.float64) +
+                    self.data[OutPar.chi2][first:last].to_numpy(dtype=np.float64) / 2)
+        else:
+            return self[OutPar.minuslogpost][first:last].to_numpy(
+                dtype=np.float64, copy=True)
+
+    def detemper(self):
+        """
+        Removes the effect of sampling temperature.
+
+        This cannot be fully undone (e.g. recovering the original integer weights).
+        You may want to call this method on a copy (see :func:`~SampleCollection.copy`).
+        """
+        self._cache_dump()
+        self.reweight(self.detempering_reweight_factor())
+        # log-post and temperature must be changed *after* computing reweighting factors!
+        self._data[OutPar.minuslogpost] = self.detempered_minuslogpost()
+        self.temperature = 1
+
     def filtered_copy(self, where) -> 'SampleCollection':
         return self._copy(self.data[where].reset_index(drop=True))
 
@@ -516,14 +595,25 @@ def thin_samples(self, thin, inplace=False) -> 'SampleCollection':
 
     def bestfit(self):
         """Best fit (maximum likelihood) sample. Returns a copy."""
-        return self.data.loc[self.data[OutPar.chi2].idxmin()].copy()
+        return self.data.loc[self.data[OutPar.chi2].astype(np.float64).idxmin()].copy()
+
+    def MAP(self, ignore_temperature: bool = False):
+        """
+        Maximum-a-posteriori (MAP) sample. Returns a copy.
 
-    def MAP(self):
-        """Maximum-a-posteriori (MAP) sample. Returns a copy."""
-        return self.data.loc[self.data[OutPar.minuslogpost].idxmin()].copy()
+        If ``ignore_temperature=True`` (default ``False``), it returns the maximum
+        tempered posterior, instead of the original one.
+        """
+        i_max_logpost = self.data[OutPar.minuslogpost].astype(np.float64).idxmin()
+        MAP = self.data.loc[i_max_logpost].copy()
+        if not ignore_temperature:
+            MAP[OutPar.minuslogpost] = self.detempered_minuslogpost(
+                first=i_max_logpost, last=i_max_logpost + 1)[0]
+        return MAP
 
     def sampled_to_getdist_mcsamples(
-        self, first: Optional[int] = None, last: Optional[int] = None) -> MCSamples:
+            self, first: Optional[int] = None, last: Optional[int] = None,
+            ignore_temperature: bool = False) -> MCSamples:
         """
         Basic interface with getdist -- internal use only!
         (For analysis and plotting use `getdist.mcsamples.MCSamplesFromCobaya
@@ -540,6 +630,8 @@ def sampled_to_getdist_mcsamples(
                 loglikes=self.data[OutPar.minuslogpost].to_numpy(
                     dtype=np.float64)[first:last],
                 names=names)
+            if not ignore_temperature:
+                mcsamples.cool(self.temperature)
         return mcsamples
 
     # Saving and updating
@@ -655,6 +747,10 @@ def add(self, values, logpost: LogPosterior):
     def logpost(self):
         return self.results.logpost
 
+    @property
+    def temperature(self):
+        return self.results.temperature
+
     def add_to_collection(self, collection: SampleCollection):
         """
         Adds this point at the end of a given collection.