From c9686758cceb7d5b0d376dbf64654ecb961e2bec Mon Sep 17 00:00:00 2001
From: Eric Charles
Date: Wed, 29 Nov 2023 12:56:48 -0800
Subject: [PATCH] delinting, black (#214)

* delinting, black

* Update src/qp/factory.py

Sadly, we don't mangage pdfs

Co-authored-by: Melissa DeLucchi <113376043+delucchi-cmu@users.noreply.github.com>

---------

Co-authored-by: Melissa DeLucchi <113376043+delucchi-cmu@users.noreply.github.com>
---
 pyproject.toml                         |   1 +
 src/qp/__init__.py                     |  14 +-
 src/qp/conversion_funcs.py             | 125 ++++++------
 src/qp/dict_utils.py                   |  37 ++--
 src/qp/ensemble.py                     | 116 ++++++-----
 src/qp/factory.py                      | 106 +++++-----
 src/qp/hist_pdf.py                     | 107 ++++++----
 src/qp/interp_pdf.py                   | 170 ++++++++++------
 src/qp/lazy_modules.py                 |   6 +-
 src/qp/metrics/__init__.py             |   6 +-
 src/qp/metrics/array_metrics.py        |   7 +-
 src/qp/metrics/brier.py                |   2 +-
 src/qp/metrics/factory.py              |  27 +--
 src/qp/metrics/goodness_of_fit.py      |  28 ++-
 src/qp/metrics/metrics.py              | 158 ++++++++++-----
 src/qp/metrics/pit.py                  |  51 +++--
 src/qp/mixmod_pdf.py                   | 107 ++++++----
 src/qp/packed_interp_pdf.py            | 152 +++++++++-----
 src/qp/packing_utils.py                |  53 +++--
 src/qp/pdf_gen.py                      |  56 +++---
 src/qp/plotting.py                     | 126 +++++++-----
 src/qp/quant_pdf.py                    | 133 ++++++++-----
 .../cdf_spline_derivative.py           |   4 +-
 .../dual_spline_average.py             |  18 +-
 .../piecewise_constant.py              |   6 +-
 .../piecewise_linear.py                |   3 +-
 src/qp/scipy_pdfs.py                   |  26 ++-
 src/qp/sparse_pdf.py                   |  82 ++++----
 src/qp/sparse_rep.py                   | 185 ++++++++++--------
 src/qp/spline_pdf.py                   | 110 ++++++-----
 src/qp/test_data.py                    |  34 +++-
 src/qp/test_funcs.py                   | 105 +++++-----
 src/qp/utils.py                        | 125 ++++++++----
 src/qp/version.py                      |   1 +
 tests/qp/benchmark.py                  |  57 +++---
 tests/qp/fidelity.py                   |  44 +++--
 tests/qp/quant_pdf/test_quant_pdf.py   |  23 ++-
 .../test_cdf_spline_derivative.py      |  69 +++++--
 .../test_dual_spline_average.py        |  67 +++++--
 .../test_piecewise_constant.py         |  52 +++--
 .../test_piecewise_linear.py           |  51 +++--
 tests/qp/test_auto.py                  |  64 +++---
 tests/qp/test_base_metric_classes.py   |  13 +-
 tests/qp/test_brier.py                 |  69 ++++---
 tests/qp/test_ensemble.py              | 129 ++++++------
 tests/qp/test_eval_funcs.py            |  69 ++++---
 tests/qp/test_infrastructure.py        |  36 ++--
 tests/qp/test_metric_factory.py        |  11 +-
 tests/qp/test_metrics.py               |  29 +--
 tests/qp/test_parallel.py              |  67 +++----
 tests/qp/test_pit.py                   |   5 +-
 tests/qp/test_point_metrics.py         |  21 +-
 tests/qp/test_scipy_vectorization.py   |   9 +-
 tests/qp/test_utils.py                 |  77 +++++---
 54 files changed, 1980 insertions(+), 1269 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2a46a2a8..5466c877 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -94,6 +94,7 @@ disable = [
   "duplicate-code",
   "use-dict-literal",
   "broad-exception-caught",
+  "consider-using-f-string",
 ]
 max-line-length = 110
 max-locals = 50
diff --git a/src/qp/__init__.py b/src/qp/__init__.py
index 5c31cc53..f9cfb46d 100644
--- a/src/qp/__init__.py
+++ b/src/qp/__init__.py
@@ -12,7 +12,19 @@ from .scipy_pdfs import *
 from .packed_interp_pdf import *
 from .ensemble import Ensemble
-from .factory import instance, add_class, create, read, read_metadata, convert, concatenate, iterator, data_length, from_tables, is_qp_file
+from .factory import (
+    instance,
+    add_class,
+    create,
+    read,
+    read_metadata,
+    convert,
+    concatenate,
+    iterator,
+    data_length,
+    from_tables,
+    is_qp_file,
+)
 from .lazy_modules import *
 from . import utils
diff --git a/src/qp/conversion_funcs.py b/src/qp/conversion_funcs.py
index 22f939db..8be2f566 100644
--- a/src/qp/conversion_funcs.py
+++ b/src/qp/conversion_funcs.py
@@ -7,8 +7,11 @@ from scipy import interpolate as sciinterp
 
 from .lazy_modules import mixture
-from .sparse_rep import (build_sparse_representation, decode_sparse_indices,
-                         indices2shapes)
+from .sparse_rep import (
+    build_sparse_representation,
+    decode_sparse_indices,
+    indices2shapes,
+)
 
 
 def extract_vals_at_x(in_dist, **kwargs):
@@ -29,8 +32,8 @@ def extract_vals_at_x(in_dist, **kwargs):
     data : `dict`
         The extracted data
     """
-    xvals = kwargs.pop('xvals', None)
-    if xvals is None: # pragma: no cover
+    xvals = kwargs.pop("xvals", None)
+    if xvals is None:  # pragma: no cover
         raise ValueError("To convert to extract_xy_vals you must specify xvals")
     yvals = in_dist.pdf(xvals)
     return dict(xvals=xvals, yvals=yvals)
@@ -54,8 +57,8 @@ def extract_xy_vals(in_dist, **kwargs):
     data : `dict`
         The extracted data
     """
-    xvals = kwargs.pop('xvals', None)
-    if xvals is None: # pragma: no cover
+    xvals = kwargs.pop("xvals", None)
+    if xvals is None:  # pragma: no cover
         raise ValueError("To convert using extract_xy_vals you must specify xvals")
     yvals = in_dist.pdf(xvals)
     expand_x = np.ones(yvals.shape) * np.squeeze(xvals)
@@ -80,8 +83,8 @@ def extract_samples(in_dist, **kwargs):
     data : `dict`
         The extracted data
     """
-    samples = in_dist.rvs(size=kwargs.pop('size', 1000))
-    xvals = kwargs.pop('xvals')
+    samples = in_dist.rvs(size=kwargs.pop("size", 1000))
+    xvals = kwargs.pop("xvals")
     return dict(samples=samples, xvals=xvals, yvals=None)
@@ -103,8 +106,8 @@ def extract_hist_values(in_dist, **kwargs):
     data : `dict`
         The extracted data
     """
-    bins = kwargs.pop('bins', None)
-    if bins is None: # pragma: no cover
+    bins = kwargs.pop("bins", None)
+    if bins is None:  # pragma: no cover
         raise ValueError("To convert using extract_hist_samples you must specify bins")
     bins, pdfs = in_dist.histogramize(bins)
     return dict(bins=bins, pdfs=pdfs)
@@ -130,15 +133,18 @@ def extract_hist_samples(in_dist, **kwargs):
     data : `dict`
         The extracted data
     """
-    bins = kwargs.pop('bins', None)
-    size = kwargs.pop('size', 1000)
-    if bins is None: # pragma: no cover
+    bins = kwargs.pop("bins", None)
+    size = kwargs.pop("size", 1000)
+    if bins is None:  # pragma: no cover
         raise ValueError("To convert using extract_hist_samples you must specify bins")
     samples = in_dist.rvs(size=size)
 
     def hist_helper(sample):
         return np.histogram(sample, bins=bins)[0]
-    vv = np.vectorize(hist_helper, signature="(%i)->(%i)" % (samples.shape[0], bins.size-1))
+
+    vv = np.vectorize(
+        hist_helper, signature="(%i)->(%i)" % (samples.shape[0], bins.size - 1)
+    )
     pdfs = vv(samples)
     return dict(bins=bins, pdfs=pdfs)
@@ -161,14 +167,14 @@ def extract_quantiles(in_dist, **kwargs):
     data : `dict`
         The extracted data
     """
-    quants = kwargs.pop('quants', None)
-    if quants is None: # pragma: no cover
+    quants = kwargs.pop("quants", None)
+    if quants is None:  # pragma: no cover
         raise ValueError("To convert using extract_quantiles you must specify quants")
     locs = in_dist.ppf(quants)
     return dict(quants=quants, locs=locs)
 
 
-def extract_fit(in_dist, **kwargs): # pragma: no cover
+def extract_fit(in_dist, **kwargs):  # pragma: no cover
     """Convert to a functional distribution by fitting it to a set of x and y values
 
     Parameters
@@ -186,9 +192,9 @@ def extract_fit(in_dist, **kwargs): # pragma: no cover
     data : `dict`
         The extracted data
     """
-    raise NotImplementedError('extract_fit')
-    #xvals = kwargs.pop('xvals', None)
-    #if xvals is None:
+    raise NotImplementedError("extract_fit")
+    # xvals = kwargs.pop('xvals', None)
+    # if xvals is None:
     #     raise ValueError("To convert using extract_fit you must specify xvals")
     ##vals = in_dist.pdf(xvals)
@@ -215,10 +221,11 @@ def extract_mixmod_fit_samples(in_dist, **kwargs):
     data : `dict`
         The extracted data
     """
-    n_comps = kwargs.pop('ncomps', 3)
-    n_sample = kwargs.pop('nsamples', 1000)
-    random_state = kwargs.pop('random_state', None)
+    n_comps = kwargs.pop("ncomps", 3)
+    n_sample = kwargs.pop("nsamples", 1000)
+    random_state = kwargs.pop("random_state", None)
     samples = in_dist.rvs(size=n_sample, random_state=random_state)
+
     def mixmod_helper(samps):
         estimator = mixture.GaussianMixture(n_components=n_comps)
         estimator.fit(samps.reshape(-1, 1))
@@ -230,9 +237,12 @@ def mixmod_helper(samps):
 
     vv = np.vectorize(mixmod_helper, signature="(%i)->(3,%i)" % (n_sample, n_comps))
     fit_vals = vv(samples)
-    return dict(weights=fit_vals[:, 0, :], means=fit_vals[:, 1, :], stds=fit_vals[:, 2, :])
+    return dict(
+        weights=fit_vals[:, 0, :], means=fit_vals[:, 1, :], stds=fit_vals[:, 2, :]
+    )
 
-def extract_voigt_mixmod(in_dist, **kwargs): #pragma: no cover
+
+def extract_voigt_mixmod(in_dist, **kwargs):  # pragma: no cover
     """Convert to a voigt mixture model starting with a gaussian mixture model,
     trivially by setting gammas to 0
 
@@ -247,14 +257,14 @@ def extract_voigt_mixmod(in_dist, **kwargs): #pragma: no cover
         The extracted data
     """
     objdata = in_dist.objdata()
-    means = objdata['means']
-    stds = objdata['stds']
-    weights = objdata['weights']
+    means = objdata["means"]
+    stds = objdata["stds"]
+    weights = objdata["weights"]
     gammas = np.zeros_like(means)
     return dict(means=means, stds=stds, weights=weights, gammas=gammas, **kwargs)
 
 
-def extract_voigt_xy(in_dist, **kwargs): #pragma: no cover
+def extract_voigt_xy(in_dist, **kwargs):  # pragma: no cover
     """Build a voigt function basis and run a match-pursuit algorithm
     to fit gridded data
 
     Parameters
@@ -269,14 +279,14 @@ def extract_voigt_xy(in_dist, **kwargs): #pragma: no cover
     """
     sparse_results = extract_voigt_xy_sparse(in_dist, **kwargs)
-    indices = sparse_results['indices']
-    meta = sparse_results['metadata']
+    indices = sparse_results["indices"]
+    meta = sparse_results["metadata"]
 
     w, m, s, g = indices2shapes(indices, meta)
     return dict(means=m, stds=s, weights=w, gammas=g)
 
 
-def extract_voigt_xy_sparse(in_dist, **kwargs): #pragma: no cover
+def extract_voigt_xy_sparse(in_dist, **kwargs):  # pragma: no cover
     """Build a voigt function basis and run a match-pursuit algorithm
     to fit gridded data
 
     Parameters
@@ -290,11 +300,11 @@ def extract_voigt_xy_sparse(in_dist, **kwargs): #pragma: no cover
         The extracted data as shaped parameters means, stds, weights, gammas
     """
-    yvals = in_dist.objdata()['yvals']
+    yvals = in_dist.objdata()["yvals"]
 
-    default = in_dist.metadata()['xvals'][0]
-    z = kwargs.pop('xvals', default)
-    nz = kwargs.pop('nz', 300)
+    default = in_dist.metadata()["xvals"][0]
+    z = kwargs.pop("xvals", default)
+    nz = kwargs.pop("nz", 300)
 
     minz = np.min(z)
     _, j = np.where(yvals > 0)
@@ -306,7 +316,8 @@ def extract_voigt_xy_sparse(in_dist, **kwargs): #pragma: no cover
     ALL, bigD, _ = build_sparse_representation(newz, newpdf)
     return dict(indices=ALL, metadata=bigD)
 
-def extract_sparse_from_xy(in_dist, **kwargs): #pragma: no cover
+
+def extract_sparse_from_xy(in_dist, **kwargs):  # pragma: no cover
     """Extract sparse representation from an xy interpolated representation
 
     Parameters
@@ -333,12 +344,12 @@ def extract_sparse_from_xy(in_dist, **kwargs): #pragma: no cover
     This function will rebin to a grid more suited to the in_dist support by
    removing x-values corrsponding to y=0
     """
-    default = in_dist.objdata()['yvals']
-    yvals = kwargs.pop('yvals', default)
-    default = in_dist.metadata()['xvals'][0]
-    xvals = kwargs.pop('xvals', default)
-    nvals = kwargs.pop('nvals', 300)
-    #rebin to a grid more suited to the in_dist support
+    default = in_dist.objdata()["yvals"]
+    yvals = kwargs.pop("yvals", default)
+    default = in_dist.metadata()["xvals"][0]
+    xvals = kwargs.pop("xvals", default)
+    nvals = kwargs.pop("nvals", 300)
+    # rebin to a grid more suited to the in_dist support
     xmin = np.min(xvals)
     _, j = np.where(yvals > 0)
     xmax = np.max(xvals[j])
@@ -346,13 +357,13 @@ def extract_sparse_from_xy(in_dist, **kwargs): #pragma: no cover
     interp = sciinterp.interp1d(xvals, yvals, assume_sorted=True)
     newpdf = interp(newx)
     sparse_indices, metadata, _ = build_sparse_representation(newx, newpdf)
-    metadata['xvals'] = newx
-    metadata['sparse_indices'] = sparse_indices
-    metadata.pop('Ntot')
+    metadata["xvals"] = newx
+    metadata["sparse_indices"] = sparse_indices
+    metadata.pop("Ntot")
     return metadata
 
 
-def extract_xy_sparse(in_dist, **kwargs): #pragma: no cover
+def extract_xy_sparse(in_dist, **kwargs):  # pragma: no cover
     """Extract xy-interpolated representation from an sparese representation
 
     Parameters
@@ -380,11 +391,11 @@ def extract_xy_sparse(in_dist, **kwargs): #pragma: no cover
     removing x-values corrsponding to y=0
     """
-    yvals = in_dist.objdata()['yvals']
-    default = in_dist.metadata()['xvals'][0]
-    xvals = kwargs.pop('xvals', default)
-    nvals = kwargs.pop('nvals', 300)
-    #rebin to a grid more suited to the in_dist support
+    yvals = in_dist.objdata()["yvals"]
+    default = in_dist.metadata()["xvals"][0]
+    xvals = kwargs.pop("xvals", default)
+    nvals = kwargs.pop("nvals", 300)
+    # rebin to a grid more suited to the in_dist support
     xmin = np.min(xvals)
     _, j = np.where(yvals > 0)
     xmax = np.max(xvals[j])
@@ -392,16 +403,16 @@ def extract_xy_sparse(in_dist, **kwargs): #pragma: no cover
     interp = sciinterp.interp1d(xvals, yvals, assume_sorted=True)
     newpdf = interp(newx)
     sparse_indices, sparse_meta, A = build_sparse_representation(newx, newpdf)
-    #decode the sparse indices into basis indices and weights
+    # decode the sparse indices into basis indices and weights
     basis_indices, weights = decode_sparse_indices(sparse_indices)
-    #retrieve the weighted array of basis functions for each object
+    # retrieve the weighted array of basis functions for each object
     pdf_y = A[:, basis_indices] * weights
-    #normalize and sum the weighted pdfs
-    x = sparse_meta['z']
+    # normalize and sum the weighted pdfs
+    x = sparse_meta["z"]
     y = pdf_y.sum(axis=-1)
     norms = sciint.trapz(y.T, x)
     y /= norms
-    #super(sparse_gen, self).__init__(x, y.T, *args, **kwargs)
+    # super(sparse_gen, self).__init__(x, y.T, *args, **kwargs)
     xvals = x
     yvals = y.T
     return dict(xvals=xvals, yvals=yvals, **kwargs)
diff --git a/src/qp/dict_utils.py b/src/qp/dict_utils.py
index 25e40bb3..189767c2 100644
--- a/src/qp/dict_utils.py
+++ b/src/qp/dict_utils.py
@@ -88,9 +88,9 @@ def pretty_print(in_dict, prefixes, idx=0, stream=sys.stdout):
             key_str = "default"
         else:
             key_str = key
-        if isinstance(val, dict): #pragma: no cover
+        if isinstance(val, dict):  # pragma: no cover
             stream.write("%s%s:\n" % (prefix, key_str))
-            pretty_print(val, prefixes, idx+1, stream)
+            pretty_print(val, prefixes, idx + 1, stream)
         else:
             stream.write("%s%s : %s\n" % (prefix, key_str, val))
 
@@ -138,12 +138,14 @@ def check_keys(in_dicts):
 
     Raises KeyError if one does not match.
     """
-    if not in_dicts: #pragma: no cover
+    if not in_dicts:  # pragma: no cover
         return
     master_keys = in_dicts[0].keys()
     for in_dict in in_dicts[1:]:
-        if in_dict.keys() != master_keys: #pragma: no cover
-            raise ValueError("Keys to not match: %s != %s" % (in_dict.keys(), master_keys))
+        if in_dict.keys() != master_keys:  # pragma: no cover
+            raise ValueError(
+                "Keys to not match: %s != %s" % (in_dict.keys(), master_keys)
+            )
 
 
 def concatenate_dicts(in_dicts):
@@ -159,10 +161,10 @@ def concatenate_dicts(in_dicts):
     out_dict : `dict`
         The stacked dicionary
     """
-    if not in_dicts: #pragma: no cover
+    if not in_dicts:  # pragma: no cover
         return {}
     check_keys(in_dicts)
-    out_dict = { key : None for key in in_dicts[0].keys() }
+    out_dict = {key: None for key in in_dicts[0].keys()}
     for key in out_dict.keys():
         out_dict[key] = np.concatenate([in_dict[key] for in_dict in in_dicts])
     return out_dict
@@ -176,11 +178,14 @@ def check_array_shapes(in_dict, npdf):
     if in_dict is None:
         return
     for key, val in in_dict.items():
-        if np.size(val) == 1 and npdf == 1: #pragma: no cover
+        if np.size(val) == 1 and npdf == 1:  # pragma: no cover
             continue
-        if np.shape(val)[0] != npdf: #pragma: no cover
-            raise ValueError("First dimension of array %s does not match npdf: %i != %i" %
-                             (key, np.shape(val)[0], npdf))
+        if np.shape(val)[0] != npdf:  # pragma: no cover
+            raise ValueError(
+                "First dimension of array %s does not match npdf: %i != %i"
+                % (key, np.shape(val)[0], npdf)
+            )
+
 
 def compare_two_dicts(d1, d2):
     """Check that all the items in d1 and d2 match
 
     match : `bool`
         True if they all match, False otherwise
     """
-    if d1.keys() != d2.keys(): #pragma: no cover
+    if d1.keys() != d2.keys():  # pragma: no cover
         return False
     for k, v in d1.items():
         vv = d2[k]
         try:
-            if v != vv: #pragma: no cover
+            if v != vv:  # pragma: no cover
                 return False
         except ValueError:
-            if not np.allclose(v, vv): #pragma: no cover
+            if not np.allclose(v, vv):  # pragma: no cover
                 return False
     return True
@@ -211,10 +216,10 @@ def compare_dicts(in_dicts):
     match : `bool`
         True if they all match, False otherwise
     """
-    if not in_dicts: #pragma: no cover
+    if not in_dicts:  # pragma: no cover
         return True
     first_dict = in_dicts[0]
     for in_dict in in_dicts[1:]:
-        if not compare_two_dicts(first_dict, in_dict): #pragma: no cover
+        if not compare_two_dicts(first_dict, in_dict):  # pragma: no cover
             return False
     return True
diff --git a/src/qp/ensemble.py b/src/qp/ensemble.py
index 8cb10fdb..a3e4bba7 100755
--- a/src/qp/ensemble.py
+++ b/src/qp/ensemble.py
@@ -5,19 +5,21 @@ import numpy as np
 
 from tables_io import io
 
-from qp.dict_utils import (check_array_shapes, compare_dicts,
-                           concatenate_dicts, slice_dict)
+from qp.dict_utils import (
+    check_array_shapes,
+    compare_dicts,
+    concatenate_dicts,
+    slice_dict,
+)
 from qp.metrics import quick_moment
 
 # import psutil
-#import timeit
-
-
-
+# import timeit
 
 
 class Ensemble:
     """An object comprised of many qp.PDF objects to efficiently perform operations on all of them"""
+
     def __init__(self, gen_func, data, ancil=None):
         """Class constructor
 
@@ -29,7 +31,7 @@ def __init__(self, gen_func, data, ancil=None):
             Dictionary with data used to construct the ensemble
         """
-        #start_time = timeit.default_timer()
+        # start_time = timeit.default_timer()
         self._gen_func = gen_func
         self._frozen = self._gen_func(**data)
         self._gen_obj = self._frozen.dist
@@ -56,8 +58,8 @@ def __getitem__(self, key):
         """
         red_data = {}
         md = self.metadata()
-        md.pop('pdf_name')
-        md.pop('pdf_version')
+        md.pop("pdf_name")
+        md.pop("pdf_version")
         for k, v in md.items():
             red_data[k] = np.squeeze(v)
         dd = slice_dict(self.objdata(), key)
@@ -129,7 +131,7 @@ def convert_to(self, to_class, **kwargs):
         ----------
         to_class :  `class`
             Class to convert to
-        **kwargs : 
+        **kwargs :
             keyword arguments are passed to the output class constructor
 
         Other Parameters
@@ -146,11 +148,17 @@ def convert_to(self, to_class, **kwargs):
         method = kwds.pop("method", None)
         ctor_func = to_class.creation_method(method)
         class_name = to_class.name
-        if ctor_func is None: #pragma: no cover
-            raise KeyError("Class named %s does not have a creation_method named %s" % (class_name, method))
+        if ctor_func is None:  # pragma: no cover
+            raise KeyError(
+                "Class named %s does not have a creation_method named %s"
+                % (class_name, method)
+            )
         extract_func = to_class.extraction_method(method)
-        if extract_func is None: #pragma: no cover
-            raise KeyError("Class named %s does not have a extraction_method named %s" % (class_name, method))
+        if extract_func is None:  # pragma: no cover
+            raise KeyError(
+                "Class named %s does not have a extraction_method named %s"
+                % (class_name, method)
+            )
         data = extract_func(self, **kwds)
         return Ensemble(ctor_func, data=data)
@@ -178,7 +186,7 @@ def update_objdata(self, data, ancil=None):
         """
         new_data = {}
         for k, v in self.metadata().items():
-            if k in ['pdf_name', 'pdf_version']:
+            if k in ["pdf_name", "pdf_version"]:
                 continue
             new_data[k] = np.squeeze(v)
         new_data.update(self.objdata())
@@ -218,7 +226,7 @@ def objdata(self):
 
         dd = {}
         dd.update(self._frozen.kwds)
-        dd.pop('row', None)
+        dd.pop("row", None)
         dd.update(self._gen_obj.objdata)
         return dd
@@ -239,7 +247,7 @@ def set_ancil(self, ancil):
         self._ancil = ancil
 
     def add_to_ancil(self, to_add):  # pragma: no cover
-        """ Add additionaly columns to the ancillary data dict
+        """Add additionaly columns to the ancillary data dict
 
         Parameters
         ----------
@@ -256,16 +264,17 @@ def add_to_ancil(self, to_add): # pragma: no cover
         check_array_shapes(to_add, self.npdf)
         self._ancil.update(to_add)
 
-
     def append(self, other_ens):
-        """ Append another other_ens to this one
+        """Append another other_ens to this one
 
         Parameters
         ----------
         other_ens : `qp.Ensemble`
             The other Ensemble
         """
-        if not compare_dicts([self.metadata(), other_ens.metadata()]): # pragma: no cover
+        if not compare_dicts(
+            [self.metadata(), other_ens.metadata()]
+        ):  # pragma: no cover
             raise KeyError("Metadata does not match, can not append")
         full_objdata = concatenate_dicts([self.objdata(), other_ens.objdata()])
         if self._ancil is not None and other_ens.ancil is not None:  # pragma: no cover
@@ -274,9 +283,8 @@ def append(self, other_ens):
             full_ancil = None
         self.update_objdata(full_objdata, full_ancil)
 
-
     def build_tables(self):
-        """ Return dicts of numpy arrays for the meta data and object data
+        """Return dicts of numpy arrays for the meta data and object data
         for this ensemble
 
         Returns
@@ -288,7 +296,7 @@ def build_tables(self):
         """
         dd = dict(meta=self.metadata(), data=self.objdata())
         if self.ancil is not None:
-            dd['ancil'] = self.ancil
+            dd["ancil"] = self.ancil
         return dd
 
     def mode(self, grid):
@@ -379,7 +387,6 @@ def logpdf(self, x):
         """
         return self._frozen.logpdf(x)
 
-
     def cdf(self, x):
         """
         Evaluates the cumalative distribution function for the whole ensemble
@@ -422,7 +429,6 @@ def ppf(self, q):
         """
         return self._frozen.ppf(q)
 
-
     def sf(self, q):
         """
         Evaluates the survival fraction of the distribution
@@ -478,9 +484,11 @@ def rvs(self, size=None, random_state=None):
         Returns
         -------
         """
-        return self._frozen.rvs(size=(self._frozen.npdf, size), random_state=random_state)
+        return self._frozen.rvs(
+            size=(self._frozen.npdf, size), random_state=random_state
+        )
 
-    def stats(self, moments='mv'):
+    def stats(self, moments="mv"):
         """
         Retrun the stats for this ensemble
 
@@ -495,42 +503,41 @@ def stats(self, moments='mv'):
         return self._frozen.stats(moments=moments)
 
     def median(self):
-        """ Return the medians for this ensemble """
+        """Return the medians for this ensemble"""
         return self._frozen.median()
 
     def mean(self):
-        """ Return the means for this ensemble """
+        """Return the means for this ensemble"""
         return self._frozen.mean()
 
     def var(self):
-        """ Return the variences for this ensemble """
+        """Return the variences for this ensemble"""
         return self._frozen.var()
 
     def std(self):
-        """ Return the standard deviations for this ensemble """
+        """Return the standard deviations for this ensemble"""
         return self._frozen.std()
 
     def moment(self, n):
-        """ Return the nth moments for this ensemble """
+        """Return the nth moments for this ensemble"""
         return self._frozen.moment(n)
 
     def entropy(self):
-        """ Return the entropy for this ensemble """
+        """Return the entropy for this ensemble"""
         return self._frozen.entropy()
 
-    #def pmf(self, k):
+    # def pmf(self, k):
     #    """ Return the kth pmf for this ensemble """
    #    return self._frozen.pmf(k)
 
-    #def logpmf(self, k):
+    # def logpmf(self, k):
    #    """ Return the log of the kth pmf for this ensemble """
     #    return self._frozen.logpmf(k)
 
     def interval(self, alpha):
-        """ Return the intervals corresponding to a confidnce level of alpha for this ensemble"""
+        """Return the intervals corresponding to a confidnce level of alpha for this ensemble"""
         return self._frozen.interval(alpha)
 
-
     def histogramize(self, bins):
         """
         Computes integrated histogram bin values for all PDFs
@@ -568,8 +575,7 @@ def integrate(self, limits):
         """
         return self.cdf(limits[1]) - self.cdf(limits[0])
 
-
-    def mix_mod_fit(self, comps=5): #pragma: no cover
+    def mix_mod_fit(self, comps=5):  # pragma: no cover
         """
         Fits the parameters of a given functional form to an approximation
 
@@ -593,9 +599,8 @@ def mix_mod_fit(self, comps=5): #pragma: no cover
         """
         raise NotImplementedError("mix_mod_fit %i" % comps)
 
-
     def moment_partial(self, n, limits, dx=0.01):
-        """ Return the nth moments for this over a particular range"""
+        """Return the nth moments for this over a particular range"""
         D = int((limits[-1] - limits[0]) / dx)
         grid = np.linspace(limits[0], limits[1], D)
         # dx = (limits[-1] - limits[0]) / (D - 1)
@@ -628,8 +633,8 @@ def _get_allocation_kwds(self, npdf):
         tables = self.build_tables()
         keywords = {}
         for group, tab in tables.items():
-            if group != 'meta':
-                keywords[group]={}
+            if group != "meta":
+                keywords[group] = {}
                 for key, array in tab.items():
                     shape = list(array.shape)
                     shape[0] = npdf
@@ -642,11 +647,12 @@ def initializeHdf5Write(self, filename, npdf, comm=None):
 
         Parameters
         ----------
-        filename : `str` 
+        filename : `str`
             Name of the file to create
-        npdf : `int` 
-            Total number of pdfs that will contain the file, usually larger then the size of the current ensemble
-        comm : `MPI communicator` 
+        npdf : `int`
+            Total number of pdfs that will contain the file,
+            usually larger then the size of the current ensemble
+        comm : `MPI communicator`
             Optional MPI communicator to allow parallel writing
         """
         kwds = self._get_allocation_kwds(npdf)
@@ -658,15 +664,15 @@ def writeHdf5Chunk(self, fname, start, end):
 
         Parameters
         ----------
-        fname : h5py `File object` 
-          file or group
-        start : `int` 
-          starting index of h5py file
-        end : `int` 
-          ending index in h5py file
+        fname : h5py `File object`
+          file or group
+        start : `int`
+          starting index of h5py file
+        end : `int`
+          ending index in h5py file
         """
         odict = self.build_tables().copy()
-        odict.pop('meta')
+        odict.pop("meta")
         io.writeDictToHdf5Chunk(fname, odict, start, end)
 
     def finalizeHdf5Write(self, filename):
@@ -674,11 +680,11 @@ def finalizeHdf5Write(self, filename):
 
         Parameters
         ----------
-        filename : h5py `File object` 
+        filename : h5py `File object`
             file or group
         """
         mdata = self.metadata()
-        io.finalizeHdf5Write(filename, 'meta', **mdata)
+        io.finalizeHdf5Write(filename, "meta", **mdata)
 
     # def stack(self, loc, using, vb=True):
     #     """
@@ -718,7 +724,9 @@ def finalizeHdf5Write(self, filename):
     #         self.stacked[using] = (evaluated[0], stack)
     #         return self.stacked
 
+
 # Note: A copious quantity of commented code has been removed in this commit!
 # For future reference, it can still be found here:
 # https://github.com/aimalz/qp/blob/d8d145af9514e29c76e079e869b8b4923f592f40/qp/ensemble.py
-# Critical additions still remain. Metrics of individual qp.PDF objects collected in aggregate over a qp.Ensemble are still desired.
+# Critical additions still remain. Metrics of individual qp.PDF objects collected in aggregate
+# over a qp.Ensemble are still desired.
diff --git a/src/qp/factory.py b/src/qp/factory.py
index ab91738c..a8ef2c05 100644
--- a/src/qp/factory.py
+++ b/src/qp/factory.py
@@ -20,9 +20,8 @@
 
 
 class Factory(OrderedDict):
-    """Factory that creates and mangages PDFs
+    """Factory that creates and manages PDFs"""
 
-    """
     def __init__(self):
         """C'tor"""
         super().__init__()
@@ -34,7 +33,6 @@ def _build_data_dict(md_table, data_table):
         data_dict = {}
 
         for col, col_data in md_table.items():
-
             ndim = np.ndim(col_data)
 
             if ndim > 1:
@@ -50,7 +48,7 @@ def _build_data_dict(md_table, data_table):
                 data_dict[col] = col_data
 
         for col, col_data in data_table.items():
-            if len(col_data.shape) < 2: #pragma: no cover
+            if len(col_data.shape) < 2:  # pragma: no cover
                 data_dict[col] = np.expand_dims(col_data, -1)
             else:
                 data_dict[col] = col_data
@@ -59,10 +57,12 @@ def _make_scipy_wrapped_class(self, class_name, scipy_class):
         """Build a qp class from a scipy class"""
         # pylint: disable=protected-access
-        override_dict = dict(name=class_name,
-                             version=0,
-                             freeze=Pdf_gen_wrap._my_freeze,
-                             _other_init=scipy_class.__init__)
+        override_dict = dict(
+            name=class_name,
+            version=0,
+            freeze=Pdf_gen_wrap._my_freeze,
+            _other_init=scipy_class.__init__,
+        )
         the_class = type(class_name, (Pdf_gen_wrap, scipy_class), override_dict)
         self.add_class(the_class)
@@ -82,19 +82,24 @@ def add_class(self, the_class):
         the_class : class
             The class we are adding, must inherit from Pdf_Gen
         """
-        #if not isinstance(the_class, Pdf_gen): #pragma: no cover
+        # if not isinstance(the_class, Pdf_gen): #pragma: no cover
         #    raise TypeError("Can only add sub-classes of Pdf_Gen to factory")
-        if not hasattr(the_class, 'name'): #pragma: no cover
-            raise AttributeError("Can not add class %s to factory because it doesn't have a name attribute" % the_class)
-        if the_class.name in self: #pragma: no cover
-            raise KeyError("Class nameed %s is already in factory, point to %s" % (the_class.name, self[the_class.name]))
+        if not hasattr(the_class, "name"):  # pragma: no cover
+            raise AttributeError(
+                "Can not add class %s to factory because it doesn't have a name attribute"
+                % the_class
+            )
+        if the_class.name in self:  # pragma: no cover
+            raise KeyError(
+                "Class nameed %s is already in factory, point to %s"
+                % (the_class.name, self[the_class.name])
+            )
         the_class.add_method_dicts()
         the_class.add_mappings()
         self[the_class.name] = the_class
         setattr(self, "%s_gen" % the_class.name, the_class)
         setattr(self, the_class.name, the_class.create)
 
-
     def create(self, class_name, data, method=None):
         """Make an ensemble of a particular type of distribution
 
@@ -112,7 +117,7 @@ def create(self, class_name, data, method=None):
         ens : `qp.Ensemble`
             The newly created ensemble
         """
-        if class_name not in self: #pragma: no cover
+        if class_name not in self:  # pragma: no cover
             raise KeyError("Class nameed %s is not in factory" % class_name)
         the_class = self[class_name]
         ctor_func = the_class.creation_method(method)
@@ -130,21 +135,21 @@ def from_tables(self, tables):
         This will use information in the meta data table to figure out how
         to construct the data need to build the ensemble.
         """
-        md_table = tables['meta']
-        data_table = tables['data']
-        ancil_table = tables.get('ancil')
+        md_table = tables["meta"]
+        data_table = tables["data"]
+        ancil_table = tables.get("ancil")
 
         data = self._build_data_dict(md_table, data_table)
 
-        pdf_name = data.pop('pdf_name')
-        pdf_version = data.pop('pdf_version')
-        if pdf_name not in self: #pragma: no cover
+        pdf_name = data.pop("pdf_name")
+        pdf_version = data.pop("pdf_version")
+        if pdf_name not in self:  # pragma: no cover
             raise KeyError("Class nameed %s is not in factory" % pdf_name)
 
         the_class = self[pdf_name]
         reader_convert = the_class.reader_method(pdf_version)
         ctor_func = the_class.creation_method(None)
-        if reader_convert is not None: #pragma: no cover
+        if reader_convert is not None:  # pragma: no cover
             data = reader_convert(data)
         return Ensemble(ctor_func, data=data, ancil=ancil_table)
@@ -155,7 +160,7 @@ def read_metadata(self, filename):
         Parameters
         ----------
         filename : `str`
         """
-        tables = io.read(filename, NUMPY_DICT, keys=['meta'])
+        tables = io.read(filename, NUMPY_DICT, keys=["meta"])
         return tables["meta"]
 
     def is_qp_file(self, filename):
@@ -173,9 +178,9 @@ def is_qp_file(self, filename):
         """
         try:
             # If this isn't a table-like file with a 'meta' table this will throw an exception
-            tables = io.readNative(filename, keys=['meta'])
+            tables = io.readNative(filename, keys=["meta"])
             # If the 'meta' tables doesn't have 'pdf_name' or it is empty this will throw an exception or fail
-            return len(tables['meta']['pdf_name']) > 0
+            return len(tables["meta"]["pdf_name"]) > 0
         except Exception as msg:
             # Any exception means it isn't a qp file
             print(f"This is not a qp file because {msg}")
@@ -194,15 +199,16 @@ def read(self, filename):
         need to build the ensemble.
""" _, ext = os.path.splitext(filename) - if ext in ['.pq']: - keys = ['data', 'meta', 'ancil'] + if ext in [".pq"]: + keys = ["data", "meta", "ancil"] allow_missing_keys = True else: keys = None allow_missing_keys = False - tables = io.read(filename, NUMPY_DICT, keys=keys, - allow_missing_keys=allow_missing_keys) #pylint: disable=no-member + tables = io.read( + filename, NUMPY_DICT, keys=keys, allow_missing_keys=allow_missing_keys + ) # pylint: disable=no-member return self.from_tables(tables) @@ -217,7 +223,7 @@ def data_length(self, filename): ------- nrows : `int` """ - f, _ = io.readHdf5Group(filename, 'data') + f, _ = io.readHdf5Group(filename, "data") num_rows = io.getGroupInputDataLength(f) return num_rows @@ -231,22 +237,22 @@ def iterator(self, filename, chunk_size=100_000, rank=0, parallel_size=1): chunk_size : `int` """ extension = os.path.splitext(filename)[1] - if extension not in ['.hdf5']: #pragma: no cover + if extension not in [".hdf5"]: # pragma: no cover raise TypeError("Can only use qp.iterator on hdf5 files") - metadata = io.readHdf5ToDict(filename, 'meta') - pdf_name = metadata.pop('pdf_name')[0].decode() - pdf_version = metadata.pop('pdf_version')[0] - if pdf_name not in self: #pragma: no cover + metadata = io.readHdf5ToDict(filename, "meta") + pdf_name = metadata.pop("pdf_name")[0].decode() + _pdf_version = metadata.pop("pdf_version")[0] + if pdf_name not in self: # pragma: no cover raise KeyError("Class nameed %s is not in factory" % pdf_name) the_class = self[pdf_name] # reader_convert = the_class.reader_method(pdf_version) ctor_func = the_class.creation_method(None) - f, infp = io.readHdf5Group(filename, 'data') + f, infp = io.readHdf5Group(filename, "data") try: - ancil_f, ancil_infp = io.readHdf5Group(filename, 'ancil') - except KeyError: #pragma: no cover + ancil_f, ancil_infp = io.readHdf5Group(filename, "ancil") + except KeyError: # pragma: no cover ancil_f, ancil_infp = (None, None) num_rows = io.getGroupInputDataLength(f) ranges = io.data_ranges_by_rank(num_rows, chunk_size, parallel_size, rank) @@ -280,18 +286,20 @@ def convert(self, in_dist, class_name, **kwds): """ kwds_copy = kwds.copy() method = kwds_copy.pop("method", None) - if class_name not in self: #pragma: no cover + if class_name not in self: # pragma: no cover raise KeyError("Class nameed %s is not in factory" % class_name) - if class_name not in self: #pragma: no cover + if class_name not in self: # pragma: no cover raise KeyError("Class nameed %s is not in factory" % class_name) the_class = self[class_name] extract_func = the_class.extraction_method(method) - if extract_func is None: #pragma: no cover - raise KeyError("Class named %s does not have a extraction_method named %s" % (class_name, method)) + if extract_func is None: # pragma: no cover + raise KeyError( + "Class named %s does not have a extraction_method named %s" + % (class_name, method) + ) data = extract_func(in_dist, **kwds_copy) return self.create(class_name, data, method) - def pretty_print(self, stream=sys.stdout): """Print a level of the converstion dictionary in a human-readable format @@ -319,7 +327,7 @@ def concatenate(ensembles): ens : `qp.Ensemble` The output """ - if not ensembles: #pragma: no cover + if not ensembles: # pragma: no cover return None metadata_list = [] objdata_list = [] @@ -333,18 +341,18 @@ def concatenate(ensembles): if ancil_list is not None: if ensemble.ancil is None: ancil_list = None - else: #pragma: no cover + else: # pragma: no cover ancil_list.append(ensemble.ancil) - if not 
+    if not compare_dicts(metadata_list):  # pragma: no cover
         raise ValueError("Metadata does not match")
     metadata = metadata_list[0]
     data = concatenate_dicts(objdata_list)
-    if ancil_list is not None: #pragma: no cover
+    if ancil_list is not None:  # pragma: no cover
         ancil = concatenate_dicts(ancil_list)
     else:
         ancil = None
     for k, v in metadata.items():
-        if k in ['pdf_name', 'pdf_version']:
+        if k in ["pdf_name", "pdf_version"]:
             continue
         data[k] = np.squeeze(v)
     return Ensemble(gen_func, data, ancil)
@@ -352,10 +360,12 @@ def concatenate(ensembles):
 
 _FACTORY = Factory()
 
+
 def instance():
     """Return the factory instance"""
     return _FACTORY
 
+
 stats = _FACTORY
 add_class = _FACTORY.add_class
 create = _FACTORY.create
diff --git a/src/qp/hist_pdf.py b/src/qp/hist_pdf.py
index 8c5c7622..31956099 100644
--- a/src/qp/hist_pdf.py
+++ b/src/qp/hist_pdf.py
@@ -9,14 +9,16 @@
 from qp.pdf_gen import Pdf_rows_gen
 from qp.conversion_funcs import extract_hist_values, extract_hist_samples
 from qp.plotting import get_axes_and_xlims, plot_pdf_histogram_on_axes
-from qp.utils import evaluate_hist_x_multi_y,\
-     interpolate_multi_x_y, interpolate_x_multi_y,\
-     reshape_to_pdf_size
+from qp.utils import (
+    evaluate_hist_x_multi_y,
+    interpolate_multi_x_y,
+    interpolate_x_multi_y,
+    reshape_to_pdf_size,
+)
 from qp.test_data import XBINS, HIST_DATA, TEST_XVALS, NSAMPLES
 from qp.factory import add_class
 
 
-
 class hist_gen(Pdf_rows_gen):
     """Histogram based distribution
 
@@ -40,9 +42,10 @@ class hist_gen(Pdf_rows_gen):
     ppf(0) will return bins[0]
     ppf(1) will return bins[-1]
     """
+
     # pylint: disable=protected-access
 
-    name = 'hist'
+    name = "hist"
     version = 0
 
     _support_mask = rv_continuous._support_mask
@@ -64,31 +67,32 @@ def __init__(self, bins, pdfs, *args, **kwargs):
         self._hbin_widths = self._hbins[1:] - self._hbins[:-1]
         self._xmin = self._hbins[0]
         self._xmax = self._hbins[-1]
-        if np.shape(pdfs)[-1] != self._nbins: # pragma: no cover
-            raise ValueError("Number of bins (%i) != number of values (%i)" %
-                             (self._nbins, np.shape(pdfs)[-1]))
+        if np.shape(pdfs)[-1] != self._nbins:  # pragma: no cover
+            raise ValueError(
+                "Number of bins (%i) != number of values (%i)"
+                % (self._nbins, np.shape(pdfs)[-1])
+            )
 
-        check_input = kwargs.pop('check_input', True)
+        check_input = kwargs.pop("check_input", True)
         if check_input:
             pdfs_2d = reshape_to_pdf_size(pdfs, -1)
-            sums = np.sum(pdfs_2d*self._hbin_widths, axis=1)
+            sums = np.sum(pdfs_2d * self._hbin_widths, axis=1)
             self._hpdfs = (pdfs_2d.T / sums).T
-        else: #pragma: no cover
+        else:  # pragma: no cover
             self._hpdfs = reshape_to_pdf_size(pdfs, -1)
         self._hcdfs = None
         # Set support
-        kwargs['shape'] = pdfs.shape[:-1]
+        kwargs["shape"] = pdfs.shape[:-1]
         super().__init__(*args, **kwargs)
-        self._addmetadata('bins', self._hbins)
-        self._addobjdata('pdfs', self._hpdfs)
-
+        self._addmetadata("bins", self._hbins)
+        self._addobjdata("pdfs", self._hpdfs)
 
     def _compute_cdfs(self):
         copy_shape = np.array(self._hpdfs.shape)
         copy_shape[-1] += 1
         self._hcdfs = np.ndarray(copy_shape)
-        self._hcdfs[:,0] = 0.
-        self._hcdfs[:,1:] = np.cumsum(self._hpdfs * self._hbin_widths, axis=1)
+        self._hcdfs[:, 0] = 0.0
+        self._hcdfs[:, 1:] = np.cumsum(self._hpdfs * self._hbin_widths, axis=1)
 
     @property
     def bins(self):
@@ -106,31 +110,38 @@ def _pdf(self, x, row):
 
     def _cdf(self, x, row):
         # pylint: disable=arguments-differ
-        if self._hcdfs is None: #pragma: no cover
+        if self._hcdfs is None:  # pragma: no cover
             self._compute_cdfs()
-        return interpolate_x_multi_y(x, row, self._hbins, self._hcdfs,
-                                     bounds_error=False, fill_value=(0.,1.)).ravel()
+        return interpolate_x_multi_y(
+            x, row, self._hbins, self._hcdfs, bounds_error=False, fill_value=(0.0, 1.0)
+        ).ravel()
 
     def _ppf(self, x, row):
         # pylint: disable=arguments-differ
-        if self._hcdfs is None: #pragma: no cover
+        if self._hcdfs is None:  # pragma: no cover
             self._compute_cdfs()
-        return interpolate_multi_x_y(x, row, self._hcdfs, self._hbins,
-                                     bounds_error=False, fill_value=(self._xmin, self._xmax)).ravel()
+        return interpolate_multi_x_y(
+            x,
+            row,
+            self._hcdfs,
+            self._hbins,
+            bounds_error=False,
+            fill_value=(self._xmin, self._xmax),
+        ).ravel()
 
     def _munp(self, m, *args):
-        """ compute moments """
+        """compute moments"""
         # pylint: disable=arguments-differ
         # Silence floating point warnings from integration.
-        with np.errstate(all='ignore'):
+        with np.errstate(all="ignore"):
             vals = self.custom_generic_moment(m)
         return vals
 
     def custom_generic_moment(self, m):
-        """ Compute the mth moment """
+        """Compute the mth moment"""
         m = np.asarray(m)
         dx = self._hbins[1] - self._hbins[0]
-        xv = 0.5*(self._hbins[1:] + self._hbins[:-1])
+        xv = 0.5 * (self._hbins[1:] + self._hbins[:-1])
         return np.sum(xv**m * self._hpdfs, axis=1) * dx
 
     def _updated_ctor_param(self):
@@ -138,16 +149,16 @@ def _updated_ctor_param(self):
         Set the bins as additional constructor argument
         """
         dct = super()._updated_ctor_param()
-        dct['bins'] = self._hbins
-        dct['pdfs'] = self._hpdfs
+        dct["bins"] = self._hbins
+        dct["pdfs"] = self._hpdfs
         return dct
 
     @classmethod
     def get_allocation_kwds(cls, npdf, **kwargs):
-        if 'bins' not in kwargs: #pragma: no cover
+        if "bins" not in kwargs:  # pragma: no cover
             raise ValueError("required argument 'bins' not included in kwargs")
-        nbins = len(kwargs['bins'].flatten())
-        return dict(pdfs=((npdf, nbins-1), 'f4'))
+        nbins = len(kwargs["bins"].flatten())
+        return dict(pdfs=((npdf, nbins - 1), "f4"))
 
     @classmethod
     def plot_native(cls, pdf, **kwargs):
@@ -156,10 +167,9 @@ def plot_native(cls, pdf, **kwargs):
         For a histogram this shows the bin edges
         """
         axes, _, kw = get_axes_and_xlims(**kwargs)
-        vals = pdf.dist.pdfs[pdf.kwds['row']]
+        vals = pdf.dist.pdfs[pdf.kwds["row"]]
         return plot_pdf_histogram_on_axes(axes, hist=(pdf.dist.bins, vals), **kw)
 
-
     @classmethod
     def add_mappings(cls):
         """
@@ -169,17 +179,28 @@ def add_mappings(cls):
         cls._add_extraction_method(extract_hist_values, None)
         cls._add_extraction_method(extract_hist_samples, "samples")
 
-
     @classmethod
     def make_test_data(cls):
-        """ Make data for unit tests """
-        cls.test_data = dict(hist=dict(gen_func=hist, ctor_data=dict(bins=XBINS, pdfs=HIST_DATA),\
-                                       convert_data=dict(bins=XBINS), atol_diff=1e-1, atol_diff2=1e-1, test_xvals=TEST_XVALS),
-                             hist_samples=dict(gen_func=hist, ctor_data=dict(bins=XBINS, pdfs=HIST_DATA),\
-                                               convert_data=dict(bins=XBINS, method='samples',\
-                                                                 size=NSAMPLES),\
-                                               atol_diff=1e-1, atol_diff2=1e-1,\
-                                               test_xvals=TEST_XVALS, do_samples=True))
+        """Make data for unit tests"""
+        cls.test_data = dict(
+            hist=dict(
+                gen_func=hist,
+                ctor_data=dict(bins=XBINS, pdfs=HIST_DATA),
+                convert_data=dict(bins=XBINS),
+                atol_diff=1e-1,
+                atol_diff2=1e-1,
+                test_xvals=TEST_XVALS,
+            ),
+            hist_samples=dict(
+                gen_func=hist,
+                ctor_data=dict(bins=XBINS, pdfs=HIST_DATA),
+                convert_data=dict(bins=XBINS, method="samples", size=NSAMPLES),
+                atol_diff=1e-1,
+                atol_diff2=1e-1,
+                test_xvals=TEST_XVALS,
+                do_samples=True,
+            ),
+        )
 
 
 hist = hist_gen.create
diff --git a/src/qp/interp_pdf.py b/src/qp/interp_pdf.py
index 200dbe25..60025d1e 100644
--- a/src/qp/interp_pdf.py
+++ b/src/qp/interp_pdf.py
@@ -4,15 +4,18 @@
 import numpy as np
 from scipy.stats import rv_continuous
 
-from qp.conversion_funcs import (extract_vals_at_x, extract_xy_sparse,
-                                 extract_xy_vals)
+from qp.conversion_funcs import extract_vals_at_x, extract_xy_sparse, extract_xy_vals
 from qp.factory import add_class
 from qp.pdf_gen import Pdf_rows_gen
 from qp.plotting import get_axes_and_xlims, plot_pdf_on_axes
 from qp.test_data import TEST_XVALS, XARRAY, XBINS, YARRAY
-from qp.utils import (interpolate_multi_x_multi_y, interpolate_multi_x_y,
-                      interpolate_x_multi_y, normalize_interp1d,
-                      reshape_to_pdf_size)
+from qp.utils import (
+    interpolate_multi_x_multi_y,
+    interpolate_multi_x_y,
+    interpolate_x_multi_y,
+    normalize_interp1d,
+    reshape_to_pdf_size,
+)
 
 
 class interp_gen(Pdf_rows_gen):
@@ -46,9 +49,10 @@ class interp_gen(Pdf_rows_gen):
     ppf(0) will return xvals[0]
     ppf(1) will return xvals[-1]
     """
+
     # pylint: disable=protected-access
 
-    name = 'interp'
+    name = "interp"
     version = 0
 
     _support_mask = rv_continuous._support_mask
@@ -64,37 +68,42 @@ def __init__(self, xvals, yvals, *args, **kwargs):
         yvals : array_like
             The y-values used to do the interpolation
         """
-        if np.size(xvals) != np.shape(yvals)[-1]: # pragma: no cover
-            raise ValueError("Shape of xbins in xvals (%s) != shape of xbins in yvals (%s)" %
-                             (np.size(xvals), np.shape(yvals)[-1]))
+        if np.size(xvals) != np.shape(yvals)[-1]:  # pragma: no cover
+            raise ValueError(
+                "Shape of xbins in xvals (%s) != shape of xbins in yvals (%s)"
+                % (np.size(xvals), np.shape(yvals)[-1])
+            )
         self._xvals = np.asarray(xvals)
 
         # Set support
         self._xmin = self._xvals[0]
         self._xmax = self._xvals[-1]
-        kwargs['shape'] = np.shape(yvals)[:-1]
+        kwargs["shape"] = np.shape(yvals)[:-1]
 
         self._yvals = reshape_to_pdf_size(yvals, -1)
 
-        check_input = kwargs.pop('check_input', True)
+        check_input = kwargs.pop("check_input", True)
         if check_input:
             self._compute_ycumul()
-            self._yvals = (self._yvals.T / self._ycumul[:,-1]).T
-            self._ycumul = (self._ycumul.T / self._ycumul[:,-1]).T
+            self._yvals = (self._yvals.T / self._ycumul[:, -1]).T
+            self._ycumul = (self._ycumul.T / self._ycumul[:, -1]).T
         else:  # pragma: no cover
             self._ycumul = None
 
         super().__init__(*args, **kwargs)
-        self._addmetadata('xvals', self._xvals)
-        self._addobjdata('yvals', self._yvals)
+        self._addmetadata("xvals", self._xvals)
+        self._addobjdata("yvals", self._yvals)
 
     def _compute_ycumul(self):
         copy_shape = np.array(self._yvals.shape)
         self._ycumul = np.ndarray(copy_shape)
         self._ycumul[:, 0] = 0.5 * self._yvals[:, 0] * (self._xvals[1] - self._xvals[0])
-        self._ycumul[:, 1:] = np.cumsum((self._xvals[1:] - self._xvals[:-1]) *
-                                        0.5 * np.add(self._yvals[:,1:],
-                                                     self._yvals[:,:-1]), axis=1)
+        self._ycumul[:, 1:] = np.cumsum(
+            (self._xvals[1:] - self._xvals[:-1])
+            * 0.5
+            * np.add(self._yvals[:, 1:], self._yvals[:, :-1]),
+            axis=1,
+        )
 
     @property
     def xvals(self):
@@ -108,34 +117,42 @@ def yvals(self):
 
     def _pdf(self, x, row):
         # pylint: disable=arguments-differ
-        return interpolate_x_multi_y(x, row, self._xvals, self._yvals,
-                                     bounds_error=False, fill_value=0.).ravel()
+        return interpolate_x_multi_y(
+            x, row, self._xvals, self._yvals, bounds_error=False, fill_value=0.0
+        ).ravel()
 
     def _cdf(self, x, row):
         # pylint: disable=arguments-differ
         if self._ycumul is None:  # pragma: no cover
             self._compute_ycumul()
-        return interpolate_x_multi_y(x, row, self._xvals, self._ycumul,
-                                     bounds_error=False, fill_value=(0.,1.)).ravel()
+        return interpolate_x_multi_y(
+            x, row, self._xvals, self._ycumul, bounds_error=False, fill_value=(0.0, 1.0)
+        ).ravel()
 
     def _ppf(self, x, row):
         # pylint: disable=arguments-differ
         if self._ycumul is None:  # pragma: no cover
             self._compute_ycumul()
-        return interpolate_multi_x_y(x, row, self._ycumul, self._xvals,
-                                     bounds_error=False, fill_value=(self._xmin, self._xmax)).ravel()
+        return interpolate_multi_x_y(
+            x,
+            row,
+            self._ycumul,
+            self._xvals,
+            bounds_error=False,
+            fill_value=(self._xmin, self._xmax),
+        ).ravel()
 
     def _munp(self, m, *args):
-        """ compute moments """
+        """compute moments"""
         # pylint: disable=arguments-differ
         # Silence floating point warnings from integration.
-        with np.errstate(all='ignore'):
+        with np.errstate(all="ignore"):
             vals = self.custom_generic_moment(m)
         return vals
 
     def custom_generic_moment(self, m):
-        """ Compute the mth moment """
+        """Compute the mth moment"""
         m = np.asarray(m)
         dx = self._xvals[1] - self._xvals[0]
         return np.sum(self._xvals**m * self._yvals, axis=1) * dx
@@ -145,8 +162,8 @@ def _updated_ctor_param(self):
         Set the bins as additional constructor argument
         """
         dct = super()._updated_ctor_param()
-        dct['xvals'] = self._xvals
-        dct['yvals'] = self._yvals
+        dct["xvals"] = self._xvals
+        dct["yvals"] = self._yvals
         return dct
 
     @classmethod
     def get_allocation_kwds(cls, npdf, **kwargs):
         """Return the keywords necessary to create an 'empty' hdf5 file with npdf entries
         for iterative file writeout.  We only need to allocate the objdata columns, as
         the metadata can be written when we finalize the file.
-        
+
         Parameters
         ----------
         npdf: int
             number of *total* PDFs that will be written out
         kwargs: dict
             dictionary of kwargs needed to create the ensemble
         """
-        if 'xvals' not in kwargs: #pragma: no cover
+        if "xvals" not in kwargs:  # pragma: no cover
             raise ValueError("required argument xvals not included in kwargs")
-        ngrid = np.shape(kwargs['xvals'])[-1]
-        return dict(yvals=((npdf, ngrid), 'f4'))
+        ngrid = np.shape(kwargs["xvals"])[-1]
+        return dict(yvals=((npdf, ngrid), "f4"))
 
     @classmethod
     def plot_native(cls, pdf, **kwargs):
@@ -184,12 +201,18 @@ def add_mappings(cls):
         cls._add_creation_method(cls.create, None)
         cls._add_extraction_method(extract_vals_at_x, None)
 
-
     @classmethod
     def make_test_data(cls):
-        """ Make data for unit tests """
-        cls.test_data = dict(interp=dict(gen_func=interp, ctor_data=dict(xvals=XBINS, yvals=YARRAY),\
-                                         convert_data=dict(xvals=XBINS), test_xvals=TEST_XVALS))
+        """Make data for unit tests"""
+        cls.test_data = dict(
+            interp=dict(
+                gen_func=interp,
+                ctor_data=dict(xvals=XBINS, yvals=YARRAY),
+                convert_data=dict(xvals=XBINS),
+                test_xvals=TEST_XVALS,
+            )
+        )
+
 
 interp = interp_gen.create
 
@@ -224,9 +247,10 @@ class interp_irregular_gen(Pdf_rows_gen):
     ppf(0) will return min(xvals)
     ppf(1) will return max(xvals)
     """
+
     # pylint: disable=protected-access
 
-    name = 'interp_irregular'
+    name = "interp_irregular"
     version = 0
 
     _support_mask = rv_continuous._support_mask
@@ -242,30 +266,35 @@ def __init__(self, xvals, yvals, *args, **kwargs):
         yvals : array_like
             The y-values used to do the interpolation
         """
-        if np.shape(xvals) != np.shape(yvals): # pragma: no cover
-            raise ValueError("Shape of xvals (%s) != shape of yvals (%s)" %
-                             (np.shape(xvals), np.shape(yvals)))
+        if np.shape(xvals) != np.shape(yvals):  # pragma: no cover
+            raise ValueError(
+                "Shape of xvals (%s) != shape of yvals (%s)"
+                % (np.shape(xvals), np.shape(yvals))
+            )
         self._xvals = reshape_to_pdf_size(xvals, -1)
 
         self._xmin = np.min(self._xvals)
         self._xmax = np.max(self._xvals)
-        kwargs['shape'] = np.shape(xvals)[:-1]
+        kwargs["shape"] = np.shape(xvals)[:-1]
 
-        check_input = kwargs.pop('check_input', True)
+        check_input = kwargs.pop("check_input", True)
         self._yvals = reshape_to_pdf_size(yvals, -1)
         if check_input:
             self._yvals = normalize_interp1d(self._xvals, self._yvals)
         self._ycumul = None
 
         super().__init__(*args, **kwargs)
-        self._addobjdata('xvals', self._xvals)
-        self._addobjdata('yvals', self._yvals)
+        self._addobjdata("xvals", self._xvals)
+        self._addobjdata("yvals", self._yvals)
 
     def _compute_ycumul(self):
         copy_shape = np.array(self._yvals.shape)
         self._ycumul = np.ndarray(copy_shape)
-        self._ycumul[:,0] = 0.
-        self._ycumul[:,1:] = np.cumsum(self._xvals[:,1:]*self._yvals[:,1:] - self._xvals[:,:-1]*self._yvals[:,1:], axis=1)
-
+        self._ycumul[:, 0] = 0.0
+        self._ycumul[:, 1:] = np.cumsum(
+            self._xvals[:, 1:] * self._yvals[:, 1:]
+            - self._xvals[:, :-1] * self._yvals[:, 1:],
+            axis=1,
+        )
 
     @property
     def xvals(self):
@@ -279,32 +308,38 @@ def yvals(self):
 
     def _pdf(self, x, row):
         # pylint: disable=arguments-differ
-        return interpolate_multi_x_multi_y(x, row, self._xvals, self._yvals,
-                                           bounds_error=False, fill_value=0.).ravel()
+        return interpolate_multi_x_multi_y(
+            x, row, self._xvals, self._yvals, bounds_error=False, fill_value=0.0
+        ).ravel()
 
     def _cdf(self, x, row):
         # pylint: disable=arguments-differ
         if self._ycumul is None:  # pragma: no cover
             self._compute_ycumul()
-        return interpolate_multi_x_multi_y(x, row, self._xvals, self._ycumul,
-                                           bounds_error=False, fill_value=(0., 1.)).ravel()
-
+        return interpolate_multi_x_multi_y(
+            x, row, self._xvals, self._ycumul, bounds_error=False, fill_value=(0.0, 1.0)
+        ).ravel()
 
     def _ppf(self, x, row):
         # pylint: disable=arguments-differ
         if self._ycumul is None:  # pragma: no cover
             self._compute_ycumul()
-        return interpolate_multi_x_multi_y(x, row, self._ycumul, self._xvals,
-                                           bounds_error=False, fill_value=(self._xmin, self._xmax)).ravel()
-
+        return interpolate_multi_x_multi_y(
+            x,
+            row,
+            self._ycumul,
+            self._xvals,
+            bounds_error=False,
+            fill_value=(self._xmin, self._xmax),
+        ).ravel()
 
     def _updated_ctor_param(self):
         """
         Set the bins as additional constructor argument
         """
         dct = super()._updated_ctor_param()
-        dct['xvals'] = self._xvals
-        dct['yvals'] = self._yvals
+        dct["xvals"] = self._xvals
+        dct["yvals"] = self._yvals
         return dct
 
     @classmethod
@@ -320,10 +355,10 @@ def get_allocation_kwds(cls, npdf, **kwargs):
         kwargs: dict
             dictionary of kwargs needed to create the ensemble
         """
-        if 'xvals' not in kwargs: #pragma: no cover
+        if "xvals" not in kwargs:  # pragma: no cover
             raise ValueError("required argument xvals not included in kwargs")
-        ngrid = np.shape(kwargs['xvals'])[-1]
-        return dict(xvals=((npdf, ngrid), 'f4'), yvals=((npdf, ngrid), 'f4'))
+        ngrid = np.shape(kwargs["xvals"])[-1]
+        return dict(xvals=((npdf, ngrid), "f4"), yvals=((npdf, ngrid), "f4"))
 
     @classmethod
     def plot_native(cls, pdf, **kwargs):
@@ -344,12 +379,17 @@ def add_mappings(cls):
         cls._add_extraction_method(extract_xy_vals, None)
         cls._add_extraction_method(extract_xy_sparse, None)
 
-
     @classmethod
     def make_test_data(cls):
-        """ Make data for unit tests """
-        cls.test_data = dict(interp_irregular=dict(gen_func=interp_irregular, ctor_data=dict(xvals=XARRAY, yvals=YARRAY),\
-                                                   convert_data=dict(xvals=XBINS), test_xvals=TEST_XVALS))
+        """Make data for unit tests"""
+        cls.test_data = dict(
+            interp_irregular=dict(
+                gen_func=interp_irregular,
+                ctor_data=dict(xvals=XARRAY, yvals=YARRAY),
+                convert_data=dict(xvals=XBINS),
+                test_xvals=TEST_XVALS,
+            )
+        )
 
 
 interp_irregular = interp_irregular_gen.create
diff --git a/src/qp/lazy_modules.py b/src/qp/lazy_modules.py
index 057a91e9..22db4ba3 100644
--- a/src/qp/lazy_modules.py
+++ b/src/qp/lazy_modules.py
@@ -2,6 +2,6 @@
 
 from tables_io.lazy_modules import lazyImport
 
-mpl = lazyImport('matplotlib')
-plt = lazyImport('matplotlib.pyplot')
-mixture = lazyImport('sklearn.mixture')
+mpl = lazyImport("matplotlib")
+plt = lazyImport("matplotlib.pyplot")
+mixture = lazyImport("sklearn.mixture")
diff --git a/src/qp/metrics/__init__.py b/src/qp/metrics/__init__.py
index cb2f318e..31163c7d 100644
--- a/src/qp/metrics/__init__.py
+++ b/src/qp/metrics/__init__.py
@@ -5,7 +5,11 @@
 
 
 # added for testing purposes
-from .metrics import _calculate_grid_parameters, _check_ensemble_is_not_nested, _check_ensembles_are_same_size
+from .metrics import (
+    _calculate_grid_parameters,
+    _check_ensemble_is_not_nested,
+    _check_ensembles_are_same_size,
+)
 from .metrics import _check_ensembles_contain_correct_number_of_distributions
 
 from .factory import MetricFactory
diff --git a/src/qp/metrics/array_metrics.py b/src/qp/metrics/array_metrics.py
index a77c6e61..d3b538ce 100644
--- a/src/qp/metrics/array_metrics.py
+++ b/src/qp/metrics/array_metrics.py
@@ -7,8 +7,9 @@
 
 from qp.utils import safelog
 
+
 def quick_anderson_ksamp(p_random_variables, q_random_variables, **kwargs):
-    """Calculate the k-sample Anderson-Darling statistic using scipy.stats.anderson_ksamp for two CDFs. 
+    """Calculate the k-sample Anderson-Darling statistic using scipy.stats.anderson_ksamp for two CDFs.
     For more details see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson_ksamp.html
 
     Parameters
@@ -25,6 +26,7 @@ def quick_anderson_ksamp(p_random_variables, q_random_variables, **kwargs):
     """
     return stats.anderson_ksamp([p_random_variables, q_random_variables], **kwargs)
 
+
 def quick_kld(p_eval, q_eval, dx=0.01):
     """
     Calculates the Kullback-Leibler Divergence between two evaluations of PDFs.
@@ -51,6 +53,7 @@ def quick_kld(p_eval, q_eval, dx=0.01):
     Dpq = dx * np.sum(p_eval * logquotient, axis=-1)
     return Dpq
 
+
 def quick_moment(p_eval, grid_to_N, dx):
     """
     Calculates a moment of an evaluated PDF
@@ -74,6 +77,7 @@ def quick_moment(p_eval, grid_to_N, dx):
     M = np.dot(p_eval, grid_to_N) * dx
     return M
 
+
 def quick_rmse(p_eval, q_eval, N):
     """
     Calculates the Root Mean Square Error between two evaluations of PDFs.
@@ -98,6 +102,7 @@ def quick_rmse(p_eval, q_eval, N):
     rms = np.sqrt(np.sum((p_eval - q_eval) ** 2, axis=-1) / N)
     return rms
 
+
 def quick_rbpe(pdf_function, integration_bounds, limits=(np.inf, np.inf)):
     """
     Calculates the risk based point estimate of a qp.Ensemble object with npdf == 1.
diff --git a/src/qp/metrics/brier.py b/src/qp/metrics/brier.py
index c268c7d0..3e51fdf7 100644
--- a/src/qp/metrics/brier.py
+++ b/src/qp/metrics/brier.py
@@ -4,7 +4,7 @@
 class Brier:
     """Brier score based on https://en.wikipedia.org/wiki/Brier_score#Original_definition_by_Brier
-    
+
     Parameters
     ----------
     prediction: NxM array, float
diff --git a/src/qp/metrics/factory.py b/src/qp/metrics/factory.py
index 9996d749..daa27c70 100644
--- a/src/qp/metrics/factory.py
+++ b/src/qp/metrics/factory.py
@@ -2,7 +2,7 @@
 
 
 def get_all_subclasses(cls):
-    """ Utility function to recursively get all the subclasses of a class """
+    """Utility function to recursively get all the subclasses of a class"""
     all_subclasses = []
 
     for subclass in cls.__subclasses__():
@@ -13,38 +13,43 @@ def get_all_subclasses(cls):
 
 
 class MetricFactory:
-
     metric_dict = {}
 
     @classmethod
     def update_metrics(cls):
-        """ Update the dictionary of known metrics """
+        """Update the dictionary of known metrics"""
         all_subclasses = get_all_subclasses(BaseMetric)
-        cls.metric_dict = { subcls.metric_name.replace("Metric", ""): subcls for subcls in all_subclasses if subcls.metric_name }
+        cls.metric_dict = {
+            subcls.metric_name.replace("Metric", ""): subcls
+            for subcls in all_subclasses
+            if subcls.metric_name
+        }
 
     @classmethod
     def print_metrics(cls, force_update=False):
-        """ List all the known metrics """
+        """List all the known metrics"""
         if not cls.metric_dict or force_update:
             cls.update_metrics()
-        print('List of available metrics')
+        print("List of available metrics")
         for key, val in cls.metric_dict.items():
-            print(key, val, val.metric_input_type.name) 
+            print(key, val, val.metric_input_type.name)
 
     @classmethod
     def list_metrics(cls, force_update=False):
-        """ Get the list of all the metric names """
+        """Get the list of all the metric names"""
         if not cls.metric_dict or force_update:
             cls.update_metrics()
         return list(cls.metric_dict.keys())
 
     @classmethod
     def create_metric(cls, name, force_update=False, **kwargs):
-        """ Create a metric evaluator """
+        """Create a metric evaluator"""
         if not cls.metric_dict or force_update:
             cls.update_metrics()
         try:
-            metric_class = cls.metric_dict[name] 
+            metric_class = cls.metric_dict[name]
         except KeyError as msg:
-            raise KeyError(f"{name} is not in the set of known metrics {str(cls.list_metrics())}") from msg
+            raise KeyError(
+                f"{name} is not in the set of known metrics {str(cls.list_metrics())}"
+            ) from msg
         return metric_class(**kwargs)
diff --git a/src/qp/metrics/goodness_of_fit.py b/src/qp/metrics/goodness_of_fit.py
index e8f20a31..568b965b 100644
--- a/src/qp/metrics/goodness_of_fit.py
+++ b/src/qp/metrics/goodness_of_fit.py
@@ -1,6 +1,8 @@
 """This module contains functions copied from the 1.10.0dev branch of Scipy.
-The original code can be found here: https://github.com/scipy/scipy/blob/maintenance/1.10.x/scipy/stats/_fit.py#L722
-The original Scipy 1.10.0dev code is wrapped with a function that prevents passing more than 1 distribution at a time.
+The original code can be found here:
+https://github.com/scipy/scipy/blob/maintenance/1.10.x/scipy/stats/_fit.py#L722
+The original Scipy 1.10.0dev code is wrapped with a function that prevents
+passing more than 1 distribution at a time.
 So, temporarily, we'll make use of the underlying, vectorized functions.
 Fortunately, the vectorized Scipy code works without modification.
 Once Scipy 1.10 is made available, we can swap out the copied functions for those in Scipy 1.10.
@@ -8,14 +10,16 @@ import numpy as np + def _anderson_darling(dist, data): x = np.sort(data, axis=-1) n = data.shape[-1] - i = np.arange(1, n+1) - Si = (2*i - 1)/n * (dist.logcdf(x) + dist.logsf(x[..., ::-1])) + i = np.arange(1, n + 1) + Si = (2 * i - 1) / n * (dist.logcdf(x) + dist.logsf(x[..., ::-1])) S = np.sum(Si, axis=-1) return -n - S + def _kolmogorov_smirnov(dist, data): x = np.sort(data, axis=-1) cdfvals = dist.cdf(x) @@ -23,28 +27,32 @@ def _kolmogorov_smirnov(dist, data): Dminus = _compute_dminus(cdfvals) return np.maximum(Dplus, Dminus) + def _compute_dplus(cdfvals): # adapted from _stats_py before gh-17062 n = cdfvals.shape[-1] return (np.arange(1.0, n + 1) / n - cdfvals).max(axis=-1) + def _compute_dminus(cdfvals, axis=-1): n = cdfvals.shape[-1] - return (cdfvals - np.arange(0.0, n)/n).max(axis=axis) + return (cdfvals - np.arange(0.0, n) / n).max(axis=axis) + def _cramer_von_mises(dist, data): x = np.sort(data, axis=-1) n = data.shape[-1] cdfvals = dist.cdf(x) - u = (2*np.arange(1, n+1) - 1)/(2*n) - w = 1 / (12*n) + np.sum((u - cdfvals)**2, axis=-1) + u = (2 * np.arange(1, n + 1) - 1) / (2 * n) + w = 1 / (12 * n) + np.sum((u - cdfvals) ** 2, axis=-1) return w + # The following methods can be replaced by: # scipy.stats._fit._anderson_darling, # scipy.stats._fit._cramer_von_mises, and # scipy.stats._fit._kolmogorov_smirnov when Scipy 1.10 is available. goodness_of_fit_metrics = { - 'ad': _anderson_darling, - 'cvm': _cramer_von_mises, - 'ks': _kolmogorov_smirnov + "ad": _anderson_darling, + "cvm": _cramer_von_mises, + "ks": _kolmogorov_smirnov, } diff --git a/src/qp/metrics/metrics.py b/src/qp/metrics/metrics.py index 21b730ed..ea1ec472 100644 --- a/src/qp/metrics/metrics.py +++ b/src/qp/metrics/metrics.py @@ -11,9 +11,12 @@ from qp.metrics.goodness_of_fit import goodness_of_fit_metrics from qp.utils import epsilon -Grid = namedtuple('Grid', ['grid_values', 'cardinality', 'resolution', 'hist_bin_edges', 'limits']) +Grid = namedtuple( + "Grid", ["grid_values", "cardinality", "resolution", "hist_bin_edges", "limits"] +) -def _calculate_grid_parameters(limits, dx:float=0.01) -> Grid: + +def _calculate_grid_parameters(limits, dx: float = 0.01) -> Grid: """ Create a grid of points and return parameters describing it. @@ -37,10 +40,13 @@ def _calculate_grid_parameters(limits, dx:float=0.01) -> Grid: cardinality = int((limits[-1] - limits[0]) / dx) grid_values = np.linspace(limits[0], limits[1], cardinality) resolution = (limits[-1] - limits[0]) / (cardinality - 1) - hist_bin_edges = np.histogram_bin_edges((limits[0]-resolution/2, limits[1]+resolution/2), cardinality) + hist_bin_edges = np.histogram_bin_edges( + (limits[0] - resolution / 2, limits[1] + resolution / 2), cardinality + ) return Grid(grid_values, cardinality, resolution, hist_bin_edges, limits) + def calculate_moment(p, N, limits, dx=0.01): """ Calculates a moment of a qp.Ensemble object @@ -68,7 +74,7 @@ def calculate_moment(p, N, limits, dx=0.01): pe = p.gridded(grid.grid_values)[1] # calculate the moment - grid_to_N = grid.grid_values ** N + grid_to_N = grid.grid_values**N M = array_metrics.quick_moment(pe, grid_to_N, grid.resolution) return M @@ -99,7 +105,9 @@ def calculate_kld(p, q, limits, dx=0.01): TO DO: have this take number of points not dx! 
""" if p.shape != q.shape: - raise ValueError('Cannot calculate KLD between two ensembles with different shapes') + raise ValueError( + "Cannot calculate KLD between two ensembles with different shapes" + ) # Make a grid from the limits and resolution grid = _calculate_grid_parameters(limits, dx) @@ -111,11 +119,13 @@ def calculate_kld(p, q, limits, dx=0.01): qn = qe[1] # Calculate the KLD from q to p - Dpq = array_metrics.quick_kld(pn, qn, grid.resolution)# np.dot(pn * logquotient, np.ones(len(grid)) * dx) + Dpq = array_metrics.quick_kld( + pn, qn, grid.resolution + ) # np.dot(pn * logquotient, np.ones(len(grid)) * dx) - if np.any(Dpq < 0.): #pragma: no cover - print('broken KLD: '+str((Dpq, pn, qn, grid.resolution))) - Dpq = epsilon*np.ones(Dpq.shape) + if np.any(Dpq < 0.0): # pragma: no cover + print("broken KLD: " + str((Dpq, pn, qn, grid.resolution))) + Dpq = epsilon * np.ones(Dpq.shape) return Dpq @@ -126,9 +136,11 @@ def calculate_rmse(p, q, limits, dx=0.01): Parameters ---------- p: qp.Ensemble object - probability distribution function whose distance between its truth and the approximation of `q` will be calculated. + probability distribution function whose distance between its truth and the + approximation of `q` will be calculated. q: qp.Ensemble object - probability distribution function whose distance between its approximation and the truth of `p` will be calculated. + probability distribution function whose distance between its approximation and the + truth of `p` will be calculated. limits: tuple of floats endpoints of integration interval in which to calculate RMS dx: float @@ -144,7 +156,9 @@ def calculate_rmse(p, q, limits, dx=0.01): TO DO: change dx to N """ if p.shape != q.shape: - raise ValueError('Cannot calculate RMSE between two ensembles with different shapes') + raise ValueError( + "Cannot calculate RMSE between two ensembles with different shapes" + ) # Make a grid from the limits and resolution grid = _calculate_grid_parameters(limits, dx) @@ -154,7 +168,9 @@ def calculate_rmse(p, q, limits, dx=0.01): qe = q.gridded(grid.grid_values)[1] # Calculate the RMS between p and q - rms = array_metrics.quick_rmse(pe, qe, grid.cardinality)# np.sqrt(dx * np.sum((pe - qe) ** 2)) + rms = array_metrics.quick_rmse( + pe, qe, grid.cardinality + ) # np.sqrt(dx * np.sum((pe - qe) ** 2)) return rms @@ -162,7 +178,7 @@ def calculate_rmse(p, q, limits, dx=0.01): def calculate_rbpe(p, limits=(np.inf, np.inf)): """ Calculates the risk based point estimates of a qp.Ensemble object. - Algorithm as defined in 4.2 of 'Photometric redshifts for Hyper Suprime-Cam + Algorithm as defined in 4.2 of 'Photometric redshifts for Hyper Suprime-Cam Subaru Strategic Program Data Release 1' (Tanaka et al. 2018). Parameters @@ -185,30 +201,35 @@ def evaluate_pdf_at_z(z, dist): return dist.pdf(z)[0][0] for n in range(0, p.npdf): - if p[n].npdf != 1: - raise ValueError('quick_rbpe only handles Ensembles with a single PDF ' - 'for ensembles with more than one PDF, use the qp.metrics.risk_based_point_estimate function.') + raise ValueError( + "quick_rbpe only handles Ensembles with a single PDF " + "for ensembles with more than one PDF, use the qp.metrics.risk_based_point_estimate function." 
+            )

        this_dist_pdf_at_z = partial(evaluate_pdf_at_z, dist=p[n])
        integration_bounds = (p[n].ppf(0.01)[0][0], p[n].ppf(0.99)[0][0])

-        rbpes.append(array_metrics.quick_rbpe(this_dist_pdf_at_z, integration_bounds, limits))
+        rbpes.append(
+            array_metrics.quick_rbpe(this_dist_pdf_at_z, integration_bounds, limits)
+        )

    return np.array(rbpes)

+
 def calculate_brier(p, truth, limits, dx=0.01):
     """This function will do the following:
-
+
     1) Generate a Mx1 sized grid based on `limits` and `dx`.
-    2) Produce an NxM array by evaluating the pdf for each of the N distribution objects in the Ensemble p on the grid.
+    2) Produce an NxM array by evaluating the pdf for each of the N distribution objects
+       in the Ensemble p on the grid.
     3) Produce an NxM truth_array using the input truth and the generated grid. All values will be 0 or 1.
     4) Create a Brier metric evaluation object
     5) Return the result of the Brier metric calculation.

     Parameters
     ----------
-    p: qp.Ensemble object
+    p: qp.Ensemble object of N distributions
         probability distribution functions that will be gridded and compared against truth.
     truth: Nx1 sequence
         the list of true values, 1 per distribution in p.
@@ -222,9 +243,13 @@ def calculate_brier(p, truth, limits, dx=0.01):
     Brier_metric: float
     """

-    # Ensure that the number of distributions objects in the Ensemble is equal to the length of the truth array
+    # Ensure that the number of distribution objects in the Ensemble
+    # is equal to the length of the truth array
     if p.npdf != len(truth):
-        raise ValueError("Number of distributions in the Ensemble (%d) is not equal to the number of truth values (%d)" % (p.npdf, len(truth)))
+        raise ValueError(
+            "Number of distributions in the Ensemble (%d) is not equal to the number of truth values (%d)"
+            % (p.npdf, len(truth))
+        )

     # Values of truth that are outside the defined limits will not appear in the truth_array.
     # Consider expanding the limits or using numpy.clip to restrict truth values to the limits.
@@ -250,13 +275,17 @@ def calculate_brier(p, truth, limits, dx=0.01):
     # return the results of evaluating the Brier metric
     return brier_metric_evaluation.evaluate()

+
 @deprecated(
     reason="""
     This implementation is deprecated for performance reasons and does not accommodate N vs 1 comparisons.
     Please use calculate_goodness_of_fit instead.
     This method is for testing purposes only.
     """,
-    category=DeprecationWarning)
-def calculate_anderson_darling(p, scipy_distribution='norm', num_samples=100, _random_state=None):  # pylint: disable=unused-argument
+    category=DeprecationWarning,
+)
+def calculate_anderson_darling(
+    p, scipy_distribution="norm", num_samples=100, _random_state=None
+):  # pylint: disable=unused-argument
     """This function is deprecated and will be completely removed in a later version.
     Please use `calculate_goodness_of_fit` instead.

@@ -264,15 +293,21 @@ def calculate_anderson_darling(p, scipy_distribution='norm', num_samples=100, _r
     -------
     logger.warning
     """
-    logging.warning("This function is deprecated, please use `calculate_goodness_of_fit` with `fit_metric='ad'`")  # pragma: no cover
+    logging.warning(
+        "This function is deprecated, please use `calculate_goodness_of_fit` with `fit_metric='ad'`"
+    )  # pragma: no cover
+

 @deprecated(
     reason="""
     This implementation is deprecated for performance reasons and does not accommodate N vs 1 comparisons.
     Please use calculate_goodness_of_fit instead.
     This method is for testing purposes only.
""", - category=DeprecationWarning) -def calculate_cramer_von_mises(p, q, num_samples=100, _random_state=None, **kwargs): # pylint: disable=unused-argument + category=DeprecationWarning, +) +def calculate_cramer_von_mises( + p, q, num_samples=100, _random_state=None, **kwargs +): # pylint: disable=unused-argument """This function is deprecated and will be completely removed in a later version. Please use `calculate_goodness_of_fit` instead. @@ -280,15 +315,21 @@ def calculate_cramer_von_mises(p, q, num_samples=100, _random_state=None, **kwar ------- logger.warning """ - logging.warning("This function is deprecated, please use `calculate_goodness_of_fit` with `fit_metric='cvm'`") # pragma: no cover + logging.warning( + "This function is deprecated, please use `calculate_goodness_of_fit` with `fit_metric='cvm'`" + ) # pragma: no cover + @deprecated( reason=""" This implementation is deprecated for performance reasons and does not accommodate N vs 1 comparisons. Please use calculate_goodness_of_fit instead. This method is for testing purposes only. """, - category=DeprecationWarning) -def calculate_kolmogorov_smirnov(p, q, num_samples=100, _random_state=None): # pylint: disable=unused-argument + category=DeprecationWarning, +) +def calculate_kolmogorov_smirnov( + p, q, num_samples=100, _random_state=None +): # pylint: disable=unused-argument """This function is deprecated and will be completely removed in a later version. Please use `calculate_goodness_of_fit` instead. @@ -296,7 +337,10 @@ def calculate_kolmogorov_smirnov(p, q, num_samples=100, _random_state=None): # ------- logger.warning """ - logging.warning("This function is deprecated, please use `calculate_goodness_of_fit` with `fit_metric='ks'`") # pragma: no cover + logging.warning( + "This function is deprecated, please use `calculate_goodness_of_fit` with `fit_metric='ks'`" + ) # pragma: no cover + def calculate_outlier_rate(p, lower_limit=0.0001, upper_limit=0.9999): """Fraction of outliers in each distribution @@ -325,7 +369,10 @@ def calculate_outlier_rate(p, lower_limit=0.0001, upper_limit=0.9999): outlier_rates = np.array([(dist.cdf(lower_limit) + (1. - dist.cdf(upper_limit)))[0][0] for dist in p]) return outlier_rates -def calculate_goodness_of_fit(estimate, reference, fit_metric='ks', num_samples=100, _random_state=None): + +def calculate_goodness_of_fit( + estimate, reference, fit_metric="ks", num_samples=100, _random_state=None +): """This method calculates goodness of fit between the distributions in the `estimate` and `reference` Ensembles using the specified fit_metric. @@ -370,18 +417,31 @@ def calculate_goodness_of_fit(estimate, reference, fit_metric='ks', num_samples= try: _check_ensembles_contain_correct_number_of_distributions(estimate, reference) - except ValueError: #pragma: no cover - unittest coverage for _check_ensembles_contain_correct_number_of_distributions is complete - logging.warning("The ensemble `reference` should have 1 or N distributions. With N = number of distributions in the ensemble `estimate`.") + except ( + ValueError + ): # pragma: no cover - unittest coverage for _check_ensembles_contain_correct_number_of_distributions is complete pylint: disable=line-too-long + logging.warning( + "The ensemble `reference` should have 1 or N distributions. " + "With N = number of distributions in the ensemble `estimate`." 
+            )

     try:
         _check_ensemble_is_not_nested(estimate)
-    except ValueError: #pragma: no cover - unittest coverage for _check_ensemble_is_not_nested is complete
-        logging.warning("Each element in the ensemble `estimate` must be a single distribution.")
+    except (
+        ValueError
+    ):  # pragma: no cover - unittest coverage for _check_ensemble_is_not_nested is complete
+        logging.warning(
+            "Each element in the ensemble `estimate` must be a single distribution."
+        )

     try:
         _check_ensemble_is_not_nested(reference)
-    except ValueError: #pragma: no cover - unittest coverage for _check_ensemble_is_not_nested is complete
-        logging.warning("Each element in the ensemble `reference` must be a single distribution.")
+    except (
+        ValueError
+    ):  # pragma: no cover - unittest coverage for _check_ensemble_is_not_nested is complete
+        logging.warning(
+            "Each element in the ensemble `reference` must be a single distribution."
+        )

     if fit_metric not in goodness_of_fit_metrics:
         metrics = list(goodness_of_fit_metrics.keys())
@@ -389,22 +449,24 @@ def calculate_goodness_of_fit(estimate, reference, fit_metric='ks', num_samples=

     return goodness_of_fit_metrics[fit_metric](
         reference,
-        np.squeeze(estimate.rvs(size=num_samples, random_state=_random_state))
+        np.squeeze(estimate.rvs(size=num_samples, random_state=_random_state)),
     )

+
 def _check_ensembles_are_same_size(p, q):
-    """This utility function ensures checks that two Ensembles contain an equal number of distribution objects.
+    """This utility function checks that two Ensembles contain equal numbers of distributions.

     Args:
         p qp.Ensemble: An Ensemble containing 0 or more distributions
         q qp.Ensemble: A second Ensemble containing 0 or more distributions

     Raises:
-        ValueError: If the result of evaluating qp.Ensemble.npdf on each Ensemble is not the same, raise an error.
+        ValueError: If the result of evaluating qp.Ensemble.npdf on each Ensemble is not the same.
     """
     if p.npdf != q.npdf:
         raise ValueError("Input ensembles should have the same number of distributions")

+
 def _check_ensemble_is_not_nested(p):
     """This utility function ensures that each element in the Ensemble is a single distribution.

@@ -412,11 +474,14 @@ def _check_ensemble_is_not_nested(p):
     Args:
         p qp.Ensemble: An Ensemble that could contain nested Ensembles with multiple distributions in each

     Raises:
-        ValueError: If there are any elements of the input Ensemble that contain more than 1 PDF, raise an error.
+        ValueError: If there are any elements of the input Ensemble that contain more than 1 PDF.
     """
     for dist in p:
         if dist.npdf != 1:
-            raise ValueError("Each element in the input Ensemble should be a single distribution.")
+            raise ValueError(
+                "Each element in the input Ensemble should be a single distribution."
+            )
+

 def _check_ensembles_contain_correct_number_of_distributions(estimate, reference):
     """This utility function ensures that the number of distributions in the ensembles matches

@@ -451,4 +516,7 @@ def _check_ensembles_contain_correct_number_of_distributions(estimate, reference
     elif reference.npdf == 1:
         pass
     else:
-        raise ValueError("`reference` should contain either 1 distribution or the same number of distributions as `estimate`.")
+        raise ValueError(
+            "`reference` should contain either 1 distribution "
+            "or the same number of distributions as `estimate`."
+        )
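An illustrative sketch (not from the patch) of the public entry point these checks guard: comparing an estimate ensemble against a reference with one of the metric keys registered above. The normal ensembles and sizes below are arbitrary example choices.

    import numpy as np
    import qp
    from qp.metrics.metrics import calculate_goodness_of_fit

    locs = np.linspace(-1.0, 1.0, 11).reshape(11, 1)
    scales = np.full((11, 1), 0.5)
    estimate = qp.Ensemble(qp.stats.norm, data=dict(loc=locs, scale=scales))
    reference = qp.Ensemble(qp.stats.norm, data=dict(loc=locs, scale=scales))

    # fit_metric is 'ks', 'ad', or 'cvm'; returns one statistic per distribution pair
    ks_stats = calculate_goodness_of_fit(
        estimate, reference, fit_metric="ks", num_samples=100
    )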
diff --git a/src/qp/metrics/pit.py b/src/qp/metrics/pit.py
index e6c3bd96..e19cc6ed 100644
--- a/src/qp/metrics/pit.py
+++ b/src/qp/metrics/pit.py
@@ -9,7 +9,8 @@

 DEFAULT_QUANTS = np.linspace(0, 1, 100)

-class PIT():
+
+class PIT:
     """PIT(qp_ens, true_vals, eval_grid=DEFAULT_QUANTS)

     Probability Integral Transform
@@ -44,7 +45,12 @@ def __init__(self, qp_ens, true_vals, eval_grid=DEFAULT_QUANTS):
         self._true_vals = true_vals

         # For each distribution in the Ensemble, calculate the CDF where x = known_true_value
-        self._pit_samps = np.array([qp_ens[i].cdf(self._true_vals[i])[0][0] for i in range(len(self._true_vals))])
+        self._pit_samps = np.array(
+            [
+                qp_ens[i].cdf(self._true_vals[i])[0][0]
+                for i in range(len(self._true_vals))
+            ]
+        )

         # These two lines set all `NaN` values to 0. This may or may not make sense
         # Alternatively if it's better to simply remove the `NaN`, this can be done
@@ -52,12 +58,16 @@ def __init__(self, qp_ens, true_vals, eval_grid=DEFAULT_QUANTS):
         samp_mask = np.isfinite(self._pit_samps)
         self._pit_samps[~samp_mask] = 0
         if not np.all(samp_mask):
-            logging.warning('Some PIT samples were `NaN`. They have been replacd with 0.')
+            logging.warning(
+                "Some PIT samples were `NaN`. They have been replaced with 0."
+            )

         n_pit = np.min([len(self._pit_samps), len(eval_grid)])
         if n_pit < len(eval_grid):
-            logging.warning('Number of pit samples is smaller than the evaluation grid size. '
-                            'Will create a new evaluation grid with size = number of pit samples')
+            logging.warning(
+                "Number of pit samples is smaller than the evaluation grid size. "
+                "Will create a new evaluation grid with size = number of pit samples"
+            )
             eval_grid = np.linspace(0, 1, n_pit)

         data_quants = np.quantile(self._pit_samps, eval_grid)
@@ -72,13 +82,14 @@ def __init__(self, qp_ens, true_vals, eval_grid=DEFAULT_QUANTS):
             qp.quant,
             data=dict(
                 quants=unique_eval_grid[quant_mask],
-                locs=np.atleast_2d(unique_data_quants[quant_mask])
-            )
+                locs=np.atleast_2d(unique_data_quants[quant_mask]),
+            ),
         )

     @property
     def pit_samps(self):
-        """Returns the PIT samples. i.e. ``CDF(true_vals)`` for each distribution in the Ensemble used to initialize the PIT object.
+        """Returns the PIT samples, i.e. ``CDF(true_vals)`` for each distribution
+        in the Ensemble used to initialize the PIT object.

         Returns
         -------
@@ -99,7 +110,7 @@ def pit(self):
         return self._pit

     def calculate_pit_meta_metrics(self):
-        """Convenience method that will calculate all of the PIT meta metrics and return
+        """Convenience method that will calculate all of the PIT meta metrics and return
         them as a dictionary.

         Returns
@@ -109,14 +120,14 @@ def calculate_pit_meta_metrics(self):
         """
         pit_meta_metrics = {}

-        pit_meta_metrics['ad'] = self.evaluate_PIT_anderson_ksamp()
-        pit_meta_metrics['cvm'] = self.evaluate_PIT_CvM()
-        pit_meta_metrics['ks'] = self.evaluate_PIT_KS()
-        pit_meta_metrics['outlier_rate'] = self.evaluate_PIT_outlier_rate()
+        pit_meta_metrics["ad"] = self.evaluate_PIT_anderson_ksamp()
+        pit_meta_metrics["cvm"] = self.evaluate_PIT_CvM()
+        pit_meta_metrics["ks"] = self.evaluate_PIT_KS()
+        pit_meta_metrics["outlier_rate"] = self.evaluate_PIT_outlier_rate()

         return pit_meta_metrics

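An illustrative end-to-end sketch of the workflow above (not part of the patch); the ensemble, true values, and sizes are arbitrary example choices.

    import numpy as np
    import qp
    from qp.metrics.pit import PIT

    rng = np.random.default_rng(0)
    locs = rng.uniform(-1.0, 1.0, size=(100, 1))
    scales = np.full((100, 1), 0.5)
    ens = qp.Ensemble(qp.stats.norm, data=dict(loc=locs, scale=scales))
    true_vals = rng.uniform(-1.0, 1.0, size=100)

    pit = PIT(ens, true_vals)
    print(pit.pit_samps)                     # CDF(true value) per distribution
    print(pit.calculate_pit_meta_metrics())  # keys: 'ad', 'cvm', 'ks', 'outlier_rate'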
-    def evaluate_PIT_anderson_ksamp(self, pit_min=0., pit_max=1.):
+    def evaluate_PIT_anderson_ksamp(self, pit_min=0.0, pit_max=1.0):
         """Use scipy.stats.anderson_ksamp to compute the Anderson-Darling statistic
         for the cdf(truth) values by comparing with a uniform distribution between 0 and 1.

         Up to the current version (1.9.3), scipy.stats.anderson does not support
@@ -136,7 +147,8 @@ def evaluate_PIT_anderson_ksamp(self, pit_min=0., pit_max=1.):
         -------
         array
             An array of objects with attributes `statistic`, `critical_values`, and `significance_level`.
-            For details see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson_ksamp.html
+            For details see:
+            https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson_ksamp.html
         """
         # Removed the CDF values that are outside the min/max range
         trimmed_pit_values = self._trim_pit_values(pit_min, pit_max)
@@ -154,7 +166,8 @@ def evaluate_PIT_CvM(self):
         -------
         array
             An array of objects with attributes `statistic` and `pvalue`
-            For details see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.cramervonmises.html
+            For details see:
+            https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.cramervonmises.html
         """
         return stats.cramervonmises(self._pit_samps, stats.uniform.cdf)

@@ -189,8 +202,8 @@ def evaluate_PIT_outlier_rate(self, pit_min=0.0001, pit_max=0.9999):
         return calculate_outlier_rate(self._pit, pit_min, pit_max)[0]

     @classmethod
-    def _create_quant_mask(self, data_quants):
-        """ Create a numpy mask such that, when applied only values greater than
+    def _create_quant_mask(cls, data_quants):
+        """Create a numpy mask such that, when applied, only values greater than
         0 and less than 1.0 are kept. While this function is fairly simple,
         separating it into a small helper method makes testing much easier.

@@ -205,7 +218,7 @@ def _create_quant_mask(self, data_quants):
             The boolean mask
         """
-        return np.bitwise_and(data_quants > 0., data_quants < 1)
+        return np.bitwise_and(data_quants > 0.0, data_quants < 1)

     def _trim_pit_values(self, cdf_min, cdf_max):
         """Remove and report any cdf(x) that are outside the min/max range.
diff --git a/src/qp/mixmod_pdf.py b/src/qp/mixmod_pdf.py
index b5789af7..e9cd405e 100644
--- a/src/qp/mixmod_pdf.py
+++ b/src/qp/mixmod_pdf.py
@@ -31,9 +31,10 @@ class mixmod_gen(Pdf_rows_gen):
     The ppf() is computed by computing the cdf() values on a fixed grid and
     interpolating the inverse function.
     """
+
     # pylint: disable=protected-access

-    name = 'mixmod'
+    name = "mixmod"
     version = 0

     _support_mask = rv_continuous._support_mask

@@ -49,29 +50,33 @@ def __init__(self, means, stds, weights, *args, **kwargs):
         stds:  array_like
             The standard deviations of the Gaussians
         weights : array_like
-            The weights to attach to the Gaussians. Weights should sum up to one. If not, the weights are interpreted as relative weights.
+            The weights to attach to the Gaussians. Weights should sum up to one.
+            If not, the weights are interpreted as relative weights.
         """
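A minimal construction sketch for the generator documented above (illustrative values, not from the patch): two distributions with three Gaussian components each; the relative weights are normalized internally.

    import numpy as np
    import qp

    means = np.array([[0.5, 1.1, 2.9], [0.2, 1.3, 2.5]])
    stds = np.array([[0.15, 0.13, 0.14], [0.20, 0.10, 0.30]])
    weights = np.array([[1.0, 2.0, 1.0], [3.0, 1.0, 1.0]])  # relative weights

    ens = qp.Ensemble(qp.mixmod, data=dict(means=means, stds=stds, weights=weights))
    print(ens.pdf(np.linspace(0.0, 3.0, 7)))  # one row of PDF values per distribution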
""" self._scipy_version_warning() self._means = reshape_to_pdf_size(means, -1) self._stds = reshape_to_pdf_size(stds, -1) self._weights = reshape_to_pdf_size(weights, -1) - kwargs['shape'] = means.shape[:-1] + kwargs["shape"] = means.shape[:-1] self._ncomps = means.shape[-1] super().__init__(*args, **kwargs) - if np.any(self._weights<0): - raise ValueError('All weights need to be larger than zero') - self._weights = self._weights/self._weights.sum(axis=1)[:,None] - self._addobjdata('weights', self._weights) - self._addobjdata('stds', self._stds) - self._addobjdata('means', self._means) + if np.any(self._weights < 0): + raise ValueError("All weights need to be larger than zero") + self._weights = self._weights / self._weights.sum(axis=1)[:, None] + self._addobjdata("weights", self._weights) + self._addobjdata("stds", self._stds) + self._addobjdata("means", self._means) def _scipy_version_warning(self): import scipy # pylint: disable=import-outside-toplevel + scipy_version = scipy.__version__ - vtuple = scipy_version.split('.') + vtuple = scipy_version.split(".") if int(vtuple[0]) > 1 or int(vtuple[1]) > 7: return - raise DeprecationWarning(f"Mixmod_gen will not work correctly with scipy version < 1.8.0, you have {scipy_version}") #pragma: no cover + raise DeprecationWarning( + f"Mixmod_gen will not work correctly with scipy version < 1.8.0, you have {scipy_version}" + ) # pragma: no cover @property def weights(self): @@ -90,54 +95,62 @@ def stds(self): def _pdf(self, x, row): # pylint: disable=arguments-differ - if np.ndim(x) > 1: #pragma: no cover + if np.ndim(x) > 1: # pragma: no cover x = np.expand_dims(x, -2) - return (self.weights[row].swapaxes(-2,-1) * - sps.norm(loc=self._means[row].swapaxes(-2,-1), - scale=self._stds[row].swapaxes(-2,-1)).pdf(x)).sum(axis=0) + return ( + self.weights[row].swapaxes(-2, -1) + * sps.norm( + loc=self._means[row].swapaxes(-2, -1), + scale=self._stds[row].swapaxes(-2, -1), + ).pdf(x) + ).sum(axis=0) def _cdf(self, x, row): # pylint: disable=arguments-differ - if np.ndim(x) > 1: #pragma: no cover + if np.ndim(x) > 1: # pragma: no cover x = np.expand_dims(x, -2) - return (self.weights[row].swapaxes(-2,-1) * - sps.norm(loc=self._means[row].swapaxes(-2,-1), - scale=self._stds[row].swapaxes(-2,-1)).cdf(x)).sum(axis=0) + return ( + self.weights[row].swapaxes(-2, -1) + * sps.norm( + loc=self._means[row].swapaxes(-2, -1), + scale=self._stds[row].swapaxes(-2, -1), + ).cdf(x) + ).sum(axis=0) def _ppf(self, x, row): # pylint: disable=arguments-differ - min_val = np.min(self._means - 6*self._stds) - max_val = np.max(self._means + 6*self._stds) + min_val = np.min(self._means - 6 * self._stds) + max_val = np.max(self._means + 6 * self._stds) grid = np.linspace(min_val, max_val, 201) case_idx, _, rr = get_eval_case(x, row) if case_idx == 1: cdf_vals = self.cdf(grid, rr) elif case_idx == 3: cdf_vals = self.cdf(grid, np.expand_dims(rr, -1)) - else: #pragma: no cover - raise ValueError(f"Opps, we handle this kind of input to mixmod._ppf {case_idx}") - return interpolate_multi_x_y(x, row, cdf_vals, grid, - bounds_error=False, fill_value=(min_val, max_val)).ravel() - - + else: # pragma: no cover + raise ValueError( + f"Opps, we handle this kind of input to mixmod._ppf {case_idx}" + ) + return interpolate_multi_x_y( + x, row, cdf_vals, grid, bounds_error=False, fill_value=(min_val, max_val) + ).ravel() def _updated_ctor_param(self): """ Set the bins as additional constructor argument """ dct = super()._updated_ctor_param() - dct['means'] = self._means - dct['stds'] = 
self._stds - dct['weights'] = self._weights + dct["means"] = self._means + dct["stds"] = self._stds + dct["weights"] = self._weights return dct - @classmethod def get_allocation_kwds(cls, npdf, **kwargs): """Return the keywords necessary to create an 'empty' hdf5 file with npdf entries for iterative file writeout. We only need to allocate the objdata columns, as the metadata can be written when we finalize the file. - + Parameters ---------- npdf: int @@ -145,12 +158,15 @@ def get_allocation_kwds(cls, npdf, **kwargs): kwargs: dict dictionary of kwargs needed to create the ensemble """ - if 'means' not in kwargs: #pragma: no cover + if "means" not in kwargs: # pragma: no cover raise ValueError("required argument means not included in kwargs") - ncomp = np.shape(kwargs['means'])[-1] - return dict(means=((npdf, ncomp), 'f4'), stds=((npdf, ncomp), 'f4'), weights=((npdf, ncomp), 'f4')) - + ncomp = np.shape(kwargs["means"])[-1] + return dict( + means=((npdf, ncomp), "f4"), + stds=((npdf, ncomp), "f4"), + weights=((npdf, ncomp), "f4"), + ) @classmethod def add_mappings(cls): @@ -162,13 +178,18 @@ def add_mappings(cls): @classmethod def make_test_data(cls): - """ Make data for unit tests """ - cls.test_data = dict(mixmod=dict(gen_func=mixmod,\ - ctor_data=dict(weights=WEIGHT_MIXMOD,\ - means=MEAN_MIXMOD,\ - stds=STD_MIXMOD),\ - convert_data={}, test_xvals=TEST_XVALS, - atol_diff2=1.)) + """Make data for unit tests""" + cls.test_data = dict( + mixmod=dict( + gen_func=mixmod, + ctor_data=dict( + weights=WEIGHT_MIXMOD, means=MEAN_MIXMOD, stds=STD_MIXMOD + ), + convert_data={}, + test_xvals=TEST_XVALS, + atol_diff2=1.0, + ) + ) mixmod = mixmod_gen.create diff --git a/src/qp/packed_interp_pdf.py b/src/qp/packed_interp_pdf.py index d87cda7c..376c584f 100644 --- a/src/qp/packed_interp_pdf.py +++ b/src/qp/packed_interp_pdf.py @@ -9,8 +9,7 @@ from qp.pdf_gen import Pdf_rows_gen from qp.plotting import get_axes_and_xlims, plot_pdf_on_axes from qp.test_data import TEST_XVALS, XBINS, YARRAY -from qp.utils import (interpolate_multi_x_y, interpolate_x_multi_y, - reshape_to_pdf_size) +from qp.utils import interpolate_multi_x_y, interpolate_x_multi_y, reshape_to_pdf_size def extract_and_pack_vals_at_x(in_dist, **kwargs): @@ -34,16 +33,18 @@ def extract_and_pack_vals_at_x(in_dist, **kwargs): data : `dict` The extracted data """ - xvals = kwargs.pop('xvals', None) - packing_type = kwargs.pop('packing_type') - if xvals is None: # pragma: no cover + xvals = kwargs.pop("xvals", None) + packing_type = kwargs.pop("packing_type") + if xvals is None: # pragma: no cover raise ValueError("To convert to extract_xy_vals you must specify xvals") yvals = in_dist.pdf(xvals) ypacked, ymax = pack_array(packing_type, yvals, **kwargs) - return dict(xvals=xvals, ypacked=ypacked, ymax=ymax, packing_type=packing_type, **kwargs) + return dict( + xvals=xvals, ypacked=ypacked, ymax=ymax, packing_type=packing_type, **kwargs + ) -class packed_interp_gen(Pdf_rows_gen): +class packed_interp_gen(Pdf_rows_gen): # pylint: disable=too-many-instance-attributes """Interpolator based distribution Notes @@ -54,14 +55,24 @@ class packed_interp_gen(Pdf_rows_gen): See qp.interp_pdf for details on interpolation """ + # pylint: disable=protected-access - name = 'packed_interp' + name = "packed_interp" version = 0 _support_mask = rv_continuous._support_mask - def __init__(self, xvals, ypacked, ymax, *args, packing_type=PackingType.linear_from_rowmax, log_floor=-3., **kwargs): + def __init__( + self, + xvals, + ypacked, + ymax, + *args, + 
packing_type=PackingType.linear_from_rowmax, + log_floor=-3.0, + **kwargs, + ): """ Create a new distribution by interpolating the given values @@ -74,15 +85,17 @@ def __init__(self, xvals, ypacked, ymax, *args, packing_type=PackingType.linear_ ymax : array_like The maximum y-values for each pdf """ - if np.size(xvals) != np.shape(ypacked)[-1]: # pragma: no cover - raise ValueError("Shape of xbins in xvals (%s) != shape of xbins in yvals (%s)" % - (np.size(xvals), np.shape(ypacked)[-1])) + if np.size(xvals) != np.shape(ypacked)[-1]: # pragma: no cover + raise ValueError( + "Shape of xbins in xvals (%s) != shape of xbins in yvals (%s)" + % (np.size(xvals), np.shape(ypacked)[-1]) + ) self._xvals = np.asarray(xvals) # Set support self._xmin = self._xvals[0] self._xmax = self._xvals[-1] - kwargs['shape'] = np.shape(ypacked)[:-1] + kwargs["shape"] = np.shape(ypacked)[:-1] self._yvals = None if isinstance(packing_type, PackingType): @@ -93,20 +106,20 @@ def __init__(self, xvals, ypacked, ymax, *args, packing_type=PackingType.linear_ self._ymax = reshape_to_pdf_size(ymax, -1) self._ypacked = reshape_to_pdf_size(ypacked, -1) - check_input = kwargs.pop('check_input', True) + check_input = kwargs.pop("check_input", True) if check_input: self._compute_ycumul() - self._yvals = (self._yvals.T / self._ycumul[:,-1]).T - self._ycumul = (self._ycumul.T / self._ycumul[:,-1]).T + self._yvals = (self._yvals.T / self._ycumul[:, -1]).T + self._ycumul = (self._ycumul.T / self._ycumul[:, -1]).T else: # pragma: no cover self._ycumul = None super().__init__(*args, **kwargs) - self._addmetadata('xvals', self._xvals) - self._addmetadata('packing_type', self._packing_type) - self._addmetadata('log_floor', self._log_floor) - self._addobjdata('ypacked', self._ypacked) - self._addobjdata('ymax', self._ymax) + self._addmetadata("xvals", self._xvals) + self._addmetadata("packing_type", self._packing_type) + self._addmetadata("log_floor", self._log_floor) + self._addobjdata("ypacked", self._ypacked) + self._addobjdata("ymax", self._ymax) def _compute_ycumul(self): if self._yvals is None: @@ -114,12 +127,20 @@ def _compute_ycumul(self): copy_shape = np.array(self._yvals.shape) self._ycumul = np.ndarray(copy_shape) self._ycumul[:, 0] = 0.5 * self._yvals[:, 0] * (self._xvals[1] - self._xvals[0]) - self._ycumul[:, 1:] = np.cumsum((self._xvals[1:] - self._xvals[:-1]) * - 0.5 * np.add(self._yvals[:,1:], - self._yvals[:,:-1]), axis=1) + self._ycumul[:, 1:] = np.cumsum( + (self._xvals[1:] - self._xvals[:-1]) + * 0.5 + * np.add(self._yvals[:, 1:], self._yvals[:, :-1]), + axis=1, + ) def _unpack(self): - self._yvals = unpack_array(PackingType(self._packing_type), self._ypacked, row_max=self._ymax, log_floor=self._log_floor) + self._yvals = unpack_array( + PackingType(self._packing_type), + self._ypacked, + row_max=self._ymax, + log_floor=self._log_floor, + ) @property def xvals(self): @@ -155,34 +176,42 @@ def _pdf(self, x, row): # pylint: disable=arguments-differ if self._yvals is None: # pragma: no cover self._unpack() - return interpolate_x_multi_y(x, row, self._xvals, self._yvals, - bounds_error=False, fill_value=0.).ravel() + return interpolate_x_multi_y( + x, row, self._xvals, self._yvals, bounds_error=False, fill_value=0.0 + ).ravel() def _cdf(self, x, row): # pylint: disable=arguments-differ if self._ycumul is None: # pragma: no cover self._compute_ycumul() - return interpolate_x_multi_y(x, row, self._xvals, self._ycumul, - bounds_error=False, fill_value=(0.,1.)).ravel() + return interpolate_x_multi_y( + x, row, self._xvals, 
self._ycumul, bounds_error=False, fill_value=(0.0, 1.0) + ).ravel() def _ppf(self, x, row): # pylint: disable=arguments-differ if self._ycumul is None: # pragma: no cover self._compute_ycumul() - return interpolate_multi_x_y(x, row, self._ycumul, self._xvals, - bounds_error=False, fill_value=(self._xmin, self._xmax)).ravel() + return interpolate_multi_x_y( + x, + row, + self._ycumul, + self._xvals, + bounds_error=False, + fill_value=(self._xmin, self._xmax), + ).ravel() def _munp(self, m, *args): - """ compute moments """ + """compute moments""" # pylint: disable=arguments-differ # Silence floating point warnings from integration. - with np.errstate(all='ignore'): + with np.errstate(all="ignore"): vals = self.custom_generic_moment(m) return vals def custom_generic_moment(self, m): - """ Compute the mth moment """ + """Compute the mth moment""" m = np.asarray(m) dx = self._xvals[1] - self._xvals[0] if self._yvals is None: # pragma: no cover @@ -194,11 +223,11 @@ def _updated_ctor_param(self): Set the bin edges and packing data as additional constructor argument """ dct = super()._updated_ctor_param() - dct['xvals'] = self._xvals - dct['ypacked'] = self._ypacked - dct['ymax'] = self._ymax - dct['log_floor'] = self._log_floor - dct['packing_type'] = PackingType(self._packing_type) + dct["xvals"] = self._xvals + dct["ypacked"] = self._ypacked + dct["ymax"] = self._ymax + dct["log_floor"] = self._log_floor + dct["packing_type"] = PackingType(self._packing_type) return dct @classmethod @@ -214,12 +243,12 @@ def get_allocation_kwds(cls, npdf, **kwargs): kwargs: dict dictionary of kwargs needed to create the ensemble """ - if 'xvals' not in kwargs: #pragma: no cover + if "xvals" not in kwargs: # pragma: no cover raise ValueError("required argument xvals not included in kwargs") - ngrid = np.shape(kwargs['xvals'])[-1] + ngrid = np.shape(kwargs["xvals"])[-1] return dict( - ypacked=((npdf, ngrid), 'u1'), - ymax=((npdf, 1), 'f4'), + ypacked=((npdf, ngrid), "u1"), + ymax=((npdf, 1), "f4"), ) @classmethod @@ -241,20 +270,43 @@ def add_mappings(cls): @classmethod def make_test_data(cls): - """ Make data for unit tests """ - ypacked_lin, ymax_lin = pack_array(PackingType.linear_from_rowmax, YARRAY.copy()) - ypacked_log, ymax_log = pack_array(PackingType.log_from_rowmax, YARRAY.copy(), log_floor=-3) + """Make data for unit tests""" + ypacked_lin, ymax_lin = pack_array( + PackingType.linear_from_rowmax, YARRAY.copy() + ) + ypacked_log, ymax_log = pack_array( + PackingType.log_from_rowmax, YARRAY.copy(), log_floor=-3 + ) cls.test_data = dict( lin_packed_interp=dict( gen_func=packed_interp, - ctor_data=dict(packing_type=PackingType.linear_from_rowmax, xvals=XBINS, ypacked=ypacked_lin, ymax=ymax_lin), - convert_data=dict(xvals=XBINS, packing_type=PackingType.linear_from_rowmax), test_xvals=TEST_XVALS + ctor_data=dict( + packing_type=PackingType.linear_from_rowmax, + xvals=XBINS, + ypacked=ypacked_lin, + ymax=ymax_lin, + ), + convert_data=dict( + xvals=XBINS, packing_type=PackingType.linear_from_rowmax + ), + test_xvals=TEST_XVALS, ), log_packed_interp=dict( gen_func=packed_interp, - ctor_data=dict(packing_type=PackingType.log_from_rowmax, xvals=XBINS, ypacked=ypacked_log, ymax=ymax_log, log_floor=-3.), - convert_data=dict(xvals=XBINS, packing_type=PackingType.log_from_rowmax, log_floor=-3.), test_xvals=TEST_XVALS + ctor_data=dict( + packing_type=PackingType.log_from_rowmax, + xvals=XBINS, + ypacked=ypacked_log, + ymax=ymax_log, + log_floor=-3.0, + ), + convert_data=dict( + xvals=XBINS, + 
+                packing_type=PackingType.log_from_rowmax,
+                log_floor=-3.0,
+            ),
+            test_xvals=TEST_XVALS,
         ),
     )
diff --git a/src/qp/packing_utils.py b/src/qp/packing_utils.py
index 59864d77..e58ebb55 100644
--- a/src/qp/packing_utils.py
+++ b/src/qp/packing_utils.py
@@ -4,9 +4,7 @@
 import numpy as np


-
 class PackingType(enum.Enum):
-
     linear_from_rowmax = 0
     log_from_rowmax = 1

@@ -48,11 +46,11 @@ def linear_unpack_from_rowmax(packed_array, row_max):
     unpacked_array : array_like
         The unpacked values
     """
-    unpacked_array = row_max * packed_array / 255.
+    unpacked_array = row_max * packed_array / 255.0
     return unpacked_array


-def log_pack_from_rowmax(input_array, log_floor=-3.):
+def log_pack_from_rowmax(input_array, log_floor=-3.0):
     """Pack an array into 8bit unsigned integers, using the maximum of each row as a reference

     This packs the values onto a log grid for each row, running from row_max / 10**log_floor to row_max

@@ -71,13 +69,22 @@ def log_pack_from_rowmax(input_array, log_floor=-3.):
     row_max : array_like
         The max for each row, needed to unpack the array
     """
-    neg_log_floor = -1. * log_floor
-    epsilon = np.power(10., 3 * log_floor)
+    neg_log_floor = -1.0 * log_floor
+    epsilon = np.power(10.0, 3 * log_floor)
     row_max = np.expand_dims(input_array.max(axis=1), -1)
-    return np.round(255*(np.log10((input_array + epsilon) / row_max) + neg_log_floor)/neg_log_floor).clip(0., 255.).astype(np.uint8), row_max
-
-
-def log_unpack_from_rowmax(packed_array, row_max, log_floor=-3.):
+    return (
+        np.round(
+            255
+            * (np.log10((input_array + epsilon) / row_max) + neg_log_floor)
+            / neg_log_floor
+        )
+        .clip(0.0, 255.0)
+        .astype(np.uint8),
+        row_max,
+    )
+
+
+def log_unpack_from_rowmax(packed_array, row_max, log_floor=-3.0):
     """Unpack an array of 8bit unsigned integers, using the maximum of each row as a reference

     Parameters
@@ -94,8 +101,12 @@ def log_unpack_from_rowmax(packed_array, row_max, log_floor=-3.):
     unpacked_array : array_like
         The unpacked values
     """
-    neg_log_floor = -1. * log_floor
-    unpacked_array = row_max * np.where(packed_array == 0, 0., np.power(10, neg_log_floor * ((packed_array / 255.) - 1.)))
+    neg_log_floor = -1.0 * log_floor
+    unpacked_array = row_max * np.where(
+        packed_array == 0,
+        0.0,
+        np.power(10, neg_log_floor * ((packed_array / 255.0) - 1.0)),
+    )
     return unpacked_array


@@ -115,8 +126,10 @@ def pack_array(packing_type, input_array, **kwargs):
     if packing_type == PackingType.linear_from_rowmax:
         return linear_pack_from_rowmax(input_array)
     if packing_type == PackingType.log_from_rowmax:
-        return log_pack_from_rowmax(input_array, kwargs.get('log_floor', -3))
-    raise ValueError(f"Packing for packing type {packing_type} is not implemetned") # pragma: no cover
+        return log_pack_from_rowmax(input_array, kwargs.get("log_floor", -3))
+    raise ValueError(
+        f"Packing for packing type {packing_type} is not implemented"
+    )  # pragma: no cover


 def unpack_array(packing_type, packed_array, **kwargs):
@@ -132,7 +145,13 @@ def unpack_array(packing_type, packed_array, **kwargs):
         Return values and keyword argument depend on the packing type used
     """
     if packing_type == PackingType.linear_from_rowmax:
-        return linear_unpack_from_rowmax(packed_array, row_max=kwargs.get('row_max'))
+        return linear_unpack_from_rowmax(packed_array, row_max=kwargs.get("row_max"))
     if packing_type == PackingType.log_from_rowmax:
-        return log_unpack_from_rowmax(packed_array, row_max=kwargs.get('row_max'), log_floor=kwargs.get('log_floor', -3))
-    raise ValueError(f"Unpacking for packing type {packing_type} is not implemetned") # pragma: no cover
+        return log_unpack_from_rowmax(
+            packed_array,
+            row_max=kwargs.get("row_max"),
+            log_floor=kwargs.get("log_floor", -3),
+        )
+    raise ValueError(
+        f"Unpacking for packing type {packing_type} is not implemented"
+    )  # pragma: no cover
diff --git a/src/qp/pdf_gen.py b/src/qp/pdf_gen.py
index 5898eaf5..36fefac1 100644
--- a/src/qp/pdf_gen.py
+++ b/src/qp/pdf_gen.py
@@ -53,8 +53,8 @@ def __init__(self, *args, **kwargs):
         self._addclassmetadata(type(self))

     def _addclassmetadata(self, cls):
-        self._metadata['pdf_name'] = np.array([cls.name.encode()])
-        self._metadata['pdf_version'] = np.array([cls.version])
+        self._metadata["pdf_name"] = np.array([cls.name.encode()])
+        self._metadata["pdf_version"] = np.array([cls.version])

     def _addmetadata(self, key, val):
         self._metadata[key] = np.expand_dims(val, 0)
@@ -108,7 +108,7 @@ def _add_extraction_method(cls, the_func, method):
         set_val_or_default(cls._extraction_map, method, the_func)

     @classmethod
-    def _add_reader_method(cls, the_func, version): #pragma: no cover
+    def _add_reader_method(cls, the_func, version):  # pragma: no cover
         """Add a method used to convert data read from a file PDF of this type"""
         set_val_or_default(cls._reader_map, version, the_func)

@@ -119,13 +119,12 @@ def print_method_maps(cls, stream=sys.stdout):
         pretty_print(cls._extraction_map, ["Extract "], stream=stream)
         pretty_print(cls._reader_map, ["Reader "], stream=stream)

-
     @classmethod
     def create_gen(cls, **kwds):
         """Create and return a `scipy.stats.rv_continuous` object using the
         keyword arguments provided"""
         kwds_copy = kwds.copy()
-        name = kwds_copy.pop('name', 'dist')
+        name = kwds_copy.pop("name", "dist")
         return (cls(name=name), kwds_copy)

     @classmethod
@@ -154,7 +153,7 @@ def get_allocation_kwds(cls, npdf, **kwargs):
         """Return kwds necessary to create 'empty' hdf5 file with npdf entries
         for iterative writeout
         """
-        raise NotImplementedError() #pragma: no cover
+        raise NotImplementedError()  # pragma: no cover


 class rv_frozen_func(rv_continuous_frozen):
@@ -173,11 +172,11 @@ def __init__(self, dist, *args, **kwds):
             The number of PDFs this object represents
         """
         super().__init__(dist, *args, **kwds)
-        array_list = [ np.array(val) for val in self.kwds.values() ]
+        array_list = [np.array(val) for val in self.kwds.values()]
         bc = np.broadcast(array_list)
         ss = bc.shape
         if len(ss) < 2:
-            self._shape = (1)
+            self._shape = 1
         else:
             self._shape = ss[1:-1]
         self._npdf = np.product(self._shape).astype(int)
@@ -214,8 +213,8 @@ def histogramize(self, bins):
             of bins and values in bins
         """
         cdf_vals = reshape_to_pdf_size(self.cdf(bins), -1)
-        bin_vals = cdf_vals[:,1:] - cdf_vals[:,0:-1]
-        return (bins, reshape_to_pdf_shape(bin_vals, self._shape, bins.size-1))
+        bin_vals = cdf_vals[:, 1:] - cdf_vals[:, 0:-1]
+        return (bins, reshape_to_pdf_shape(bin_vals, self._shape, bins.size - 1))


 class rv_frozen_rows(rv_continuous_frozen):
@@ -231,7 +230,9 @@ def __init__(self, dist, shape, *args, **kwds):
         self._npdf = np.product(shape).astype(int)
         self._ndim = np.size(shape)
         if self._npdf is not None:
-            kwds.setdefault('row', np.expand_dims(np.arange(self._npdf).reshape(self._shape), -1))
+            kwds.setdefault(
+                "row", np.expand_dims(np.arange(self._npdf).reshape(self._shape), -1)
+            )
         super().__init__(dist, *args, **kwds)

     @property
@@ -265,9 +266,8 @@ def histogramize(self, bins):
             of bins and values in bins
         """
         cdf_vals = reshape_to_pdf_size(self.cdf(bins), -1)
-        bin_vals = cdf_vals[:,1:] - cdf_vals[:,0:-1]
-        return (bins, reshape_to_pdf_shape(bin_vals, self._shape, bins.size-1))
-
+        bin_vals = cdf_vals[:, 1:] - cdf_vals[:, 0:-1]
+        return (bins, reshape_to_pdf_shape(bin_vals, self._shape, bins.size - 1))


 class Pdf_rows_gen(rv_continuous, Pdf_gen):
@@ -277,9 +277,10 @@ class Pdf_rows_gen(rv_continuous, Pdf_gen):
     where each object represents a single distribution
     """
+
     def __init__(self, *args, **kwargs):
         """C'tor"""
-        self._shape = kwargs.pop('shape', (1))
+        self._shape = kwargs.pop("shape", (1))
         self._npdf = np.product(self._shape).astype(int)
         super().__init__(*args, **kwargs)

@@ -294,7 +295,7 @@ def npdf(self):
         return self._npdf

     @staticmethod
-    def _sliceargs(x, row, *args): #pragma: no cover
+    def _sliceargs(x, row, *args):  # pragma: no cover
         if np.size(x) == 1 or np.size(row) == 1:
             return False, x, row, args
         xx = np.unique(x)
@@ -305,14 +306,14 @@ def _sliceargs(x, row, *args): #pragma: no cover
         rr = row
         if np.size(xx) * np.size(rr) != np.size(x):
             return False, x, row, args
-        outargs = [arg[0:np.size(xx)] for arg in args]
+        outargs = [arg[0 : np.size(xx)] for arg in args]
         return True, xx, rr, outargs

     def _rvs(self, *args, size=None, random_state=None):
         # Use basic inverse cdf algorithm for RV generation as default.
         U = random_state.uniform(size=size)
         Y = self._ppf(U, *args)
-        if size is None: #pragma: no cover
+        if size is None:  # pragma: no cover
             return Y
         return Y.reshape(size)

@@ -323,7 +324,10 @@ def _argcheck(self, *args):
         """
         cond = 1
         if args:
-            cond = np.logical_and(cond, np.logical_and(asarray(args[0]) >= 0, asarray(args[0]) < self._npdf))
+            cond = np.logical_and(
+                cond,
+                np.logical_and(asarray(args[0]) >= 0, asarray(args[0]) < self._npdf),
+            )
         return np.atleast_1d(cond)

     def freeze(self, *args, **kwds):
@@ -349,12 +353,15 @@ def create_gen(cls, **kwds):
         return (cls(**kwds), {})

     def _scipy_version_warning(self):
-        import scipy #pylint: disable=import-outside-toplevel
+        import scipy  # pylint: disable=import-outside-toplevel
+
         scipy_version = scipy.__version__
-        vtuple = scipy_version.split('.')
+        vtuple = scipy_version.split(".")
         if int(vtuple[0]) > 1 or int(vtuple[1]) > 7:
             return
-        raise DeprecationWarning(f"Ensemble.moments will not work correctly with scipy version < 1.8.0, you have {scipy_version}") #pragma: no cover
+        raise DeprecationWarning(
+            f"Ensemble.moments will not work correctly with scipy version < 1.8.0, you have {scipy_version}"
+        )  # pragma: no cover

     def moment(self, n, *args, **kwds):
         """Returns the requested moments for all the PDFs.
@@ -376,12 +383,12 @@ def moment(self, n, *args, **kwds):

         return rv_continuous.moment(self, n, *args, **kwds)

-
 class Pdf_gen_wrap(Pdf_gen):
     """Mixin class to extend `scipy.stats.rv_continuous` with
     information needed for `qp` for analytic distributions.
     """
+
     def __init__(self, *args, **kwargs):
         """C'tor"""
         # pylint: disable=no-member,protected-access
@@ -406,8 +413,7 @@ def _my_freeze(self, *args, **kwds):

     @classmethod
     def get_allocation_kwds(cls, npdf, **kwargs):
-        return {key:((npdf,1), val.dtype) for key, val in kwargs.items()}
-
+        return {key: ((npdf, 1), val.dtype) for key, val in kwargs.items()}

     @classmethod
     def add_mappings(cls):
diff --git a/src/qp/plotting.py b/src/qp/plotting.py
index 0f196985..0bff4931 100644
--- a/src/qp/plotting.py
+++ b/src/qp/plotting.py
@@ -12,32 +12,32 @@ def init_matplotlib():
     # default
     # mpl.rcParams['text.usetex'] = True
     # mpl.rcParams['mathtext.rm'] = 'serif'
-    mpl.rcParams['font.family'] = 'serif'
-    mpl.rcParams['font.serif'] = 'Times New Roman'
-    mpl.rcParams['axes.titlesize'] = 16
-    mpl.rcParams['axes.labelsize'] = 16
-    mpl.rcParams['savefig.dpi'] = 250
-    mpl.rcParams['savefig.format'] = 'pdf'
-    mpl.rcParams['savefig.bbox'] = 'tight'
+    mpl.rcParams["font.family"] = "serif"
+    mpl.rcParams["font.serif"] = "Times New Roman"
+    mpl.rcParams["axes.titlesize"] = 16
+    mpl.rcParams["axes.labelsize"] = 16
+    mpl.rcParams["savefig.dpi"] = 250
+    mpl.rcParams["savefig.format"] = "pdf"
+    mpl.rcParams["savefig.bbox"] = "tight"


 # init_matplotlib()

 COLORS = {}
-COLORS['truth'] = 'k'
-COLORS['mix_mod'] = 'k'
-COLORS['gridded'] = 'k'
-COLORS['quantiles'] = 'blueviolet'
-COLORS['histogram'] = 'darkorange'
-COLORS['samples'] = 'forestgreen'
+COLORS["truth"] = "k"
+COLORS["mix_mod"] = "k"
+COLORS["gridded"] = "k"
+COLORS["quantiles"] = "blueviolet"
+COLORS["histogram"] = "darkorange"
+COLORS["samples"] = "forestgreen"

 STYLES = {}
-STYLES['truth'] = '-'
-STYLES['mix_mod'] = ':'
-STYLES['gridded'] = '--'
-STYLES['quantiles'] = '--'#(0,(5,10))
-STYLES['histogram'] = ':'#(0,(3,6))
-STYLES['samples'] = '-.'#(0,(1,2))
+STYLES["truth"] = "-"
+STYLES["mix_mod"] = ":"
+STYLES["gridded"] = "--"
+STYLES["quantiles"] = "--"  # (0,(5,10))
+STYLES["histogram"] = ":"  # (0,(3,6))
+STYLES["samples"] = "-."
# (0,(1,2)) def make_figure_axes(xlim, **kwargs): @@ -56,8 +56,8 @@ def make_figure_axes(xlim, **kwargs): fig, axes : The figure and axes """ - xlabel = kwargs.pop('xlabel', 'redshift') - ylabel = kwargs.pop('ylabel', 'p(z)') + xlabel = kwargs.pop("xlabel", "redshift") + ylabel = kwargs.pop("ylabel", "p(z)") fig = plt.figure() axes = fig.add_subplot(111) @@ -70,20 +70,19 @@ def make_figure_axes(xlim, **kwargs): def get_axes_and_xlims(**kwargs): """Get and return the axes and xlims from the kwargs""" - axes = kwargs.pop('axes', None) - xlim = kwargs.pop('xlim', None) + axes = kwargs.pop("axes", None) + xlim = kwargs.pop("xlim", None) if axes is None: - if xlim is None: #pragma: no cover + if xlim is None: # pragma: no cover raise ValueError("Either xlim or axes must be provided") _, axes = make_figure_axes(xlim, **kwargs) else: - if xlim is not None: #pragma: no cover + if xlim is not None: # pragma: no cover raise ValueError("Only one of xlim and axes should be provided") xlim = axes.get_xlim() return axes, xlim, kwargs - def plot_pdf_on_axes(axes, pdf, xvals, **kwargs): """ Plot a PDF on a set of axes, by evaluating it a set of points @@ -130,7 +129,7 @@ def plot_dist_pdf(pdf, **kwargs): npts : int The number of x-axis points - remaining kwargs : + remaining kwargs : passed directly to the `plot_pdf_on_axes` plot function Return @@ -138,12 +137,11 @@ def plot_dist_pdf(pdf, **kwargs): axes : The axes the data are plotted on """ axes, xlim, kw = get_axes_and_xlims(**kwargs) - npoints = kw.pop('npts', 101) + npoints = kw.pop("npts", 101) xvals = np.linspace(xlim[0], xlim[1], npoints) return plot_pdf_on_axes(axes, pdf, xvals, **kw) - def plot_pdf_quantiles_on_axes(axes, xvals, yvals, quantiles, **kwargs): """ Plot a PDF on a set of axes, by evaluating at the quantiles provided @@ -160,7 +158,7 @@ def plot_pdf_quantiles_on_axes(axes, xvals, yvals, quantiles, **kwargs): quantiles : (`np.array`, `np.array`) The quantiles that define the distribution pdf - **kwargs : + **kwargs : passed directly to the `matplotlib` plot function Other Parameters @@ -172,11 +170,18 @@ def plot_pdf_quantiles_on_axes(axes, xvals, yvals, quantiles, **kwargs): ------ axes : The axes the data are plotted on """ - kwargs.setdefault('label', 'Quantiles') - kwargs.setdefault('color', COLORS['quantiles']) - axes.scatter(quantiles[1], np.zeros(np.shape(quantiles[1])), marker='|', s=100, alpha=0.75, **kwargs) - kwargs.setdefault('label', 'Quantile Interpolated PDF') - axes.plot(xvals, yvals, lw=2.0, alpha=1.0, linestyle=STYLES['quantiles'], **kwargs) + kwargs.setdefault("label", "Quantiles") + kwargs.setdefault("color", COLORS["quantiles"]) + axes.scatter( + quantiles[1], + np.zeros(np.shape(quantiles[1])), + marker="|", + s=100, + alpha=0.75, + **kwargs, + ) + kwargs.setdefault("label", "Quantile Interpolated PDF") + axes.plot(xvals, yvals, lw=2.0, alpha=1.0, linestyle=STYLES["quantiles"], **kwargs) return axes @@ -186,9 +191,9 @@ def plot_pdf_histogram_on_axes(axes, hist, **kwargs): Parameters ---------- - axes : + axes : The axes we want to plot the data on - **kwargs : + **kwargs : passed directly to the `matplotlib` plot function Other Parameters @@ -198,19 +203,32 @@ def plot_pdf_histogram_on_axes(axes, hist, **kwargs): Return ------ - axes : + axes : The axes the data are plotted on """ - kwargs.setdefault('color', COLORS['histogram']) - axes.scatter(hist[0], np.zeros(np.shape(hist[0])), marker='|', s=100, label='Histogram Bin Ends', alpha=0.75) - bin_centers = (hist[0][0:-1] + hist[0][1:])/2. 
- kwargs.setdefault('label', 'Histogram Interpolated PDF') - axes.hist(bin_centers, bins=hist[0], weights=np.squeeze(hist[1]), lw=None, alpha=1.0, **kwargs) + kwargs.setdefault("color", COLORS["histogram"]) + axes.scatter( + hist[0], + np.zeros(np.shape(hist[0])), + marker="|", + s=100, + label="Histogram Bin Ends", + alpha=0.75, + ) + bin_centers = (hist[0][0:-1] + hist[0][1:]) / 2.0 + kwargs.setdefault("label", "Histogram Interpolated PDF") + axes.hist( + bin_centers, + bins=hist[0], + weights=np.squeeze(hist[1]), + lw=None, + alpha=1.0, + **kwargs, + ) return axes - def plot_pdf_samples_on_axes(axes, pdf, samples, **kwargs): """ Plot a PDF on a set of axes, by displaying a set of samples from the PDF @@ -224,28 +242,30 @@ def plot_pdf_samples_on_axes(axes, pdf, samples, **kwargs): samples : `np.array` Points sampled from the PDF - **kwargs : + **kwargs : passed directly to the `matplotlib` plot function Return ------ axes : The axes the data are plotted on """ - kwargs.setdefault('label', 'Samples') - kwargs.setdefault('color', COLORS['samples']) - axes.scatter(samples, np.zeros(np.shape(samples)), marker='|', s=100, alpha=0.75, **kwargs) - npoints = kwargs.pop('npoints', 101) + kwargs.setdefault("label", "Samples") + kwargs.setdefault("color", COLORS["samples"]) + axes.scatter( + samples, np.zeros(np.shape(samples)), marker="|", s=100, alpha=0.75, **kwargs + ) + npoints = kwargs.pop("npoints", 101) xlim = axes.get_xlim() xvals = np.linspace(xlim[0], xlim[1], npoints) yvals = np.squeeze(pdf.pdf(xvals)) - kwargs.setdefault('label', 'Samples Interpolated PDF') - plt.plot(xvals, yvals, lw=2.0, alpha=1.0, linestyle=STYLES['samples'], **kwargs) + kwargs.setdefault("label", "Samples Interpolated PDF") + plt.plot(xvals, yvals, lw=2.0, alpha=1.0, linestyle=STYLES["samples"], **kwargs) return axes def plot_native(pdf, **kwargs): """Utility function to plot a pdf in a format that is specific to that type of pdf""" - if hasattr(pdf, 'plot_native'): + if hasattr(pdf, "plot_native"): axes = pdf.plot_native(**kwargs) else: axes = pdf.dist.plot_native(pdf, **kwargs) @@ -254,7 +274,7 @@ def plot_native(pdf, **kwargs): def plot(pdf, **kwargs): """Utility function to plot a pdf in a format that is specific to that type of pdf""" - if hasattr(pdf, 'plot_native'): + if hasattr(pdf, "plot_native"): axes = pdf.plot(**kwargs) else: axes = pdf.dist.plot(pdf, **kwargs) diff --git a/src/qp/quant_pdf.py b/src/qp/quant_pdf.py index 031fb90a..fa80ef30 100644 --- a/src/qp/quant_pdf.py +++ b/src/qp/quant_pdf.py @@ -10,16 +10,19 @@ from qp.factory import add_class from qp.pdf_gen import Pdf_rows_gen from qp.plotting import get_axes_and_xlims, plot_pdf_quantiles_on_axes -from qp.quantile_pdf_constructors import (AbstractQuantilePdfConstructor, - CdfSplineDerivative, - DualSplineAverage, PiecewiseConstant, - PiecewiseLinear) +from qp.quantile_pdf_constructors import ( + AbstractQuantilePdfConstructor, + CdfSplineDerivative, + DualSplineAverage, + PiecewiseConstant, + PiecewiseLinear, +) from qp.test_data import QLOCS, QUANTS, TEST_XVALS -from qp.utils import (interpolate_multi_x_y, interpolate_x_multi_y, - reshape_to_pdf_size) +from qp.utils import interpolate_multi_x_y, interpolate_x_multi_y, reshape_to_pdf_size epsilon = sys.float_info.epsilon + def pad_quantiles(quants, locs): """Pad the quantiles and locations used to build a quantile representation @@ -45,7 +48,7 @@ def pad_quantiles(quants, locs): else: offset_lo = 0 pad_lo = False - if quants[-1] < 1.: + if quants[-1] < 1.0: pad_hi = True n_out += 1 else: @@ 
-54,26 +57,32 @@ def pad_quantiles(quants, locs): return quants, locs quants_out = np.zeros((n_out), quants.dtype) locs_out = np.zeros((locs.shape[0], n_out), quants.dtype) - quants_out[offset_lo:n_vals+offset_lo] = quants - locs_out[:,offset_lo:n_vals+offset_lo] = locs + quants_out[offset_lo : n_vals + offset_lo] = quants + locs_out[:, offset_lo : n_vals + offset_lo] = locs if pad_lo: - locs_out[:, 0] = locs[:, 0] - quants[0] * (locs[:, 1] - locs[:, 0]) / (quants[1] - quants[0]) + locs_out[:, 0] = locs[:, 0] - quants[0] * (locs[:, 1] - locs[:, 0]) / ( + quants[1] - quants[0] + ) if pad_hi: - quants_out[-1] = 1. - locs_out[:, -1] = locs[:, -1] - (1. - quants[-1]) * (locs[:, -2] - locs[:, -1]) / (quants[-1] - quants[-2]) + quants_out[-1] = 1.0 + locs_out[:, -1] = locs[:, -1] - (1.0 - quants[-1]) * ( + locs[:, -2] - locs[:, -1] + ) / (quants[-1] - quants[-2]) return quants_out, locs_out -DEFAULT_PDF_CONSTRUCTOR = 'piecewise_linear' + +DEFAULT_PDF_CONSTRUCTOR = "piecewise_linear" PDF_CONSTRUCTORS = { - 'cdf_spline_derivative': CdfSplineDerivative, - 'dual_spline_average': DualSplineAverage, - 'piecewise_linear': PiecewiseLinear, - 'piecewise_constant': PiecewiseConstant, + "cdf_spline_derivative": CdfSplineDerivative, + "dual_spline_average": DualSplineAverage, + "piecewise_linear": PiecewiseLinear, + "piecewise_constant": PiecewiseConstant, } -class quant_gen(Pdf_rows_gen): + +class quant_gen(Pdf_rows_gen): # pylint: disable=too-many-instance-attributes """Quantile based distribution, where the PDF is defined piecewise from the quantiles Notes @@ -83,9 +92,10 @@ class quant_gen(Pdf_rows_gen): It simply takes a set of x and y values and uses `scipy.interpolate.interp1d` to build the CDF """ + # pylint: disable=protected-access - name = 'quant' + name = "quant" version = 0 _support_mask = rv_continuous._support_mask @@ -106,27 +116,32 @@ def __init__(self, quants, locs, *args, **kwargs): self._xmax = np.max(locs) locs_2d = reshape_to_pdf_size(locs, -1) - self._check_input = kwargs.pop('check_input', True) + self._check_input = kwargs.pop("check_input", True) if self._check_input: quants, locs_2d = pad_quantiles(quants, locs_2d) self._quants = np.asarray(quants) self._nquants = self._quants.size if locs_2d.shape[-1] != self._nquants: # pragma: no cover - raise ValueError("Number of locations (%i) != number of quantile values (%i)" % (self._nquants, locs_2d.shape[-1])) + raise ValueError( + "Number of locations (%i) != number of quantile values (%i)" + % (self._nquants, locs_2d.shape[-1]) + ) self._locs = locs_2d - self._pdf_constructor_name = str(kwargs.pop('pdf_constructor_name', DEFAULT_PDF_CONSTRUCTOR)) + self._pdf_constructor_name = str( + kwargs.pop("pdf_constructor_name", DEFAULT_PDF_CONSTRUCTOR) + ) self._pdf_constructor = None self._instantiate_pdf_constructor() - kwargs['shape'] = locs.shape[:-1] + kwargs["shape"] = locs.shape[:-1] super().__init__(*args, **kwargs) - self._addmetadata('quants', self._quants) - self._addmetadata('pdf_constructor_name', self._pdf_constructor_name) - self._addmetadata('check_input', self._check_input) - self._addobjdata('locs', self._locs) + self._addmetadata("quants", self._quants) + self._addmetadata("pdf_constructor_name", self._pdf_constructor_name) + self._addmetadata("check_input", self._check_input) + self._addobjdata("locs", self._locs) @property def quants(self): @@ -162,7 +177,9 @@ def pdf_constructor_name(self, value: str): a value error. """ if value not in PDF_CONSTRUCTORS: - raise ValueError(f"Unknown interpolator provided: '{value}'. 
Allowed interpolators are {list(PDF_CONSTRUCTORS.keys())}") + raise ValueError( + f"Unknown interpolator provided: '{value}'. Allowed interpolators are {list(PDF_CONSTRUCTORS.keys())}" # pylint: disable=line-too-long + ) if value is self._pdf_constructor_name: logging.warning("Already using interpolator: '%s'.", value) @@ -170,7 +187,7 @@ def pdf_constructor_name(self, value: str): self._pdf_constructor_name = value self._instantiate_pdf_constructor() - self._addmetadata('pdf_constructor_name', self._pdf_constructor_name) + self._addmetadata("pdf_constructor_name", self._pdf_constructor_name) @property def pdf_constructor(self) -> AbstractQuantilePdfConstructor: @@ -186,8 +203,8 @@ def pdf_constructor(self) -> AbstractQuantilePdfConstructor: def _instantiate_pdf_constructor(self): self._pdf_constructor = PDF_CONSTRUCTORS[self._pdf_constructor_name]( - self._quants, self._locs) - + self._quants, self._locs + ) def _pdf(self, x, *args): # We're not requiring that the output be normalized! @@ -200,23 +217,37 @@ def _pdf(self, x, *args): def _cdf(self, x, row): # pylint: disable=arguments-differ - return interpolate_multi_x_y(x, row, self._locs, self._quants, - bounds_error=False, fill_value=(0., 1), kind="quadratic").ravel() + return interpolate_multi_x_y( + x, + row, + self._locs, + self._quants, + bounds_error=False, + fill_value=(0.0, 1), + kind="quadratic", + ).ravel() def _ppf(self, x, row): # pylint: disable=arguments-differ - return interpolate_x_multi_y(x, row, self._quants, self._locs, - bounds_error=False, fill_value=(self._xmin, self._xmax), kind="quadratic").ravel() + return interpolate_x_multi_y( + x, + row, + self._quants, + self._locs, + bounds_error=False, + fill_value=(self._xmin, self._xmax), + kind="quadratic", + ).ravel() def _updated_ctor_param(self): """ Set the quants and locs as additional constructor arguments """ dct = super()._updated_ctor_param() - dct['quants'] = self._quants - dct['locs'] = self._locs - dct['pdf_constructor_name'] = self._pdf_constructor_name - dct['check_input'] = self._check_input + dct["quants"] = self._quants + dct["locs"] = self._locs + dct["pdf_constructor_name"] = self._pdf_constructor_name + dct["check_input"] = self._check_input return dct @classmethod @@ -226,11 +257,11 @@ def get_allocation_kwds(cls, npdf, **kwargs): the metadata can be written when we finalize the file. 
""" try: - quants = kwargs['quants'] - except ValueError: #pragma: no cover + quants = kwargs["quants"] + except ValueError: # pragma: no cover print("required argument 'quants' not included in kwargs") nquants = np.shape(quants)[-1] - return dict(locs=((npdf, nquants), 'f4')) + return dict(locs=((npdf, nquants), "f4")) @classmethod def plot_native(cls, pdf, **kwargs): @@ -239,11 +270,13 @@ def plot_native(cls, pdf, **kwargs): For a quantile this shows the quantiles points """ axes, xlim, kw = get_axes_and_xlims(**kwargs) - xvals = np.linspace(xlim[0], xlim[1], kw.pop('npts', 101)) - locs = np.squeeze(pdf.dist.locs[pdf.kwds['row']]) + xvals = np.linspace(xlim[0], xlim[1], kw.pop("npts", 101)) + locs = np.squeeze(pdf.dist.locs[pdf.kwds["row"]]) quants = np.squeeze(pdf.dist.quants) yvals = np.squeeze(pdf.pdf(xvals)) - return plot_pdf_quantiles_on_axes(axes, xvals, yvals, quantiles=(quants, locs), **kw) + return plot_pdf_quantiles_on_axes( + axes, xvals, yvals, quantiles=(quants, locs), **kw + ) @classmethod def add_mappings(cls): @@ -256,7 +289,13 @@ def add_mappings(cls): quant = quant_gen.create -quant_gen.test_data = dict(quant=dict(gen_func=quant, ctor_data=dict(quants=QUANTS, locs=QLOCS),\ - convert_data=dict(quants=QUANTS), test_xvals=TEST_XVALS)) +quant_gen.test_data = dict( + quant=dict( + gen_func=quant, + ctor_data=dict(quants=QUANTS, locs=QLOCS), + convert_data=dict(quants=QUANTS), + test_xvals=TEST_XVALS, + ) +) add_class(quant_gen) diff --git a/src/qp/quantile_pdf_constructors/cdf_spline_derivative.py b/src/qp/quantile_pdf_constructors/cdf_spline_derivative.py index 34a9dcb3..784efbec 100644 --- a/src/qp/quantile_pdf_constructors/cdf_spline_derivative.py +++ b/src/qp/quantile_pdf_constructors/cdf_spline_derivative.py @@ -49,7 +49,9 @@ def prepare_constructor(self, spline_order:int = 3) -> None: # ! create an issue (or fix) if the spline fit fails, can fall back to a simpler interpolator ??? 
self._interpolation_functions = [ - InterpolatedUnivariateSpline(self._locations[i,:], self._quantiles, k=spline_order, ext=1).derivative() + InterpolatedUnivariateSpline( + self._locations[i,:], self._quantiles, k=spline_order, ext=1 + ).derivative() for i in range(0,number_of_locations) ] diff --git a/src/qp/quantile_pdf_constructors/dual_spline_average.py b/src/qp/quantile_pdf_constructors/dual_spline_average.py index 839e78ef..1431ae3a 100644 --- a/src/qp/quantile_pdf_constructors/dual_spline_average.py +++ b/src/qp/quantile_pdf_constructors/dual_spline_average.py @@ -105,17 +105,26 @@ def construct_pdf(self, grid: List[float], row: List[int] = None) -> List[List[f filtered_p_of_zs = list(map(self._p_of_zs.__getitem__, np.unique(row))) filtered_locations = list(map(self._locations.__getitem__, np.unique(row))) - # Create a list of interpolated splines for the even and odd pairs of (specific_locations, specific_p_of_zs) + # Create a list of interpolated splines for the even and odd pairs of + # (specific_locations, specific_p_of_zs) f1 = np.asarray( [ - interp1d(np.squeeze(specific_locations[0::2]), np.squeeze(specific_p_of_zs[0::2]), bounds_error=False, fill_value=0.0, kind='cubic') + interp1d( + np.squeeze(specific_locations[0::2]), + np.squeeze(specific_p_of_zs[0::2]), + bounds_error=False, fill_value=0.0, kind='cubic' + ) for specific_p_of_zs, specific_locations in zip(filtered_p_of_zs, filtered_locations) ] ) f2 = np.asarray( [ - interp1d(np.squeeze(specific_locations[1::2]), np.squeeze(specific_p_of_zs[1::2]), bounds_error=False, fill_value=0.0, kind='cubic') + interp1d( + np.squeeze(specific_locations[1::2]), + np.squeeze(specific_p_of_zs[1::2]), + bounds_error=False, fill_value=0.0, kind='cubic' + ) for specific_p_of_zs, specific_locations in zip(filtered_p_of_zs, filtered_locations) ] ) @@ -138,7 +147,8 @@ def debug(self): _locations : Input during constructor instantiation _p_of_zs : - Resulting p(z) values found after calculating the area of trapezoids based on the difference between adjacent quantile values + Resulting p(z) values found after calculating the area of trapezoids based + on the difference between adjacent quantile values y1 : One of two splines fit to alternating pairs of (_locations, _p_of_zs) y2 : diff --git a/src/qp/quantile_pdf_constructors/piecewise_constant.py b/src/qp/quantile_pdf_constructors/piecewise_constant.py index ead2e946..9b85dc2a 100644 --- a/src/qp/quantile_pdf_constructors/piecewise_constant.py +++ b/src/qp/quantile_pdf_constructors/piecewise_constant.py @@ -38,7 +38,8 @@ def prepare_constructor(self) -> None: self._cdf_derivatives = np.zeros(self._locations.shape) self._cdf_2nd_derivatives = np.zeros(self._locations.shape) - self._cdf_derivatives[:,0:-1] = (self._quantiles[1:] - self._quantiles[0:-1])/(self._locations[:,1:] - self._locations[:,0:-1]) + self._cdf_derivatives[:,0:-1] = (self._quantiles[1:] - self._quantiles[0:-1])/\ + (self._locations[:,1:] - self._locations[:,0:-1]) self._cdf_2nd_derivatives[:,0:-1] = self._cdf_derivatives[:,1:] - self._cdf_derivatives[:,0:-1] # Offset the locations by -(l_[i+1] - l_i) / 2. So that the cdf_deriv can be correctly located. 
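
For orientation, the forward difference that both piecewise constructors build
on can be sketched standalone (array shapes here are illustrative, not taken
from the patch):

    import numpy as np

    quants = np.linspace(0.01, 0.99, 11)                     # shared CDF values
    locs = np.sort(np.random.normal(size=(4, 11)), axis=-1)  # one row per PDF
    # dCDF/dx on each interval between adjacent quantile locations
    pdf_est = (quants[1:] - quants[:-1]) / (locs[:, 1:] - locs[:, :-1])
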
@@ -95,4 +96,5 @@ def debug(self): _adjusted_locations : Result of shifting the locations due to the use of non-central numerical derivatives """ - return self._quantiles, self._locations, self._cdf_derivatives, self._cdf_2nd_derivatives, self._adjusted_locations + return (self._quantiles, self._locations, self._cdf_derivatives, + self._cdf_2nd_derivatives, self._adjusted_locations) diff --git a/src/qp/quantile_pdf_constructors/piecewise_linear.py b/src/qp/quantile_pdf_constructors/piecewise_linear.py index 52b1feb2..45421a62 100644 --- a/src/qp/quantile_pdf_constructors/piecewise_linear.py +++ b/src/qp/quantile_pdf_constructors/piecewise_linear.py @@ -34,7 +34,8 @@ def prepare_constructor(self) -> None: not a central derivative. """ # calculate the first derivative using a forward difference. - self._cdf_derivatives = (self._quantiles[1:] - self._quantiles[0:-1])/(self._locations[:,1:] - self._locations[:,0:-1]) + self._cdf_derivatives = (self._quantiles[1:] - self._quantiles[0:-1])/\ + (self._locations[:,1:] - self._locations[:,0:-1]) # Offset the locations by -(l_[i+1] - l_i) / 2. So that the cdf_deriv can be correctly located. # This offset is necessary to correctly place the _cdf_derivs because we are using a diff --git a/src/qp/scipy_pdfs.py b/src/qp/scipy_pdfs.py index 7175fe9b..096e1434 100644 --- a/src/qp/scipy_pdfs.py +++ b/src/qp/scipy_pdfs.py @@ -23,11 +23,21 @@ from qp.factory import stats # pylint: disable=no-member -stats.norm_gen.test_data = dict(norm=dict(gen_func=stats.norm, ctor_data=dict(loc=LOC, scale=SCALE),\ - test_xvals=TEST_XVALS, do_samples=True, ancil=dict(zmode=LOC)), - norm_shifted=dict(gen_func=stats.norm, ctor_data=dict(loc=LOC, scale=SCALE),\ - test_xvals=TEST_XVALS), - norm_multi_d=dict(gen_func=stats.norm,\ - ctor_data=dict(loc=np.array([LOC, LOC]),\ - scale=np.array([SCALE, SCALE])),\ - test_xvals=TEST_XVALS, do_samples=True)) +stats.norm_gen.test_data = dict( + norm=dict( + gen_func=stats.norm, + ctor_data=dict(loc=LOC, scale=SCALE), + test_xvals=TEST_XVALS, + do_samples=True, + ancil=dict(zmode=LOC), + ), + norm_shifted=dict( + gen_func=stats.norm, ctor_data=dict(loc=LOC, scale=SCALE), test_xvals=TEST_XVALS + ), + norm_multi_d=dict( + gen_func=stats.norm, + ctor_data=dict(loc=np.array([LOC, LOC]), scale=np.array([SCALE, SCALE])), + test_xvals=TEST_XVALS, + do_samples=True, + ), +) diff --git a/src/qp/sparse_pdf.py b/src/qp/sparse_pdf.py index c4700797..deeafb48 100644 --- a/src/qp/sparse_pdf.py +++ b/src/qp/sparse_pdf.py @@ -12,6 +12,7 @@ from qp.conversion_funcs import extract_sparse_from_xy from qp.test_data import TEST_XVALS, NPDF + class sparse_gen(interp_gen): """Sparse based distribution. The final behavior is similar to interp_gen, but the constructor takes a sparse representation to build the interpolator. @@ -22,62 +23,62 @@ class sparse_gen(interp_gen): This implements a qp interface to the original code SparsePz from M. Carrasco-Kind. 
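 
     A hypothetical round trip with the helpers in this package (variable
     names and shapes are illustrative):
 
         indices, meta, _ = sparse_rep.build_sparse_representation(z, pdfs)
         ens = qp.Ensemble(
             qp.sparse,
             data=dict(
                 xvals=meta["xvals"], mu=meta["mu"], sig=meta["sig"],
                 dims=meta["dims"], sparse_indices=indices,
             ),
         )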
""" - # pylint: disable=protected-access + # pylint: disable=protected-access - name = 'sparse' + name = "sparse" version = 0 _support_mask = rv_continuous._support_mask - def __init__(self, xvals, mu, sig, dims, sparse_indices, *args, **kwargs): + def __init__(self, xvals, mu, sig, dims, sparse_indices, *args, **kwargs): # pylint: disable=too-many-arguments self.sparse_indices = sparse_indices self._xvals = xvals self.mu = mu self.sig = sig self.dims = dims - cut = kwargs.pop('cut', 1.e-5) - #recreate the basis array from the metadata + cut = kwargs.pop("cut", 1.0e-5) + # recreate the basis array from the metadata sparse_meta = dict(xvals=xvals, mu=mu, sig=sig, dims=dims) A = sparse_rep.create_basis(sparse_meta, cut=cut) - #decode the sparse indices into basis indices and weights + # decode the sparse indices into basis indices and weights basis_indices, weights = sparse_rep.decode_sparse_indices(sparse_indices) - #retrieve the weighted array of basis functions for each object + # retrieve the weighted array of basis functions for each object pdf_y = A[:, basis_indices] * weights - #normalize and sum the weighted pdfs - x = sparse_meta['xvals'] + # normalize and sum the weighted pdfs + x = sparse_meta["xvals"] y = pdf_y.sum(axis=-1) norms = sciint.trapz(y.T, x) y /= norms - kwargs.setdefault('xvals', x) - kwargs.setdefault('yvals', y.T) + kwargs.setdefault("xvals", x) + kwargs.setdefault("yvals", y.T) super().__init__(*args, **kwargs) self._clearobjdata() - self._addmetadata('xvals', self._xvals) - self._addmetadata('mu', self.mu) - self._addmetadata('sig', self.sig) - self._addmetadata('dims', self.dims) - self._addobjdata('sparse_indices', self.sparse_indices) + self._addmetadata("xvals", self._xvals) + self._addmetadata("mu", self.mu) + self._addmetadata("sig", self.sig) + self._addmetadata("dims", self.dims) + self._addobjdata("sparse_indices", self.sparse_indices) def _updated_ctor_param(self): """ Add the two constructor's arguments for the Factory """ dct = super()._updated_ctor_param() - dct['sparse_indices'] = self.sparse_indices - dct['xvals'] = self._xvals - dct['mu'] = self.mu - dct['sig'] = self.sig - dct['dims'] = self.dims + dct["sparse_indices"] = self.sparse_indices + dct["xvals"] = self._xvals + dct["mu"] = self.mu + dct["sig"] = self.sig + dct["dims"] = self.dims return dct @classmethod def get_allocation_kwds(cls, npdf, **kwargs): - if 'dims' not in kwargs: - raise ValueError("required argument dims not in kwargs") #pragma: no cover - nsp = np.array(kwargs['dims']).flatten()[4] - return dict(sparse_indices=((npdf, nsp), 'i8')) + if "dims" not in kwargs: + raise ValueError("required argument dims not in kwargs") # pragma: no cover + nsp = np.array(kwargs["dims"]).flatten()[4] + return dict(sparse_indices=((npdf, nsp), "i8")) @classmethod def add_mappings(cls): @@ -87,13 +88,12 @@ def add_mappings(cls): cls._add_creation_method(cls.create, None) cls._add_extraction_method(extract_sparse_from_xy, None) - @staticmethod def build_test_data(): """build a test case out of real pdfs""" - qproot = sys.modules['qp'].__path__[0] - filein = os.path.join(qproot, './data/CFHTLens_sample.P.npy') - #FORMAT FILE, EACH ROW IS THE PDF FOR EACH GALAXY, LAST ROW IS THE REDSHIFT POSITION + qproot = sys.modules["qp"].__path__[0] + filein = os.path.join(qproot, "./data/CFHTLens_sample.P.npy") + # FORMAT FILE, EACH ROW IS THE PDF FOR EACH GALAXY, LAST ROW IS THE REDSHIFT POSITION P = np.load(filein) z = P[-1] P = P[:NPDF] @@ -101,23 +101,33 @@ def build_test_data(): minz = np.min(z) nz = 301 
_, j = np.where(P > 0) - maxz = np.max(z[j+1]) + maxz = np.max(z[j + 1]) newz = np.linspace(minz, maxz, nz) interp = sciinterp.interp1d(z, P, assume_sorted=True) newpdf = interp(newz) newpdf = newpdf / sciint.trapz(newpdf, newz).reshape(-1, 1) - sparse_idx, meta, _ = sparse_rep.build_sparse_representation(newz, newpdf, verbose=False) + sparse_idx, meta, _ = sparse_rep.build_sparse_representation( + newz, newpdf, verbose=False + ) return sparse_idx, meta - @classmethod def make_test_data(cls): SPARSE_IDX, META = cls.build_test_data() - cls.test_data = dict(sparse=dict(gen_func=sparse, \ - ctor_data=dict(xvals=META['xvals'], mu=META['mu'], sig=META['sig'],\ - dims=META['dims'], sparse_indices=SPARSE_IDX),\ - test_xvals=TEST_XVALS), ) + cls.test_data = dict( + sparse=dict( + gen_func=sparse, + ctor_data=dict( + xvals=META["xvals"], + mu=META["mu"], + sig=META["sig"], + dims=META["dims"], + sparse_indices=SPARSE_IDX, + ), + test_xvals=TEST_XVALS, + ), + ) sparse = sparse_gen.create diff --git a/src/qp/sparse_rep.py b/src/qp/sparse_rep.py index fc7c69eb..398fb869 100644 --- a/src/qp/sparse_rep.py +++ b/src/qp/sparse_rep.py @@ -6,38 +6,41 @@ This module reorganizes it for usage by DESC within qp, and is python3 compliant. """ -__author__ = 'Matias Carrasco Kind' +__author__ = "Matias Carrasco Kind" import numpy as np -from scipy.special import voigt_profile +from scipy.special import voigt_profile # pylint: disable=no-name-in-module from scipy import linalg as sla from scipy import integrate as sciint -def shapes2pdf(wa, ma, sa, ga, meta, cut=1.e-5): + +def shapes2pdf(wa, ma, sa, ga, meta, cut=1.0e-5): # pylint: disable=too-many-arguments """return a pdf evaluated at the meta['xvals'] values for the given set of Voigt parameters""" - #input : list of shape parameters for a single object - x = meta['xvals'] + # input : list of shape parameters for a single object + x = meta["xvals"] pdf = np.zeros_like(x) for w, m, s, g in zip(wa, ma, sa, ga): pdft = voigt_profile(x - m, s, g) - pdft = np.where(pdft >= cut, pdft, 0.) + pdft = np.where(pdft >= cut, pdft, 0.0) pdft = w * pdft / sla.norm(pdft) pdf += pdft - pdf = np.where(pdf >= cut, pdf, 0.) + pdf = np.where(pdf >= cut, pdf, 0.0) return pdf / sciint.trapz(pdf, x) -def create_basis(metadata, cut=1.e-5): + +def create_basis(metadata, cut=1.0e-5): """create the Voigt basis matrix out of a metadata dictionary""" - mu = metadata['mu'] - Nmu = metadata['dims'][0] - sigma = metadata['sig'] - Nsigma = metadata['dims'][1] - Nv = metadata['dims'][2] - xvals = metadata['xvals'] + mu = metadata["mu"] + Nmu = metadata["dims"][0] + sigma = metadata["sig"] + Nsigma = metadata["dims"][1] + Nv = metadata["dims"][2] + xvals = metadata["xvals"] return create_voigt_basis(xvals, mu, Nmu, sigma, Nsigma, Nv, cut=cut) -def create_voigt_basis(xvals, mu, Nmu, sigma, Nsigma, Nv, cut=1.e-5): + +def create_voigt_basis(xvals, mu, Nmu, sigma, Nsigma, Nv, cut=1.0e-5): # pylint: disable=too-many-arguments """ Creates a gaussian-voigt dictionary at the same resolution as the original PDF @@ -65,23 +68,25 @@ def create_voigt_basis(xvals, mu, Nmu, sigma, Nsigma, Nv, cut=1.e-5): for j in range(Nsigma): for k in range(Nv): pdft = voigt_profile(xvals - means[i], sig[j], gamma[k]) - pdft = np.where(pdft >= cut, pdft, 0.) 
+                pdft = np.where(pdft >= cut, pdft, 0.0)
                 A[:, kk] = pdft / sla.norm(pdft)
                 kk += 1
     return A
 
+
 def sparse_basis(dictionary, query_vec, n_basis, tolerance=None):
     """
     Compute sparse representation of a vector given Dictionary (basis)
     for a given tolerance or number of bases. It uses Cholesky decomposition to speed the process and to
-    solve the linear operations adapted from Rubinstein, R., Zibulevsky, M. and Elad, M., Technical Report - CS
+    solve the linear operations adapted from
+    Rubinstein, R., Zibulevsky, M. and Elad, M., Technical Report - CS
     Technion, April 2008
 
-    :param float dictionary: Array with all basis on each column,
+    :param float dictionary: Array with all basis on each column,
     must have shape (len(vector), total basis) and each column must have euclidean l-2 norm equal to 1
     :param float query_vec: vector of which a sparse representation is desired
     :param int n_basis: number of desired bases
-    :param float tolerance: tolerance desired if n_basis is not needed to be fixed, must input a large number
+    :param float tolerance: tolerance desired if n_basis is not needed to be fixed, must input a large number
     for n_basis to ensure the tolerance is reached
 
     :return: indices, values (2 arrays one with the position and the second with the coefficients)
     """
@@ -93,16 +98,23 @@ def sparse_basis(dictionary, query_vec, n_basis, tolerance=None):
     res = query_vec
     idxs = np.arange(dictionary.shape[1])  # keeping track of swapping
     L = np.zeros((n_basis, n_basis), dtype=dictionary.dtype)
-    L[0, 0] = 1.
+    L[0, 0] = 1.0
     for n_active in range(n_basis):
         lam = np.argmax(abs(np.dot(dictionary.T, res)))
-        if lam < n_active or alpha[lam] ** 2 < machine_eps: #pragma: no cover
+        if lam < n_active or alpha[lam] ** 2 < machine_eps:  # pragma: no cover
             n_active -= 1
             break
-        if n_active > 0: #pragma: no cover
+        if n_active > 0:  # pragma: no cover
             # Updates the Cholesky decomposition of dictionary
-            L[n_active, :n_active] = np.dot(dictionary[:, :n_active].T, dictionary[:, lam])
-            sla.solve_triangular(L[:n_active, :n_active], L[n_active, :n_active], lower=True, overwrite_b=True)
+            L[n_active, :n_active] = np.dot(
+                dictionary[:, :n_active].T, dictionary[:, lam]
+            )
+            sla.solve_triangular(
+                L[:n_active, :n_active],
+                L[n_active, :n_active],
+                lower=True,
+                overwrite_b=True,
+            )
             v = sla.norm(L[n_active, :n_active]) ** 2
             if 1 - v <= machine_eps:
                 print("Selected bases are dependent or norms are not unity")
@@ -112,21 +124,27 @@ def sparse_basis(dictionary, query_vec, n_basis, tolerance=None):
         alpha[[n_active, lam]] = alpha[[lam, n_active]]
         idxs[[n_active, lam]] = idxs[[lam, n_active]]
         # solves LL'x = query_vec as a composition of two triangular systems
-        gamma = sla.cho_solve((L[:n_active + 1, :n_active + 1], True), alpha[:n_active + 1], overwrite_b=False)
-        res = query_vec - np.dot(dictionary[:, :n_active + 1], gamma)
+        gamma = sla.cho_solve(
+            (L[: n_active + 1, : n_active + 1], True),
+            alpha[: n_active + 1],
+            overwrite_b=False,
+        )
+        res = query_vec - np.dot(dictionary[:, : n_active + 1], gamma)
         if tolerance is not None and sla.norm(res) ** 2 <= tolerance:
             break
-    a_n[idxs[:n_active + 1]] = gamma
+    a_n[idxs[: n_active + 1]] = gamma
     del dictionary
-    #return a_n
-    return idxs[:n_active + 1], gamma
+    # return a_n
+    return idxs[: n_active + 1], gamma
+
 
 def combine_int(Ncoef, Nbase):
     """
     combine index of base (up to 62500 bases) and value (16-bit signed integer) in a 32-bit integer
     First half of word is for the value and second half for the index
 
-    :param int Ncoef: Integer with sign to represent the value associated with a base, this is a sign 16 bits integer
+    :param int Ncoef: Integer with sign to represent the value associated with a base,
+        this is a signed 16-bit integer
     :param int Nbase: Integer representing the base, unsigned 16-bit integer
 
     :return: 32-bit integer
     """
@@ -143,19 +161,20 @@ def get_N(longN):
 
     :return: Ncoef, Nbase both 16-bit integers
     """
-    return (longN >> 16), (longN & (2 ** 16 - 1))
+    return (longN >> 16), (longN & (2**16 - 1))
+
 
 def decode_sparse_indices(indices):
-    """decode sparse indices into basis indices and weigth array
-    """
+    """decode sparse indices into basis indices and weight array"""
     Ncoef = 32001
     sp_ind = np.array(list(map(get_N, indices)))
     spi = sp_ind[:, 0, :]
-    dVals = 1./(Ncoef - 1)
+    dVals = 1.0 / (Ncoef - 1)
     vals = spi * dVals
-    vals[:, 0] = 1.
+    vals[:, 0] = 1.0
     return sp_ind[:, 1, :], vals
+
 
 def indices2shapes(sparse_indices, meta):
     """compute the Voigt shape parameters from the sparse index
 
     Parameters
     ----------
     sparse_indices: `np.array`
         Array of indices comprising the sparse representation
 
     meta: `dict`
         Dictionary of metadata to decode the sparse indices
     """
-    Nmu = meta['dims'][0]
-    Nsigma = meta['dims'][1]
-    Nv = meta['dims'][2]
-    Ncoef = meta['dims'][3]
-    mu = meta['mu']
-    sigma = meta['sig']
+    Nmu = meta["dims"][0]
+    Nsigma = meta["dims"][1]
+    Nv = meta["dims"][2]
+    Ncoef = meta["dims"][3]
+    mu = meta["mu"]
+    sigma = meta["sig"]
 
     means_array = np.linspace(mu[0], mu[1], Nmu)
     sig_array = np.linspace(sigma[0], sigma[1], Nsigma)
     gam_array = np.linspace(0, 0.5, Nv)
 
-    #split the sparse indices into pairs (weight, basis_index)
-    #for each sparse index corresponding to one of the basis function
+    # split the sparse indices into pairs (weight, basis_index)
+    # for each sparse index corresponding to one of the basis functions
     sp_ind = np.array(list(map(get_N, sparse_indices)))
     spi = sp_ind[:, 0, :]
-    dVals = 1./(Ncoef - 1)
+    dVals = 1.0 / (Ncoef - 1)
     vals = spi * dVals
-    vals[:, 0] = 1.
+    vals[:, 0] = 1.0
 
     Dind2 = sp_ind[:, 1, :]
     means = means_array[np.array(Dind2 / (Nsigma * Nv), int)]
@@ -195,10 +214,20 @@ def indices2shapes(sparse_indices, meta):
     return vals, means, sigmas, gammas
 
 
-def build_sparse_representation(x, P, mu=None, Nmu=None, sig=None, Nsig=None, Nv=3, Nsparse=20, tol=1.e-10, verbose=True):
-    """compute the sparse representation of a set of pdfs evaluated on a common x array
-    """
-    #Note : the range for gamma is fixed to [0, 0.5] in create_voigt_basis
+def build_sparse_representation(  # pylint: disable=too-many-arguments
+    x,
+    P,
+    mu=None,
+    Nmu=None,
+    sig=None,
+    Nsig=None,
+    Nv=3,
+    Nsparse=20,
+    tol=1.0e-10,
+    verbose=True,
+):
+    """compute the sparse representation of a set of pdfs evaluated on a common x array"""
+    # Note : the range for gamma is fixed to [0, 0.5] in create_voigt_basis
     Ntot = len(P)
     if verbose:
         print("Total Galaxies = ", Ntot)
@@ -208,19 +237,19 @@ def build_sparse_representation(x, P, mu=None, Nmu=None, sig=None, Nsig=None, Nv
     if Nmu is None:
         Nmu = len(x)
     if sig is None:
-        max_sig = (max(x) - min(x)) / 12.
-        min_sig = dx / 6.
+        max_sig = (max(x) - min(x)) / 12.0
+        min_sig = dx / 6.0
         sig = [min_sig, max_sig]
     if Nsig is None:
-        Nsig = int(np.ceil(2. * (max_sig - min_sig) / dx))
+        Nsig = int(np.ceil(2.0 * (max_sig - min_sig) / dx))
 
     if verbose:
-        print('dx = ', dx)
-        print('Nmu, Nsig, Nv = ', '[', Nmu, ',', Nsig, ',', Nv, ']')
-        print('Total bases in dictionary', Nmu * Nsig * Nv)
-        print('Nsparse (number of bases) = ', Nsparse)
-    #Create dictionary
-    print('Creating Dictionary...')
+        print("dx = ", dx)
+        print("Nmu, Nsig, Nv = ", "[", Nmu, ",", Nsig, ",", Nv, "]")
+        print("Total bases in dictionary", Nmu * Nsig * Nv)
+        print("Nsparse (number of bases) = ", Nsparse)
+    # Create dictionary
+    print("Creating Dictionary...")
     A = create_voigt_basis(x, mu, Nmu, sig, Nsig, Nv)
 
     bigD = {}
 
@@ -229,48 +258,48 @@ def build_sparse_representation(x, P, mu=None, Nmu=None, sig=None, Nsig=None, Nv
     AA = np.linspace(0, 1, Ncoef)
     Da = AA[1] - AA[0]
 
-    bigD['xvals'] = x
-    bigD['mu'] = mu
-    bigD['sig'] = sig
-    bigD['dims'] = [Nmu, Nsig, Nv, Ncoef, Nsparse]
-    bigD['Ntot'] = Ntot
+    bigD["xvals"] = x
+    bigD["mu"] = mu
+    bigD["sig"] = sig
+    bigD["dims"] = [Nmu, Nsig, Nv, Ncoef, Nsparse]
+    bigD["Ntot"] = Ntot
 
     if verbose:
-        print('Creating Sparse representation...')
+        print("Creating Sparse representation...")
 
-    Sparse_Array = np.zeros((Ntot, Nsparse), dtype='int')
+    Sparse_Array = np.zeros((Ntot, Nsparse), dtype="int")
     for k in range(Ntot):
         pdf0 = P[k]
         Dind, Dval = sparse_basis(A, pdf0, Nsparse, tolerance=tol)
-        if len(Dind) < 1:#pragma: no cover
+        if len(Dind) < 1:  # pragma: no cover
             continue
-        #bigD[k]['sparse'] = [Dind, Dval]
+        # bigD[k]['sparse'] = [Dind, Dval]
         if max(Dval) > 0:
             dval0 = Dval[0]
             Dvalm = Dval / np.max(Dval)
-            index = np.array(list(map(round, (Dvalm / Da))), dtype='int')
-            index0 = int(round(dval0/Da))
+            index = np.array(list(map(round, (Dvalm / Da))), dtype="int")
+            index0 = int(round(dval0 / Da))
             index[0] = index0
         else:
-            index = np.zeros(len(Dind), dtype='int') #pragma: no cover
+            index = np.zeros(len(Dind), dtype="int")  # pragma: no cover
 
         sparse_ind = np.array(list(map(combine_int, index, Dind)))
-        Sparse_Array[k, 0:len(sparse_ind)] = sparse_ind
+        Sparse_Array[k, 0 : len(sparse_ind)] = sparse_ind
 
-        #swap back columns
+        # swap back columns
         A[:, [Dind]] = A[:, [np.arange(len(Dind))]]
 
     if verbose:
-        print('done')
+        print("done")
     return Sparse_Array, bigD, A
 
-def pdf_from_sparse(sparse_indices, A, xvals, cut=1.e-5):
-    """return the array of evaluations at xvals from the sparse indices
-    """
+
+def pdf_from_sparse(sparse_indices, A, xvals, cut=1.0e-5):
+    """return the array of evaluations at xvals from the sparse indices"""
     indices, vals = decode_sparse_indices(sparse_indices)
-    pdf_y = (A[:, indices]*vals).sum(axis=-1)
-    pdf_y = np.where(pdf_y >= cut, pdf_y, 0.)
+ pdf_y = (A[:, indices] * vals).sum(axis=-1) + pdf_y = np.where(pdf_y >= cut, pdf_y, 0.0) pdf_x = xvals norms = sciint.trapz(pdf_y.T, pdf_x) pdf_y /= norms diff --git a/src/qp/spline_pdf.py b/src/qp/spline_pdf.py index 3a9827e1..b31830d2 100644 --- a/src/qp/spline_pdf.py +++ b/src/qp/spline_pdf.py @@ -4,7 +4,7 @@ import numpy as np from scipy.integrate import quad from scipy.interpolate import splev, splint, splrep -from scipy.special import errstate +from scipy.special import errstate # pylint: disable=no-name-in-module from scipy.stats import rv_continuous from qp.conversion_funcs import extract_samples, extract_xy_vals @@ -41,10 +41,11 @@ def normalize_spline(xvals, yvals, limits, **kwargs): def row_integral(irow): def spl(xv): return splev(xv, splrep(xvals[irow], yvals[irow])) + return quad(spl, limits[0], limits[1], **kwargs)[0] vv = np.vectorize(row_integral) - with errstate(all='ignore'): + with errstate(all="ignore"): integrals = vv(np.arange(xvals.shape[0])) return (yvals.T / integrals).T @@ -104,9 +105,10 @@ class spline_gen(Pdf_rows_gen): The ppf() will use the default scipy implementation, which inverts the cdf() as evaluated on an adaptive grid. """ + # pylint: disable=protected-access - name = 'spline' + name = "spline" version = 0 _support_mask = rv_continuous._support_mask @@ -128,27 +130,28 @@ def __init__(self, *args, **kwargs): ----- Either (splx, sply and spln) must be provided. """ - splx = kwargs.pop('splx', None) - sply = kwargs.pop('sply', None) - spln = kwargs.pop('spln', None) + splx = kwargs.pop("splx", None) + sply = kwargs.pop("sply", None) + spln = kwargs.pop("spln", None) if splx is None: # pragma: no cover raise ValueError("splx must be provided") if splx.shape != sply.shape: # pragma: no cover - raise ValueError("Shape of xvals (%s) != shape of yvals (%s)" % (splx.shape, sply.shape)) - #kwargs['a'] = self.a = np.min(splx) - #kwargs['b'] = self.b = np.max(splx) + raise ValueError( + "Shape of xvals (%s) != shape of yvals (%s)" % (splx.shape, sply.shape) + ) + # kwargs['a'] = self.a = np.min(splx) + # kwargs['b'] = self.b = np.max(splx) self._xmin = np.min(splx) self._xmax = np.max(splx) - kwargs['shape'] = splx.shape[:-1] + kwargs["shape"] = splx.shape[:-1] self._splx = reshape_to_pdf_size(splx, -1) self._sply = reshape_to_pdf_size(sply, -1) self._spln = reshape_to_pdf_size(spln, -1) super().__init__(*args, **kwargs) - self._addobjdata('splx', self._splx) - self._addobjdata('sply', self._sply) - self._addobjdata('spln', self._spln) - + self._addobjdata("splx", self._splx) + self._addobjdata("sply", self._sply) + self._addobjdata("spln", self._spln) @staticmethod def build_normed_splines(xvals, yvals, **kwargs): @@ -172,13 +175,15 @@ def build_normed_splines(xvals, yvals, **kwargs): The order of the spline knots """ if xvals.shape != yvals.shape: # pragma: no cover - raise ValueError("Shape of xvals (%s) != shape of yvals (%s)" % (xvals.shape, yvals.shape)) + raise ValueError( + "Shape of xvals (%s) != shape of yvals (%s)" + % (xvals.shape, yvals.shape) + ) xmin = np.min(xvals) xmax = np.max(xvals) yvals = normalize_spline(xvals, yvals, limits=(xmin, xmax), **kwargs) return build_splines(xvals, yvals) - @classmethod def create_from_xy_vals(cls, xvals, yvals, **kwargs): """ @@ -218,12 +223,11 @@ def create_from_samples(cls, xvals, samples, **kwargs): The requested PDF """ kdes = build_kdes(samples) - kwargs.pop('yvals', None) + kwargs.pop("yvals", None) yvals = evaluate_kdes(xvals, kdes) - xvals_expand = (np.expand_dims(xvals, -1)*np.ones(samples.shape[0])).T 
+ xvals_expand = (np.expand_dims(xvals, -1) * np.ones(samples.shape[0])).T return cls.create_from_xy_vals(xvals_expand, yvals, **kwargs) - @property def splx(self): """Return x-values of the spline knots""" @@ -242,19 +246,24 @@ def spln(self): def _pdf(self, x, row): # pylint: disable=arguments-differ def pdf_row(xv, irow): - return splev(xv, (self._splx[irow], self._sply[irow], self._spln[irow].item())) + return splev( + xv, (self._splx[irow], self._sply[irow], self._spln[irow].item()) + ) - with errstate(all='ignore'): + with errstate(all="ignore"): vv = np.vectorize(pdf_row) return vv(x, row).ravel() - def _cdf(self, x, row): # pylint: disable=arguments-differ def cdf_row(xv, irow): - return splint(self._xmin, xv, (self._splx[irow], self._sply[irow], self._spln[irow].item())) + return splint( + self._xmin, + xv, + (self._splx[irow], self._sply[irow], self._spln[irow].item()), + ) - with errstate(all='ignore'): + with errstate(all="ignore"): vv = np.vectorize(cdf_row) return vv(x, row).ravel() @@ -263,9 +272,9 @@ def _updated_ctor_param(self): Set the bins as additional constructor argument """ dct = super()._updated_ctor_param() - dct['splx'] = self._splx - dct['sply'] = self._sply - dct['spln'] = self._spln + dct["splx"] = self._splx + dct["sply"] = self._sply + dct["spln"] = self._spln return dct @classmethod @@ -274,7 +283,7 @@ def get_allocation_kwds(cls, npdf, **kwargs): Return the keywords necessary to create an 'empty' hdf5 file with npdf entries for iterative file writeout. We only need to allocate the objdata columns, as the metadata can be written when we finalize the file. - + Parameters ---------- npdf: int @@ -282,12 +291,11 @@ def get_allocation_kwds(cls, npdf, **kwargs): kwargs: dict dictionary of kwargs needed to create the ensemble """ - if 'splx' not in kwargs: #pragma: no cover + if "splx" not in kwargs: # pragma: no cover raise ValueError("required argument splx not included in kwargs") - shape = np.shape(kwargs['splx']) - return dict(splx=(shape, 'f4'), sply=(shape, 'f4'), spln=((shape[0],1), 'i4')) - + shape = np.shape(kwargs["splx"]) + return dict(splx=(shape, "f4"), sply=(shape, "f4"), spln=((shape[0], 1), "i4")) @classmethod def plot_native(cls, pdf, **kwargs): @@ -296,7 +304,7 @@ def plot_native(cls, pdf, **kwargs): For a spline this shows the spline knots """ axes, _, kw = get_axes_and_xlims(**kwargs) - xvals = pdf.dist.splx[pdf.kwds['row']] + xvals = pdf.dist.splx[pdf.kwds["row"]] return plot_pdf_on_axes(axes, pdf, xvals, **kw) @classmethod @@ -310,22 +318,32 @@ def add_mappings(cls): cls._add_extraction_method(extract_xy_vals, "xy") cls._add_extraction_method(extract_samples, "samples") - @classmethod def make_test_data(cls): - """ Make data for unit tests """ + """Make data for unit tests""" SPLX, SPLY, SPLN = cls.build_normed_splines(XARRAY, YARRAY) - cls.test_data = dict(spline=dict(gen_func=spline, ctor_data=dict(splx=SPLX, sply=SPLY, spln=SPLN),\ - test_xvals=TEST_XVALS[::10]), - spline_kde=dict(gen_func=spline_from_samples,\ - ctor_data=dict(samples=SAMPLES, xvals=np.linspace(0, 5, 51)),\ - convert_data=dict(xvals=np.linspace(0, 5, 51), method='samples'),\ - test_xvals=TEST_XVALS, atol_diff2=1., test_pdf=False),\ - spline_xy=dict(gen_func=spline_from_xy,\ - ctor_data=dict(xvals=XARRAY, yvals=YARRAY),\ - convert_data=dict(xvals=np.linspace(0, 5, 51), method='xy'),\ - test_xvals=TEST_XVALS, test_pdf=False)) - + cls.test_data = dict( + spline=dict( + gen_func=spline, + ctor_data=dict(splx=SPLX, sply=SPLY, spln=SPLN), + test_xvals=TEST_XVALS[::10], + ), + 
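+        # The next two entries exercise creation from KDE samples and from
+        # (x, y) pairs, respectively.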
spline_kde=dict( + gen_func=spline_from_samples, + ctor_data=dict(samples=SAMPLES, xvals=np.linspace(0, 5, 51)), + convert_data=dict(xvals=np.linspace(0, 5, 51), method="samples"), + test_xvals=TEST_XVALS, + atol_diff2=1.0, + test_pdf=False, + ), + spline_xy=dict( + gen_func=spline_from_xy, + ctor_data=dict(xvals=XARRAY, yvals=YARRAY), + convert_data=dict(xvals=np.linspace(0, 5, 51), method="xy"), + test_xvals=TEST_XVALS, + test_pdf=False, + ), + ) spline = spline_gen.create diff --git a/src/qp/test_data.py b/src/qp/test_data.py index 5a361081..c8d29413 100644 --- a/src/qp/test_data.py +++ b/src/qp/test_data.py @@ -9,23 +9,39 @@ NPDF = 11 NBIN = 61 NSAMPLES = 100 -XMIN = 0. -XMAX = 5. +XMIN = 0.0 +XMAX = 5.0 LOC = np.expand_dims(np.linspace(0.5, 2.5, NPDF), -1) SCALE = np.expand_dims(np.linspace(0.2, 1.2, NPDF), -1) LOC_SHIFTED = LOC + SCALE TEST_XVALS = np.linspace(XMIN, XMAX, 201) XBINS = np.linspace(XMIN, XMAX, NBIN) -XARRAY = np.ones((NPDF, NBIN))*XBINS -YARRAY = np.expand_dims(np.linspace(0.5, 2.5, NPDF), -1)*(1. + 0.1*np.random.uniform(size=(NPDF, NBIN))) -HIST_DATA = YARRAY[:,0:-1] +XARRAY = np.ones((NPDF, NBIN)) * XBINS +YARRAY = np.expand_dims(np.linspace(0.5, 2.5, NPDF), -1) * ( + 1.0 + 0.1 * np.random.uniform(size=(NPDF, NBIN)) +) +HIST_DATA = YARRAY[:, 0:-1] QUANTS = np.linspace(0.01, 0.99, NBIN) QLOCS = sps.norm(loc=LOC, scale=SCALE).ppf(QUANTS) SAMPLES = sps.norm(loc=LOC, scale=SCALE).rvs(size=(NPDF, NSAMPLES)) -MEAN_MIXMOD = np.vstack([np.linspace(0.5, 2.5, NPDF), np.linspace(0.5, 1.5, NPDF), np.linspace(1.5, 2.5, NPDF)]).T -STD_MIXMOD = np.vstack([np.linspace(0.2, 1.2, NPDF), np.linspace(0.2, 0.5, NPDF), np.linspace(0.2, 0.5, NPDF)]).T -WEIGHT_MIXMOD = np.vstack([0.7*np.ones((NPDF)), 0.2*np.ones((NPDF)), 0.1*np.ones((NPDF))]).T +MEAN_MIXMOD = np.vstack( + [ + np.linspace(0.5, 2.5, NPDF), + np.linspace(0.5, 1.5, NPDF), + np.linspace(1.5, 2.5, NPDF), + ] +).T +STD_MIXMOD = np.vstack( + [ + np.linspace(0.2, 1.2, NPDF), + np.linspace(0.2, 0.5, NPDF), + np.linspace(0.2, 0.5, NPDF), + ] +).T +WEIGHT_MIXMOD = np.vstack( + [0.7 * np.ones((NPDF)), 0.2 * np.ones((NPDF)), 0.1 * np.ones((NPDF))] +).T -HIST_TOL = 4./NBIN +HIST_TOL = 4.0 / NBIN QP_TOPDIR = os.path.dirname(os.path.dirname(__file__)) diff --git a/src/qp/test_funcs.py b/src/qp/test_funcs.py index 5a22427d..c2183f88 100644 --- a/src/qp/test_funcs.py +++ b/src/qp/test_funcs.py @@ -1,4 +1,6 @@ -"""This small module implements generic tests for distributions to ensure that they have been implemented consistently""" +"""This small module implements generic tests for distributions to ensure +that they have been implemented consistently +""" import os import numpy as np @@ -11,29 +13,32 @@ def assert_all_close(arr, arr2, **kwds): """A slightly more informative version of asserting allclose""" - test_name = kwds.pop('test_name', 'test') - if not np.allclose(arr, arr2, **kwds): #pragma: no cover - raise ValueError("%s %.2e %.2e %s" % (test_name, (arr-arr2).min(), (arr-arr2).max(), kwds)) + test_name = kwds.pop("test_name", "test") + if not np.allclose(arr, arr2, **kwds): # pragma: no cover + raise ValueError( + "%s %.2e %.2e %s" + % (test_name, (arr - arr2).min(), (arr - arr2).max(), kwds) + ) def assert_all_small(arr, **kwds): """A slightly more informative version of asserting allclose""" - test_name = kwds.pop('test_name', 'test') - if not np.allclose(arr, 0, **kwds): #pragma: no cover + test_name = kwds.pop("test_name", "test") + if not np.allclose(arr, 0, **kwds): # pragma: no cover raise ValueError("%s %.2e %.2e %s" % 
(test_name, arr.min(), arr.max(), kwds)) def build_ensemble(test_data): """Build an ensemble from test data in a class""" - gen_func = test_data['gen_func'] - ctor_data = test_data['ctor_data'] + gen_func = test_data["gen_func"] + ctor_data = test_data["ctor_data"] try: ens = Ensemble(gen_func, data=ctor_data) - ancil = test_data.get('ancil') + ancil = test_data.get("ancil") if ancil is not None: ens.set_ancil(ancil) return ens - except Exception as exep: #pragma: no cover + except Exception as exep: # pragma: no cover print("Failed to make %s %s %s" % (gen_func, ctor_data, exep)) raise ValueError from exep @@ -41,19 +46,18 @@ def build_ensemble(test_data): def pdf_func_tests(pdf, test_data, short=False, check_props=True): """Run the test for a practicular class""" - xpts = test_data['test_xvals'] + xpts = test_data["test_xvals"] if check_props: # if we used the c'tor, make sure the class keeps the data used in the c'tor - for kv in test_data['ctor_data'].keys(): + for kv in test_data["ctor_data"].keys(): test_val = pdf.kwds.get(kv, None) if test_val is None: - if not hasattr(pdf.dist, kv): #pragma: no cover + if not hasattr(pdf.dist, kv): # pragma: no cover raise ValueError("%s %s" % (pdf.dist, kv)) _ = getattr(pdf.dist, kv) - - if hasattr(pdf.dist, 'npdf'): + if hasattr(pdf.dist, "npdf"): assert pdf.dist.npdf == pdf.npdf assert pdf.shape[-1] == NPDF @@ -62,17 +66,16 @@ def pdf_func_tests(pdf, test_data, short=False, check_props=True): if pdf.ndim == 1: xslice = xpts[5] pdfs_slice = pdf.pdf(xslice) - check_pdf = pdfs[:,5].flatten() - pdfs_slice.flatten() + check_pdf = pdfs[:, 5].flatten() - pdfs_slice.flatten() assert_all_small(check_pdf, atol=2e-2, test_name="pdf") xslice = np.expand_dims(xpts[np.arange(pdf.npdf)], -1) pdfs_slice = pdf.pdf(xslice) - pdf_check = np.array([pdfs[i,i] for i in range(pdf.npdf)]) - check_pdfs_slice = pdf_check.flatten() - pdfs_slice.flatten() + pdf_check = np.array([pdfs[i, i] for i in range(pdf.npdf)]) + check_pdfs_slice = pdf_check.flatten() - pdfs_slice.flatten() assert_all_small(check_pdfs_slice, atol=2e-2, test_name="pdf_slice") - - if short: #pragma: no cover + if short: # pragma: no cover return pdf cdfs = pdf.cdf(xpts) @@ -80,9 +83,13 @@ def pdf_func_tests(pdf, test_data, short=False, check_props=True): binw = xpts[1:] - xpts[0:-1] if pdf.ndim == 1: - check_cdf = ((pdfs[:,0:-1] + pdfs[:,1:]) * binw /2).cumsum(axis=-1) - cdfs[:,1:] + check_cdf = ((pdfs[:, 0:-1] + pdfs[:, 1:]) * binw / 2).cumsum(axis=-1) - cdfs[ + :, 1: + ] else: - check_cdf = ((pdfs[:,:,0:-1] + pdfs[:,:,1:]) * binw /2).cumsum(axis=-1) - cdfs[:,:,1:] + check_cdf = ((pdfs[:, :, 0:-1] + pdfs[:, :, 1:]) * binw / 2).cumsum( + axis=-1 + ) - cdfs[:, :, 1:] assert_all_small(check_cdf, atol=2e-1, test_name="cdf") @@ -93,7 +100,7 @@ def pdf_func_tests(pdf, test_data, short=False, check_props=True): if pdf.ndim == 1: quants_slice = np.expand_dims(quants[np.arange(pdf.npdf)], -1) ppfs_slice = pdf.ppf(quants_slice) - _ = np.array([ppfs[i,i] for i in range(pdf.npdf)]) + _ = np.array([ppfs[i, i] for i in range(pdf.npdf)]) check_ppfs_slice = pdf.cdf(ppfs_slice) - quants_slice assert_all_small(check_ppfs_slice, atol=2e-2, test_name="ppf_slice") @@ -111,33 +118,35 @@ def pdf_func_tests(pdf, test_data, short=False, check_props=True): def run_pdf_func_tests(test_class, test_data, short=False, check_props=True): """Run the test for a practicular class""" - method = test_data.get('method', None) + method = test_data.get("method", None) ctor_func = test_class.creation_method(method) - if ctor_func is None: 
#pragma: no cover - raise KeyError("failed to find creation method %s for class %s" % (method, test_class)) + if ctor_func is None: # pragma: no cover + raise KeyError( + "failed to find creation method %s for class %s" % (method, test_class) + ) try: - pdf = ctor_func(**test_data['ctor_data']) - except Exception as exep: #pragma: no cover - print("Failed to make %s %s %s" % (ctor_func, test_data['ctor_data'], exep)) + pdf = ctor_func(**test_data["ctor_data"]) + except Exception as exep: # pragma: no cover + print("Failed to make %s %s %s" % (ctor_func, test_data["ctor_data"], exep)) raise ValueError from exep - alloc_kwds = pdf.dist.get_allocation_kwds(pdf.npdf, **test_data['ctor_data']) + alloc_kwds = pdf.dist.get_allocation_kwds(pdf.npdf, **test_data["ctor_data"]) for key, val in alloc_kwds.items(): - assert np.product(val[0]) == np.size(test_data['ctor_data'][key]) + assert np.product(val[0]) == np.size(test_data["ctor_data"][key]) return pdf_func_tests(pdf, test_data, short=short, check_props=check_props) def persist_func_test(ensemble, test_data): """Run loopback persistence tests on an ensemble""" - #ftypes = ['fits', 'hdf5', 'pq'] + # ftypes = ['fits', 'hdf5', 'pq'] if ensemble.ndim == 1: - ftypes = ['fits', 'hf5', 'h5', 'pq', 'hdf5'] + ftypes = ["fits", "hf5", "h5", "pq", "hdf5"] else: - ftypes = ['fits', 'hf5'] + ftypes = ["fits", "hf5"] for ftype in ftypes: - filename = "test_%s.%s" % ( ensemble.gen_class.name, ftype ) + filename = "test_%s.%s" % (ensemble.gen_class.name, ftype) try: os.remove(filename) except FileNotFoundError: @@ -147,17 +156,19 @@ def persist_func_test(ensemble, test_data): ens_r = read(filename) meta2 = ens_r.metadata() # check that reading metadata and main file get same metadata items - for k, v in meta.items(): + for k, _v in meta.items(): # we can't actually do a better check than this because the build_tables # method in the ensemble class may add extra metadata items and change their type assert k in meta2 - diff = ensemble.pdf(test_data['test_xvals']) - ens_r.pdf(test_data['test_xvals']) + diff = ensemble.pdf(test_data["test_xvals"]) - ens_r.pdf( + test_data["test_xvals"] + ) assert_all_small(diff, atol=1e-5, test_name="persist") if ensemble.ancil is not None: - if ftype in ['pq']: + if ftype in ["pq"]: continue - diff2 = ensemble.ancil['zmode'] - ens_r.ancil['zmode'] + diff2 = ensemble.ancil["zmode"] - ens_r.ancil["zmode"] assert_all_small(diff2, atol=1e-5, test_name="persist_ancil") try: os.unlink(filename) @@ -179,24 +190,24 @@ def run_persist_func_tests(test_data): def run_convert_tests(ens_orig, gen_class, test_data, **kwargs): """Run the test for a practicular class""" - xpts = test_data['test_xvals'] + xpts = test_data["test_xvals"] binw = np.mean(xpts[1:] - xpts[0:-1]) - atol = kwargs.get('atol_diff', 1e-2)/binw - atol2 = kwargs.get('atol_diff2', 1e-2)/binw + atol = kwargs.get("atol_diff", 1e-2) / binw + atol2 = kwargs.get("atol_diff2", 1e-2) / binw - ens1 = ens_orig.convert_to(gen_class, **test_data['convert_data']) + ens1 = ens_orig.convert_to(gen_class, **test_data["convert_data"]) diffs = ens_orig.pdf(xpts) - ens1.pdf(xpts) assert_all_small(diffs, atol=atol, test_name="convert") - ens2 = convert(ens_orig, gen_class.name, **test_data['convert_data']) + ens2 = convert(ens_orig, gen_class.name, **test_data["convert_data"]) diffs = ens_orig.pdf(xpts) - ens2.pdf(xpts) assert_all_small(diffs, atol=atol2, test_name="convert2") assert ens_orig.shape[:-1] == ens1.shape[:-1] assert ens_orig.shape[:-1] == ens2.shape[:-1] assert 
ens_orig.frozen.shape[:-1] == ens2.shape[:-1] - if hasattr(ens2.dist, 'shape'): + if hasattr(ens2.dist, "shape"): assert ens2.dist.shape[:-1] == ens2.shape[:-1] @@ -209,10 +220,10 @@ def plotting_func_tests(ensemble, do_samples=False): fig, axes = plot(pdf, axes=axes) assert fig is not None assert axes is not None - fig, axes = plot_native(pdf.frozen, xlim=(-5, 5)) + fig, axes = plot_native(pdf.frozen, xlim=(-5, 5)) assert fig is not None assert axes is not None - fig, axes = plot(pdf.frozen, xlim=(-5, 5)) + fig, axes = plot(pdf.frozen, xlim=(-5, 5)) assert fig is not None assert axes is not None if do_samples: diff --git a/src/qp/utils.py b/src/qp/utils.py index 7a24f23c..4b758a5e 100644 --- a/src/qp/utils.py +++ b/src/qp/utils.py @@ -8,13 +8,14 @@ epsilon = sys.float_info.epsilon infty = sys.float_info.max * epsilon -lims = (epsilon, 1.) +lims = (epsilon, 1.0) CASE_PRODUCT = 0 CASE_FACTOR = 1 CASE_2D = 2 CASE_FLAT = 3 + def safelog(arr, threshold=epsilon): """ Takes the natural logarithm of an array of potentially non-positive numbers @@ -65,9 +66,10 @@ def normalize_quantiles(in_data, threshold=epsilon, vb=False): return out_data """ + def edge_to_center(edges): """Return the centers of a set of bins given the edges""" - return 0.5*(edges[1:] + edges[:-1]) + return 0.5 * (edges[1:] + edges[:-1]) def bin_widths(edges): @@ -84,13 +86,13 @@ def get_bin_indices(bins, x): widths = bin_widths(bins) n_bins = np.size(bins) - 1 if np.allclose(widths, widths[0]): - idx = np.atleast_1d(np.floor((x-bins[0])/widths[0]).astype(int)) + idx = np.atleast_1d(np.floor((x - bins[0]) / widths[0]).astype(int)) else: - idx = np.atleast_1d(np.searchsorted(bins, x, side='left')-1) - mask = (idx >= 0) * (idx < bins.size-1) - np.putmask(idx, 1-mask, 0) + idx = np.atleast_1d(np.searchsorted(bins, x, side="left") - 1) + mask = (idx >= 0) * (idx < bins.size - 1) + np.putmask(idx, 1 - mask, 0) xshape = np.shape(x) - return idx.reshape(xshape).clip(0, n_bins-1), mask.reshape(xshape) + return idx.reshape(xshape).clip(0, n_bins - 1), mask.reshape(xshape) def normalize_interp1d(xvals, yvals): @@ -109,12 +111,14 @@ def normalize_interp1d(xvals, yvals): ynorm: array-like Normalized y-vals """ - #def row_integral(irow): + # def row_integral(irow): # return quad(interp1d(xvals[irow], yvals[irow], **kwargs), limits[0], limits[1])[0] - #vv = np.vectorize(row_integral) - #integrals = vv(np.arange(xvals.shape[0])) - integrals = np.sum(xvals[:,1:]*yvals[:,1:] - xvals[:,:-1]*yvals[:,1:], axis=1) + # vv = np.vectorize(row_integral) + # integrals = vv(np.arange(xvals.shape[0])) + integrals = np.sum( + xvals[:, 1:] * yvals[:, 1:] - xvals[:, :-1] * yvals[:, 1:], axis=1 + ) return (yvals.T / integrals).T @@ -135,8 +139,7 @@ def build_kdes(samples, **kwargs): ------- kdes : list of `scipy.stats.gaussian_kde` objects """ - return [ sps.gaussian_kde(row, **kwargs) for row in samples ] - + return [sps.gaussian_kde(row, **kwargs) for row in samples] def evaluate_kdes(xvals, kdes): @@ -159,7 +162,7 @@ def evaluate_kdes(xvals, kdes): def get_eval_case(x, row): - """ Figure out which of the various input formats scipy.stats has passed us + """Figure out which of the various input formats scipy.stats has passed us Parameters ---------- @@ -182,18 +185,21 @@ def get_eval_case(x, row): The cases are: CASE_FLAT : x, row have shapes (n), (n) and do not factor - CASE_FACTOR : x, row have shapes (n), (n) but can be factored to shapes (1, nx) and (npdf, 1) (i.e., they were flattend by scipy) + CASE_FACTOR : x, row have shapes (n), (n) but can be 
factored to shapes (1, nx) and (npdf, 1) + (i.e., they were flattend by scipy) CASE_PRODUCT : x, row have shapes (1, nx) and (npdf, 1) CASE_2D : x, row have shapes (npdf, nx) and (npdf, nx) """ nd_x = np.ndim(x) nd_row = np.ndim(row) - #if nd_x > 2 or nd_row > 2: #pragma: no cover + # if nd_x > 2 or nd_row > 2: #pragma: no cover # raise ValueError("Too many dimensions: x(%s), row(%s)" % (np.shape(x), np.shape(row))) if nd_x >= 2 and nd_row != 1: return CASE_2D, x, row - if nd_x >= 2 and nd_row == 1: #pragma: no cover - raise ValueError("Dimension mismatch: x(%s), row(%s)" % (np.shape(x), np.shape(row))) + if nd_x >= 2 and nd_row == 1: # pragma: no cover + raise ValueError( + "Dimension mismatch: x(%s), row(%s)" % (np.shape(x), np.shape(row)) + ) if nd_row >= 2: return CASE_PRODUCT, x, row if np.size(x) == 1 or np.size(row) == 1: @@ -209,7 +215,7 @@ def get_eval_case(x, row): return CASE_FACTOR, xx, np.expand_dims(rr, -1) -def evaluate_hist_x_multi_y_flat(x, row, bins, vals, derivs=None): #pragma: no cover +def evaluate_hist_x_multi_y_flat(x, row, bins, vals, derivs=None): # pragma: no cover """ Evaluate a set of values from histograms @@ -235,15 +241,19 @@ def evaluate_hist_x_multi_y_flat(x, row, bins, vals, derivs=None): #pragma: no deltas = np.zeros(idx.shape) else: deltas = x - bins[idx] + def evaluate_row(idxv, maskv, rv, delta): if derivs is None: return np.where(maskv, vals[rv, idxv], 0) - return np.where(maskv, vals[rv, idxv] + delta*derivs[rv, idxv], 0) + return np.where(maskv, vals[rv, idxv] + delta * derivs[rv, idxv], 0) + vv = np.vectorize(evaluate_row) return vv(idx, mask, row, deltas) -def evaluate_hist_x_multi_y_product(x, row, bins, vals, derivs=None): #pragma: no cover +def evaluate_hist_x_multi_y_product( + x, row, bins, vals, derivs=None +): # pragma: no cover """ Evaluate a set of values from histograms @@ -263,16 +273,20 @@ def evaluate_hist_x_multi_y_product(x, row, bins, vals, derivs=None): #pragma: out : array_like (npdf, npts) The histogram values """ - #assert np.ndim(x) < 2 and np.ndim(row) == 2 + # assert np.ndim(x) < 2 and np.ndim(row) == 2 idx, mask0 = get_bin_indices(bins, x) mask = np.ones(row.shape) * mask0 if derivs is None: - return np.where(mask, vals[:,idx][np.squeeze(row)], 0) + return np.where(mask, vals[:, idx][np.squeeze(row)], 0) deltas = x - bins[idx] - return np.where(mask, vals[:,idx][np.squeeze(row)] + deltas*derivs[:,idx][np.squeeze(row)] , 0) + return np.where( + mask, + vals[:, idx][np.squeeze(row)] + deltas * derivs[:, idx][np.squeeze(row)], + 0, + ) -def evaluate_hist_x_multi_y_2d(x, row, bins, vals, derivs=None): #pragma: no cover +def evaluate_hist_x_multi_y_2d(x, row, bins, vals, derivs=None): # pragma: no cover """ Evaluate a set of values from histograms @@ -302,7 +316,8 @@ def evaluate_hist_x_multi_y_2d(x, row, bins, vals, derivs=None): #pragma: no co def evaluate_row(idxv, maskv, rv, delta): if derivs is None: return np.where(maskv, vals[rv, idxv], 0) - return np.where(maskv, vals[rv, idxv] + delta*derivs[rv, idxv], 0) + return np.where(maskv, vals[rv, idxv] + delta * derivs[rv, idxv], 0) + vv = np.vectorize(evaluate_row) return vv(idx, mask, row, deltas) @@ -340,7 +355,9 @@ def evaluate_hist_x_multi_y(x, row, bins, vals, derivs=None): return evaluate_hist_x_multi_y_flat(xx, rr, bins, vals, derivs) -def evaluate_hist_multi_x_multi_y_flat(x, row, bins, vals, derivs=None): #pragma: no cover +def evaluate_hist_multi_x_multi_y_flat( + x, row, bins, vals, derivs=None +): # pragma: no cover """ Evaluate a set of values from histograms @@ 
-360,18 +377,22 @@ def evaluate_hist_multi_x_multi_y_flat(x, row, bins, vals, derivs=None): #pragm out : array_like (n) The histogram values """ + def evaluate_row(xv, rv): bins_row = bins[rv] idx, mask = get_bin_indices(bins_row, xv) delta = xv - bins_row[idx] if derivs is None: return np.where(mask, vals[rv, idx], 0) - return np.where(mask, vals[rv, idx] + delta*derivs[rv, idx], 0) + return np.where(mask, vals[rv, idx] + delta * derivs[rv, idx], 0) + vv = np.vectorize(evaluate_row) return vv(x, row) -def evaluate_hist_multi_x_multi_y_product(x, row, bins, vals, derivs=None): #pragma: no cover +def evaluate_hist_multi_x_multi_y_product( + x, row, bins, vals, derivs=None +): # pragma: no cover """ Evaluate a set of values from histograms @@ -391,18 +412,24 @@ def evaluate_hist_multi_x_multi_y_product(x, row, bins, vals, derivs=None): #pr out : array_like (npdf, npts) The histogram values """ + def evaluate_row(rv): bins_flat = bins[rv].flatten() idx, mask = get_bin_indices(bins_flat, x) delta = x - bins_flat[idx] if derivs is None: return np.where(mask, np.squeeze(vals[rv])[idx], 0).flatten() - return np.where(mask, np.squeeze(vals[rv])[idx] + delta* np.squeeze(derivs[rv])[idx], 0) + return np.where( + mask, np.squeeze(vals[rv])[idx] + delta * np.squeeze(derivs[rv])[idx], 0 + ) + vv = np.vectorize(evaluate_row, signature="(1)->(%i)" % (x.size)) return vv(row) -def evaluate_hist_multi_x_multi_y_2d(x, row, bins, vals, derivs=None): #pragma: no cover +def evaluate_hist_multi_x_multi_y_2d( + x, row, bins, vals, derivs=None +): # pragma: no cover """ Evaluate a set of values from histograms @@ -423,16 +450,21 @@ def evaluate_hist_multi_x_multi_y_2d(x, row, bins, vals, derivs=None): #pragma: The histogram values """ nx = np.shape(x)[-1] + def evaluate_row(rv, xv): flat_bins = bins[rv].flatten() idx, mask = get_bin_indices(flat_bins, xv) delta = xv - flat_bins[idx] if derivs is None: return np.where(mask, np.squeeze(vals[rv])[idx], 0).flatten() - return np.where(mask, np.squeeze(vals[rv])[idx] + delta*np.squeeze(derivs[rv])[idx], 0).flatten() + return np.where( + mask, np.squeeze(vals[rv])[idx] + delta * np.squeeze(derivs[rv])[idx], 0 + ).flatten() + vv = np.vectorize(evaluate_row, signature="(1),(%i)->(%i)" % (nx, nx)) return vv(row, x) + def evaluate_hist_multi_x_multi_y(x, row, bins, vals, derivs=None): """ Evaluate a set of values from histograms @@ -481,8 +513,10 @@ def interpolate_x_multi_y_flat(x, row, xvals, yvals, **kwargs): vals : array_like (npdf, n) The interpoalted values """ + def single_row(xv, rv): return interp1d(xvals, yvals[rv], **kwargs)(xv) + vv = np.vectorize(single_row) return vv(x, row) @@ -532,8 +566,10 @@ def interpolate_x_multi_y_2d(x, row, xvals, yvals, **kwargs): The interpoalted values """ nx = np.shape(x)[-1] + def evaluate_row(rv, xv): return interp1d(xvals, yvals[rv], **kwargs)(xv) + vv = np.vectorize(evaluate_row, signature="(1),(%i)->(%i)" % (nx, nx)) return vv(row, x) @@ -586,8 +622,10 @@ def interpolate_multi_x_multi_y_flat(x, row, xvals, yvals, **kwargs): vals : array_like (npdf, n) The interpoalted values """ + def single_row(xv, rv): return interp1d(xvals[rv], yvals[rv], **kwargs)(xv) + vv = np.vectorize(single_row) return vv(x, row) @@ -614,8 +652,10 @@ def interpolate_multi_x_multi_y_product(x, row, xvals, yvals, **kwargs): """ rr = np.squeeze(row) nx = np.shape(x)[-1] + def single_row(rv): return interp1d(xvals[rv], yvals[rv], **kwargs)(x) + vv = np.vectorize(single_row, signature="()->(%i)" % (nx)) return vv(rr) @@ -641,8 +681,10 @@ def 
interpolate_multi_x_multi_y_2d(x, row, xvals, yvals, **kwargs): The interpoalted values """ nx = np.shape(x)[-1] + def evaluate_row(rv, xv): return interp1d(xvals[rv], yvals[rv], **kwargs)(xv) + vv = np.vectorize(evaluate_row, signature="(),(%i)->(%i)" % (nx, nx)) return vv(np.squeeze(row), x) @@ -695,8 +737,10 @@ def interpolate_multi_x_y_flat(x, row, xvals, yvals, **kwargs): vals : array_like (npdf, n) The interpoalted values """ + def single_row(xv, rv): return interp1d(xvals[rv], yvals, **kwargs)(xv) + vv = np.vectorize(single_row) return vv(x, row) @@ -723,8 +767,10 @@ def interpolate_multi_x_y_product(x, row, xvals, yvals, **kwargs): """ rr = np.squeeze(row) nx = np.shape(x)[-1] + def single_row(rv): return interp1d(xvals[rv], yvals, **kwargs)(x) + vv = np.vectorize(single_row, signature="()->(%i)" % (nx)) return vv(rr) @@ -750,8 +796,10 @@ def interpolate_multi_x_y_2d(x, row, xvals, yvals, **kwargs): The interpoalted values """ nx = np.shape(x)[-1] + def evaluate_row(rv, xv): return interp1d(xvals[rv], yvals, **kwargs)(xv) + vv = np.vectorize(evaluate_row, signature="(),(%i)->(%i)" % (nx, nx)) return vv(np.squeeze(row), x) @@ -806,13 +854,13 @@ def profile(x_data, y_data, x_bins, std=True): The standard deviations or errors on the means """ idx, mask = get_bin_indices(x_bins, x_data) - count = np.zeros(x_bins.size-1) - vals = np.zeros(x_bins.size-1) - errs = np.zeros(x_bins.size-1) - for i in range(x_bins.size-1): + count = np.zeros(x_bins.size - 1) + vals = np.zeros(x_bins.size - 1) + errs = np.zeros(x_bins.size - 1) + for i in range(x_bins.size - 1): mask_col = mask * (idx == i) count[i] = mask_col.sum() - if mask_col.sum() == 0: #pragma: no cover + if mask_col.sum() == 0: # pragma: no cover vals[i] = np.nan errs[i] = np.nan continue @@ -823,6 +871,7 @@ def profile(x_data, y_data, x_bins, std=True): errs /= np.sqrt(count) return vals, errs + def reshape_to_pdf_size(vals, split_dim): """Reshape an array to match the number of PDFs in a distribution @@ -832,7 +881,7 @@ def reshape_to_pdf_size(vals, split_dim): The input array split_dim : int The dimension at which to split between pdf indices and per_pdf indices - + Returns ------- out : array diff --git a/src/qp/version.py b/src/qp/version.py index c7772d98..7f2508ea 100644 --- a/src/qp/version.py +++ b/src/qp/version.py @@ -2,6 +2,7 @@ def find_version(): # setuptools_scm should install a # file _version alongside this one. from . 
import _version # pylint: disable=import-outside-toplevel + return _version.version diff --git a/tests/qp/benchmark.py b/tests/qp/benchmark.py index 6a50277b..5b27a448 100644 --- a/tests/qp/benchmark.py +++ b/tests/qp/benchmark.py @@ -1,72 +1,85 @@ """ Timing for qp distributions """ -import qp -import numpy as np import time -testfile = 'qp_test_ensemble.hdf5' +import numpy as np + +import qp + +testfile = "qp_test_ensemble.hdf5" + def time_ensemble(ens): - """ Time main scipy functions """ + """Time main scipy functions""" - zv = np.linspace(0., 2.5, 201) + zv = np.linspace(0.0, 2.5, 201) quants = np.linspace(0.01, 0.99, 50) nsamples = 100 - print("Timing %s on %i PDFS, with %i grid points, %i quantiles and %i samples" % #pylint: disable=bad-string-format-type - (type(ens.gen_obj), ens.frozen.npdf, zv.size, quants.size, nsamples)) + print( + "Timing %s on %i PDFS, with %i grid points, %i quantiles and %i samples" # pylint: disable=bad-string-format-type + % ( + type(ens.gen_obj), + ens.frozen.npdf, + zv.size, + quants.size, + nsamples, + ) + ) t0 = time.time() _ = ens.pdf(zv) t1 = time.time() - print("pdf %.2f s" % (t1-t0)) + print("pdf %.2f s" % (t1 - t0)) t0 = time.time() _ = ens.cdf(zv) t1 = time.time() - print("cdf %.2f s" % (t1-t0)) + print("cdf %.2f s" % (t1 - t0)) t0 = time.time() _ = ens.ppf(quants) t1 = time.time() - print("ppf %.2f s" % (t1-t0)) + print("ppf %.2f s" % (t1 - t0)) t0 = time.time() _ = ens.sf(zv) t1 = time.time() - print("sf %.2f s" % (t1-t0)) + print("sf %.2f s" % (t1 - t0)) t0 = time.time() _ = ens.isf(zv) t1 = time.time() - print("isf %.2f s" % (t1-t0)) + print("isf %.2f s" % (t1 - t0)) t0 = time.time() _ = ens.rvs(size=nsamples) t1 = time.time() - print("rvs %.2f s" % (t1-t0)) + print("rvs %.2f s" % (t1 - t0)) def time_convert(ens, cls_to, **kwds): - """ Time conversion function """ + """Time conversion function""" t0 = time.time() ens_out = ens.convert_to(cls_to, **kwds) t1 = time.time() - print("Convert %s to %s with %i pdfs in %.2f s" % (type(ens.gen_obj), cls_to, ens.frozen.npdf, t1-t0)) + print( + "Convert %s to %s with %i pdfs in %.2f s" + % (type(ens.gen_obj), cls_to, ens.frozen.npdf, t1 - t0) + ) return ens_out - def main(): - """ Main """ + """Main""" t0 = time.time() ens_orig = qp.read(testfile) t1 = time.time() - print("Read %.2f s" % (t1-t0)) + print("Read %.2f s" % (t1 - t0)) time_ensemble(ens_orig) - bins = np.linspace(0., 2.5, 101) + bins = np.linspace(0.0, 2.5, 101) quants = np.linspace(0.01, 0.99, 50) ens_i = time_convert(ens_orig, qp.interp_gen, xvals=bins) @@ -78,10 +91,10 @@ def main(): ens_q = time_convert(ens_orig, qp.quant_gen, quants=quants) time_ensemble(ens_q) - #ens_s = time_convert(ens_orig, qp.spline_gen, xvals=bins) + # ens_s = time_convert(ens_orig, qp.spline_gen, xvals=bins) # skip this, it sucks - #time_ensemble(ens_s) + # time_ensemble(ens_s) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/tests/qp/fidelity.py b/tests/qp/fidelity.py index e7dce685..0deba90b 100644 --- a/tests/qp/fidelity.py +++ b/tests/qp/fidelity.py @@ -1,21 +1,23 @@ """ More detailed tests for conversion fidelity """ -import qp -import numpy as np import time import matplotlib.pyplot as plt +import numpy as np + +import qp + +testfile = "../tests/qp_test_ensemble.hdf5" -testfile = '../tests/qp_test_ensemble.hdf5' def run_metrics(prefix, ens_orig, ens_test): - """ Run matching metrics """ + """Run matching metrics""" t0 = time.time() - kld = qp.metrics.calculate_kld(ens_orig, ens_test, limits=(0.,3.)) + kld = 
qp.metrics.calculate_kld(ens_orig, ens_test, limits=(0.0, 3.0))
     t1 = time.time()
-    print("kld: mean %.2e, std %.2e in %.2f s" % (kld.mean(), kld.std(), t1-t0))
+    print("kld: mean %.2e, std %.2e in %.2f s" % (kld.mean(), kld.std(), t1 - t0))
 
     fig = plt.figure()
     axes = fig.subplots()
@@ -24,27 +26,27 @@ def run_metrics(prefix, ens_orig, ens_test):
 
 
 def main():
-    """ Main """
+    """Main"""
    ens_orig = qp.read(testfile)
 
     print("Running conversions for interp, quant, hist")
-    ens_g51 = qp.convert(ens_orig, 'interp', xvals=np.linspace(0, 3, 51))
-    ens_g21 = qp.convert(ens_orig, 'interp', xvals=np.linspace(0, 3, 21))
-    ens_q51 = qp.convert(ens_orig, 'quant', quants=np.linspace(0.01, 0.99, 51))
-    ens_q21 = qp.convert(ens_orig, 'quant', quants=np.linspace(0.01, 0.99, 21))
-    ens_h51 = qp.convert(ens_orig, 'hist', bins=np.linspace(0, 3.0, 51))
-    ens_h21 = qp.convert(ens_orig, 'hist', bins=np.linspace(0, 3.0, 21))
+    ens_g51 = qp.convert(ens_orig, "interp", xvals=np.linspace(0, 3, 51))
+    ens_g21 = qp.convert(ens_orig, "interp", xvals=np.linspace(0, 3, 21))
+    ens_q51 = qp.convert(ens_orig, "quant", quants=np.linspace(0.01, 0.99, 51))
+    ens_q21 = qp.convert(ens_orig, "quant", quants=np.linspace(0.01, 0.99, 21))
+    ens_h51 = qp.convert(ens_orig, "hist", bins=np.linspace(0, 3.0, 51))
+    ens_h21 = qp.convert(ens_orig, "hist", bins=np.linspace(0, 3.0, 21))
 
     print("Running conversions for spline, mixmod, sparse")
     ens_red = ens_orig[np.arange(100)]
-    ens_s51 = qp.convert(ens_red, 'spline', xvals=np.linspace(0, 3.0, 51), method="xy")
-    ens_s21 = qp.convert(ens_red, 'spline', xvals=np.linspace(0, 3.0, 21), method="xy")
-    ens_m3 = qp.convert(ens_red, 'mixmod', xvals=np.linspace(0, 3.0, 301), ncomps=3)
-    ens_m5 = qp.convert(ens_red, 'mixmod', xvals=np.linspace(0, 3.0, 301), ncomps=3)
-    ens_sp = qp.convert(ens_red, class_name='voigt', method='xy')
-
-    label_list = ['g51', 'g21', 'q51', 'q21', 'h51', 'h21']
-    label_list2 = ['s51', 's21', 'm3', 'm5', 'sp']
+    ens_s51 = qp.convert(ens_red, "spline", xvals=np.linspace(0, 3.0, 51), method="xy")
+    ens_s21 = qp.convert(ens_red, "spline", xvals=np.linspace(0, 3.0, 21), method="xy")
+    ens_m3 = qp.convert(ens_red, "mixmod", xvals=np.linspace(0, 3.0, 301), ncomps=3)
+    ens_m5 = qp.convert(ens_red, "mixmod", xvals=np.linspace(0, 3.0, 301), ncomps=5)
+    ens_sp = qp.convert(ens_red, class_name="voigt", method="xy")
+
+    label_list = ["g51", "g21", "q51", "q21", "h51", "h21"]
+    label_list2 = ["s51", "s21", "m3", "m5", "sp"]
 
     ens_list = [ens_g51, ens_g21, ens_q51, ens_q21, ens_h51, ens_h21]
     ens_list2 = [ens_s51, ens_s21, ens_m3, ens_m5, ens_sp]
diff --git a/tests/qp/quant_pdf/test_quant_pdf.py b/tests/qp/quant_pdf/test_quant_pdf.py
index d78b41dd..424ab8dd 100644
--- a/tests/qp/quant_pdf/test_quant_pdf.py
+++ b/tests/qp/quant_pdf/test_quant_pdf.py
@@ -2,37 +2,40 @@
 Unit tests for quant_pdf class
 """
 import logging
-import numpy as np
 import unittest
 
-import qp
+import numpy as np
+
+import qp
 from qp.quantile_pdf_constructors import AbstractQuantilePdfConstructor
 
 
 class QuantPdfTestCase(unittest.TestCase):
-    """ Class to test quant_pdf qp.Ensemble functionality """
+    """Class to test quant_pdf qp.Ensemble functionality"""
 
     def test_quant_get_default_pdf_constructor_name(self):
         """Test that the getter for pdf constructor name works"""
         quantiles = np.linspace(0.001, 0.999, 16)
         locations = np.linspace(0, 5, 16)
         quant_dist = qp.quant(quants=quantiles, locs=locations)
-        self.assertEqual(quant_dist.dist.pdf_constructor_name, 'piecewise_linear')
+        self.assertEqual(quant_dist.dist.pdf_constructor_name,
"piecewise_linear") def test_quant_get_default_pdf_constructor(self): """Test that the getter for pdf constructor returns an AbstractQuantilePdfConstructor""" quantiles = np.linspace(0.001, 0.999, 16) locations = np.linspace(0, 5, 16) quant_dist = qp.quant(quants=quantiles, locs=locations) - assert isinstance(quant_dist.dist.pdf_constructor, AbstractQuantilePdfConstructor) + assert isinstance( + quant_dist.dist.pdf_constructor, AbstractQuantilePdfConstructor + ) def test_quant_change_pdf_constructor(self): """Test that changing the pdf constructor works as expected""" quantiles = np.linspace(0.001, 0.999, 16) locations = np.linspace(0, 5, 16) quant_dist = qp.quant(quants=quantiles, locs=locations) - quant_dist.dist.pdf_constructor_name = 'piecewise_constant' - self.assertEqual(quant_dist.dist.pdf_constructor_name, 'piecewise_constant') + quant_dist.dist.pdf_constructor_name = "piecewise_constant" + self.assertEqual(quant_dist.dist.pdf_constructor_name, "piecewise_constant") def test_quant_change_pdf_constructor_raises(self): """Verify that attempting to change the pdf constructor to one that @@ -41,7 +44,7 @@ def test_quant_change_pdf_constructor_raises(self): locations = np.linspace(0, 5, 16) quant_dist = qp.quant(quants=quantiles, locs=locations) with self.assertRaises(ValueError): - quant_dist.dist.pdf_constructor_name = 'drewtonian' + quant_dist.dist.pdf_constructor_name = "drewtonian" def test_quant_change_pdf_constructor_warns(self): """Verify that attempting to change the pdf constructor to the one @@ -50,5 +53,5 @@ def test_quant_change_pdf_constructor_warns(self): locations = np.linspace(0, 5, 16) quant_dist = qp.quant(quants=quantiles, locs=locations) with self.assertLogs(level=logging.WARNING) as log: - quant_dist.dist.pdf_constructor_name = 'piecewise_linear' - self.assertIn('Already using', log.output[0]) \ No newline at end of file + quant_dist.dist.pdf_constructor_name = "piecewise_linear" + self.assertIn("Already using", log.output[0]) diff --git a/tests/qp/quantile_pdf_constructors/test_cdf_spline_derivative.py b/tests/qp/quantile_pdf_constructors/test_cdf_spline_derivative.py index ad1c7540..2e999f1d 100644 --- a/tests/qp/quantile_pdf_constructors/test_cdf_spline_derivative.py +++ b/tests/qp/quantile_pdf_constructors/test_cdf_spline_derivative.py @@ -3,16 +3,18 @@ import numpy as np import qp -from qp.quantile_pdf_constructors import AbstractQuantilePdfConstructor, \ - CdfSplineDerivative +from qp.quantile_pdf_constructors import (AbstractQuantilePdfConstructor, + CdfSplineDerivative) class CdfSplineDerivativeTestCase(unittest.TestCase): """Tests for the CDF Spline Derivative PDF constructor for quantile parameterization.""" def setUp(self): - self.single_norm = qp.stats.norm(loc=3, scale=0.5) - self.many_norm = qp.stats.norm(loc=np.array([[1], [2.5], [3]]), scale=np.array([[0.25], [0.5], [0.1]])) + self.single_norm = qp.stats.norm(loc=3, scale=0.5) # pylint: disable=no-member + self.many_norm = qp.stats.norm( # pylint: disable=no-member + loc=np.array([[1], [2.5], [3]]), scale=np.array([[0.25], [0.5], [0.1]]) + ) self.user_defined_quantiles = np.linspace(0.001, 0.999, 16) self.user_defined_grid = np.linspace(0, 5, 100) @@ -21,20 +23,30 @@ def setUp(self): def test_instantiation(self): """Base case make sure that we can instantiate the class""" user_defined_locations = self.single_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( 
+ quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) assert isinstance(pdf_constructor, AbstractQuantilePdfConstructor) def test_instantiation_for_multiple_distributions(self): """Base case make sure that we can instantiate a pdf reconstructor for multiple distributions""" user_defined_locations = self.many_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) assert isinstance(pdf_constructor, AbstractQuantilePdfConstructor) def test_debug(self): """Ensure that debug returns expected values before `prepare_constructor` has been run""" user_defined_locations = self.single_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) - debug_quantiles, debug_locations, debug_interp_functions = pdf_constructor.debug() + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) + ( + debug_quantiles, + debug_locations, + debug_interp_functions, + ) = pdf_constructor.debug() print(debug_quantiles) assert np.all(np.isclose(debug_quantiles, self.user_defined_quantiles)) @@ -42,25 +54,35 @@ def test_debug(self): self.assertIsNone(debug_interp_functions) def test_basic_construct_pdf(self): - """Base case to ensure that `construct_pdf` method runs with minimum arguments for single distribution case - Want to verify only that the machinery is working, and the result is not just zeros.""" + """Base case to ensure that `construct_pdf` method runs with minimum arguments for + single distribution case. Want to verify only that the machinery is working, + and the result is not just zeros. + """ user_defined_locations = self.single_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) results = pdf_constructor.construct_pdf(self.user_defined_grid) self.assertIsNot(np.sum(results), 0.0) def test_basic_construct_pdf_for_multiple_distributions(self): - """Base case to ensure that `construct_pdf` method runs with minimum arguments for many-distribution case - Want to verify only that the machinery is working, and the result is not just zeros.""" + """Base case to ensure that `construct_pdf` method runs with minimum arguments + for many-distribution case. Want to verify only that the machinery is working, + and the result is not just zeros. 
+ """ user_defined_locations = self.many_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) results = pdf_constructor.construct_pdf(self.user_defined_grid) self.assertIsNot(np.sum(results), 0.0) def test_passing_different_spline_orders(self): """Verify that passing a new spline_order value to `prepare_constructor` results in a change.""" user_defined_locations = self.single_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) default_spline_order = pdf_constructor.construct_pdf(self.user_defined_grid) # change the spline order - expect the reconstructed pdf to be different than @@ -71,11 +93,16 @@ def test_passing_different_spline_orders(self): assert np.any(np.not_equal(default_spline_order, different_spline_order)) def test_basic_construct_pdf_for_subset_of_multiple_distributions(self): - """Base case to ensure that `construct_pdf` method runs with minimum arguments for many-distribution case - when passing in a `row` value. Want to verify only that the machinery is working, and that the size of the - output matches expectations""" + """Base case to ensure that `construct_pdf` method runs with minimum arguments + for many-distribution case when passing in a `row` value. Want to verify only + that the machinery is working, and that the size of the output matches expectations + """ user_defined_locations = self.many_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) - user_defined_rows = [0,1] - results = pdf_constructor.construct_pdf(self.user_defined_grid, row=user_defined_rows) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) + user_defined_rows = [0, 1] + results = pdf_constructor.construct_pdf( + self.user_defined_grid, row=user_defined_rows + ) self.assertEqual(len(results), 2) diff --git a/tests/qp/quantile_pdf_constructors/test_dual_spline_average.py b/tests/qp/quantile_pdf_constructors/test_dual_spline_average.py index 6575a884..9f56899e 100644 --- a/tests/qp/quantile_pdf_constructors/test_dual_spline_average.py +++ b/tests/qp/quantile_pdf_constructors/test_dual_spline_average.py @@ -3,16 +3,18 @@ import numpy as np import qp -from qp.quantile_pdf_constructors import AbstractQuantilePdfConstructor, \ - DualSplineAverage +from qp.quantile_pdf_constructors import (AbstractQuantilePdfConstructor, + DualSplineAverage) class DualSplineAverageTestCase(unittest.TestCase): """Tests for the CDF Spline Derivative PDF constructor for quantile parameterization.""" def setUp(self): - self.single_norm = qp.stats.norm(loc=3, scale=0.5) - self.many_norm = qp.stats.norm(loc=np.array([[1], [2.5], [3]]), scale=np.array([[0.25], [0.5], [0.1]])) + self.single_norm = qp.stats.norm(loc=3, scale=0.5) # pylint: disable=no-member + self.many_norm = qp.stats.norm( # pylint: disable=no-member + loc=np.array([[1], [2.5], [3]]), scale=np.array([[0.25], [0.5], [0.1]]) + ) self.user_defined_quantiles = np.linspace(0.001, 0.999, 16) self.user_defined_grid = np.linspace(0, 5, 100) @@ -21,20 +23,32 @@ def 
setUp(self): def test_instantiation(self): """Base case make sure that we can instantiate the class""" user_defined_locations = self.single_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) assert isinstance(pdf_constructor, AbstractQuantilePdfConstructor) def test_instantiation_for_multiple_distributions(self): """Base case make sure that we can instantiate a pdf reconstructor for multiple distributions""" user_defined_locations = self.many_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) assert isinstance(pdf_constructor, AbstractQuantilePdfConstructor) def test_debug(self): """Ensure that debug returns expected values before `prepare_constructor` has been run""" user_defined_locations = self.single_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) - debug_quantiles, debug_locations, debug_p_of_zs, debug_y1, debug_y2 = pdf_constructor.debug() + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) + ( + debug_quantiles, + debug_locations, + debug_p_of_zs, + debug_y1, + debug_y2, + ) = pdf_constructor.debug() print(debug_quantiles) assert np.all(np.isclose(debug_quantiles, self.user_defined_quantiles)) @@ -44,28 +58,41 @@ def test_debug(self): self.assertIsNone(debug_y2) def test_basic_construct_pdf(self): - """Base case to ensure that `construct_pdf` method runs with minimum arguments for single distribution case - Want to verify only that the machinery is working, and the result is not just zeros.""" + """Base case to ensure that `construct_pdf` method runs with minimum arguments + for single distribution case. Want to verify only that the machinery is working, + and the result is not just zeros. + """ user_defined_locations = self.single_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) results = pdf_constructor.construct_pdf(self.user_defined_grid) self.assertIsNot(np.sum(results), 0.0) def test_basic_construct_pdf_for_multiple_distributions(self): - """Base case to ensure that `construct_pdf` method runs with minimum arguments for many-distribution case - Want to verify only that the machinery is working, and the result is not just zeros.""" + """Base case to ensure that `construct_pdf` method runs with minimum arguments + for many-distribution case. Want to verify only that the machinery is working, + and the result is not just zeros. 
+ """ user_defined_locations = self.many_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) results = pdf_constructor.construct_pdf(self.user_defined_grid) self.assertIsNot(np.sum(results), 0.0) self.assertEqual(len(results), 3) def test_basic_construct_pdf_for_subset_of_multiple_distributions(self): - """Base case to ensure that `construct_pdf` method runs with minimum arguments for many-distribution case - when passing in a `row` value. Want to verify only that the machinery is working, and that the size of the - output matches expectations""" + """Base case to ensure that `construct_pdf` method runs with minimum arguments + for many-distribution case when passing in a `row` value. Want to verify only + that the machinery is working, and that the size of the output matches expectations + """ user_defined_locations = self.many_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) - user_defined_rows = [0,1] - results = pdf_constructor.construct_pdf(self.user_defined_grid, row=user_defined_rows) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) + user_defined_rows = [0, 1] + results = pdf_constructor.construct_pdf( + self.user_defined_grid, row=user_defined_rows + ) self.assertEqual(len(results), 2) diff --git a/tests/qp/quantile_pdf_constructors/test_piecewise_constant.py b/tests/qp/quantile_pdf_constructors/test_piecewise_constant.py index 7dcd14d2..2cf3298c 100644 --- a/tests/qp/quantile_pdf_constructors/test_piecewise_constant.py +++ b/tests/qp/quantile_pdf_constructors/test_piecewise_constant.py @@ -3,16 +3,18 @@ import numpy as np import qp -from qp.quantile_pdf_constructors import AbstractQuantilePdfConstructor, \ - PiecewiseConstant +from qp.quantile_pdf_constructors import (AbstractQuantilePdfConstructor, + PiecewiseConstant) class PiecewiseLinearTestCase(unittest.TestCase): """Tests for the CDF Spline Derivative PDF constructor for quantile parameterization.""" def setUp(self): - self.single_norm = qp.stats.norm(loc=3, scale=0.5) - self.many_norm = qp.stats.norm(loc=np.array([[1], [2.5], [3]]), scale=np.array([[0.25], [0.5], [0.1]])) + self.single_norm = qp.stats.norm(loc=3, scale=0.5) # pylint: disable=no-member + self.many_norm = qp.stats.norm( # pylint: disable=no-member + loc=np.array([[1], [2.5], [3]]), scale=np.array([[0.25], [0.5], [0.1]]) + ) self.user_defined_quantiles = np.linspace(0.001, 0.999, 16) self.user_defined_grid = np.linspace(0, 5, 100) @@ -21,20 +23,32 @@ def setUp(self): def test_instantiation(self): """Base case make sure that we can instantiate the class""" user_defined_locations = self.single_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) assert isinstance(pdf_constructor, AbstractQuantilePdfConstructor) def test_instantiation_for_multiple_distributions(self): """Base case make sure that we can instantiate a pdf reconstructor for multiple distributions""" user_defined_locations = self.many_norm.ppf(self.user_defined_quantiles) - pdf_constructor = 
self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) assert isinstance(pdf_constructor, AbstractQuantilePdfConstructor) def test_debug(self): """Ensure that debug returns expected values before `prepare_constructor` has been run""" user_defined_locations = self.single_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) - debug_quantiles, debug_locations, debug_first_derivatives, debug_second_derivative, debug_adjusted_locations = pdf_constructor.debug() + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) + ( + debug_quantiles, + debug_locations, + debug_first_derivatives, + debug_second_derivative, + debug_adjusted_locations, + ) = pdf_constructor.debug() assert np.all(np.isclose(debug_quantiles, self.user_defined_quantiles)) assert np.all(np.isclose(debug_locations, user_defined_locations)) @@ -43,17 +57,25 @@ def test_debug(self): self.assertIsNone(debug_adjusted_locations) def test_basic_construct_pdf(self): - """Base case to ensure that `construct_pdf` method runs with minimum arguments for single distribution case - Want to verify only that the machinery is working, and the result is not just zeros.""" + """Base case to ensure that `construct_pdf` method runs with minimum arguments + for single distribution case. Want to verify only that the machinery is working, + and the result is not just zeros. + """ user_defined_locations = self.single_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) results = pdf_constructor.construct_pdf(self.user_defined_grid) self.assertIsNot(np.sum(results), 0.0) def test_basic_construct_pdf_for_multiple_distributions(self): - """Base case to ensure that `construct_pdf` method runs with minimum arguments for many-distribution case - Want to verify only that the machinery is working, and the result is not just zeros.""" + """Base case to ensure that `construct_pdf` method runs with minimum arguments + for many-distribution case. Want to verify only that the machinery is working, + and the result is not just zeros. 
+ """ user_defined_locations = self.many_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) results = pdf_constructor.construct_pdf(self.user_defined_grid) - self.assertIsNot(np.sum(results), 0.0) \ No newline at end of file + self.assertIsNot(np.sum(results), 0.0) diff --git a/tests/qp/quantile_pdf_constructors/test_piecewise_linear.py b/tests/qp/quantile_pdf_constructors/test_piecewise_linear.py index 2a7846c8..4e36ec5f 100644 --- a/tests/qp/quantile_pdf_constructors/test_piecewise_linear.py +++ b/tests/qp/quantile_pdf_constructors/test_piecewise_linear.py @@ -3,16 +3,18 @@ import numpy as np import qp -from qp.quantile_pdf_constructors import AbstractQuantilePdfConstructor, \ - PiecewiseLinear +from qp.quantile_pdf_constructors import (AbstractQuantilePdfConstructor, + PiecewiseLinear) class PiecewiseLinearTestCase(unittest.TestCase): """Tests for the CDF Spline Derivative PDF constructor for quantile parameterization.""" def setUp(self): - self.single_norm = qp.stats.norm(loc=3, scale=0.5) - self.many_norm = qp.stats.norm(loc=np.array([[1], [2.5], [3]]), scale=np.array([[0.25], [0.5], [0.1]])) + self.single_norm = qp.stats.norm(loc=3, scale=0.5) # pylint: disable=no-member + self.many_norm = qp.stats.norm( # pylint: disable=no-member + loc=np.array([[1], [2.5], [3]]), scale=np.array([[0.25], [0.5], [0.1]]) + ) self.user_defined_quantiles = np.linspace(0.001, 0.999, 16) self.user_defined_grid = np.linspace(0, 5, 100) @@ -21,20 +23,31 @@ def setUp(self): def test_instantiation(self): """Base case make sure that we can instantiate the class""" user_defined_locations = self.single_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) assert isinstance(pdf_constructor, AbstractQuantilePdfConstructor) def test_instantiation_for_multiple_distributions(self): """Base case make sure that we can instantiate a pdf reconstructor for multiple distributions""" user_defined_locations = self.many_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) assert isinstance(pdf_constructor, AbstractQuantilePdfConstructor) def test_debug(self): """Ensure that debug returns expected values before `prepare_constructor` has been run""" user_defined_locations = self.single_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) - debug_quantiles, debug_locations, debug_first_derivatives, debug_adjusted_locations = pdf_constructor.debug() + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) + ( + debug_quantiles, + debug_locations, + debug_first_derivatives, + debug_adjusted_locations, + ) = pdf_constructor.debug() assert np.all(np.isclose(debug_quantiles, self.user_defined_quantiles)) assert np.all(np.isclose(debug_locations, user_defined_locations)) @@ -42,17 +55,25 @@ def test_debug(self): 
self.assertIsNone(debug_adjusted_locations) def test_basic_construct_pdf(self): - """Base case to ensure that `construct_pdf` method runs with minimum arguments for single distribution case - Want to verify only that the machinery is working, and the result is not just zeros.""" + """Base case to ensure that `construct_pdf` method runs with minimum arguments + for single distribution case. Want to verify only that the machinery is working, + and the result is not just zeros. + """ user_defined_locations = self.single_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) results = pdf_constructor.construct_pdf(self.user_defined_grid) self.assertIsNot(np.sum(results), 0.0) def test_basic_construct_pdf_for_multiple_distributions(self): - """Base case to ensure that `construct_pdf` method runs with minimum arguments for many-distribution case - Want to verify only that the machinery is working, and the result is not just zeros.""" + """Base case to ensure that `construct_pdf` method runs with minimum arguments + for many-distribution case. Want to verify only that the machinery is working, + and the result is not just zeros. + """ user_defined_locations = self.many_norm.ppf(self.user_defined_quantiles) - pdf_constructor = self.pdf_constructor(quantiles=self.user_defined_quantiles, locations=user_defined_locations) + pdf_constructor = self.pdf_constructor( + quantiles=self.user_defined_quantiles, locations=user_defined_locations + ) results = pdf_constructor.construct_pdf(self.user_defined_grid) - self.assertIsNot(np.sum(results), 0.0) \ No newline at end of file + self.assertIsNot(np.sum(results), 0.0) diff --git a/tests/qp/test_auto.py b/tests/qp/test_auto.py index 6fd0a057..c4aedc0b 100644 --- a/tests/qp/test_auto.py +++ b/tests/qp/test_auto.py @@ -1,16 +1,15 @@ """ Unit tests for PDF class """ -import sys import unittest from functools import partial -import qp +import qp from qp import test_funcs class PDFTestCase(unittest.TestCase): - """ Class to manage automatically generated tests for qp distributions """ + """Class to manage automatically generated tests for qp distributions""" def setUp(self): """ @@ -20,33 +19,48 @@ def setUp(self): def tearDown(self): "Clean up any mock data files created by the tests." 
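The `auto_add_class` hunk that follows attaches generated test methods with `setattr`. The pattern works because a fully bound `functools.partial` is callable but is not a descriptor: unittest discovers it by name and invokes it with no arguments, which is why the shared test bodies never take `self`. A minimal sketch of the same pattern, with a hypothetical check function and hypothetical test data that are not part of this patch:

    import unittest
    from functools import partial

    def run_square_check(value, expected):
        # shared body for every generated test; fully bound below, so no `self`
        assert value * value == expected

    class AutoSquareTestCase(unittest.TestCase):
        """Test methods are attached dynamically, as PDFTestCase does below."""

    for name, (value, expected) in {"two": (2, 4), "three": (3, 9)}.items():
        # each saturated partial becomes one discoverable test_* method
        setattr(
            AutoSquareTestCase,
            "test_square_%s" % name,
            partial(run_square_check, value, expected),
        )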
- @classmethod def auto_add_class(cls, test_class, ens_list): """Add tests as member functions to a class""" for key, val in test_class.test_data.items(): - test_pdf = val.pop('test_pdf', True) + test_pdf = val.pop("test_pdf", True) if test_pdf: - kw_test_pdf = dict(short=val.pop('short', False), check_props=val.pop('check_props', True)) - the_pdf_func = partial(test_funcs.run_pdf_func_tests, test_class, val, **kw_test_pdf) - setattr(cls, 'test_pdf_%s' % key, the_pdf_func) - test_persist = val.pop('test_persist', True) + kw_test_pdf = dict( + short=val.pop("short", False), + check_props=val.pop("check_props", True), + ) + the_pdf_func = partial( + test_funcs.run_pdf_func_tests, test_class, val, **kw_test_pdf + ) + setattr(cls, "test_pdf_%s" % key, the_pdf_func) + test_persist = val.pop("test_persist", True) if test_persist: the_persist_func = partial(test_funcs.run_persist_func_tests, val) - setattr(cls, 'test_persist_%s' % key, the_persist_func) - test_convert = val.pop('test_convert', True) - if 'convert_data' not in val: + setattr(cls, "test_persist_%s" % key, the_persist_func) + test_convert = val.pop("test_convert", True) + if "convert_data" not in val: test_convert = False if test_convert: - kw_test_convert = dict(atol_diff=val.pop('atol_diff', 1e-2), atol_diff2=val.pop('atol_diff2', 1e-2)) + kw_test_convert = dict( + atol_diff=val.pop("atol_diff", 1e-2), + atol_diff2=val.pop("atol_diff2", 1e-2), + ) for i, ens in enumerate(ens_list): - the_convert_func = partial(test_funcs.run_convert_tests, ens_orig=ens, gen_class=test_class, test_data=val, **kw_test_convert) - setattr(cls, 'test_convert_%s_%i' % (key, i), the_convert_func) - test_plot = val.pop('test_plot', True) + the_convert_func = partial( + test_funcs.run_convert_tests, + ens_orig=ens, + gen_class=test_class, + test_data=val, + **kw_test_convert, + ) + setattr(cls, "test_convert_%s_%i" % (key, i), the_convert_func) + test_plot = val.pop("test_plot", True) if test_plot: - kw_test_plot = dict(do_samples=val.pop('do_samples', False)) - the_plot_func = partial(test_funcs.run_plotting_func_tests, val, **kw_test_plot) - setattr(cls, 'test_plotting_%s' % key, the_plot_func) + kw_test_plot = dict(do_samples=val.pop("do_samples", False)) + the_plot_func = partial( + test_funcs.run_plotting_func_tests, val, **kw_test_plot + ) + setattr(cls, "test_plotting_%s" % key, the_plot_func) @classmethod def auto_add(cls, class_list, ens_orig): @@ -56,16 +70,20 @@ def auto_add(cls, class_list, ens_orig): test_class.make_test_data() except AttributeError: pass - if hasattr(test_class, 'test_data'): + if hasattr(test_class, "test_data"): cls.auto_add_class(test_class, ens_orig) -ENS_ORIG = test_funcs.build_ensemble(qp.stats.norm_gen.test_data['norm']) #pylint: disable=no-member -ENS_MULTI = test_funcs.build_ensemble(qp.stats.norm_gen.test_data['norm']) #pylint: disable=no-member +ENS_ORIG = test_funcs.build_ensemble( + qp.stats.norm_gen.test_data["norm"] # pylint: disable=no-member +) +ENS_MULTI = test_funcs.build_ensemble( + qp.stats.norm_gen.test_data["norm"] # pylint: disable=no-member +) TEST_CLASSES = qp.instance().values() PDFTestCase.auto_add(TEST_CLASSES, [ENS_ORIG, ENS_MULTI]) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/qp/test_base_metric_classes.py b/tests/qp/test_base_metric_classes.py index 6076caaa..6faa5483 100644 --- a/tests/qp/test_base_metric_classes.py +++ b/tests/qp/test_base_metric_classes.py @@ -1,13 +1,12 @@ # pylint: disable=protected-access import pytest -from 
qp.metrics.base_metric_classes import ( - SingleEnsembleMetric, - DistToDistMetric, - DistToPointMetric, - PointToPointMetric, - PointToDistMetric, -) + +from qp.metrics.base_metric_classes import (DistToDistMetric, + DistToPointMetric, + PointToDistMetric, + PointToPointMetric, + SingleEnsembleMetric) def test_single_ensemble_metrics(): diff --git a/tests/qp/test_brier.py b/tests/qp/test_brier.py index 07c2f150..30e266f3 100644 --- a/tests/qp/test_brier.py +++ b/tests/qp/test_brier.py @@ -1,30 +1,33 @@ -import unittest import logging +import unittest + import numpy as np + from qp.metrics.brier import Brier LOGGER = logging.getLogger(__name__) + class BrierTestCase(unittest.TestCase): - """ Test cases for the Brier metric. """ + """Test cases for the Brier metric.""" def test_brier_base(self): """ Test the base case, ensure output is expected. """ - pred = [[1,0,0], [1,0,0]] - truth = [[1,0,0], [0,1,0]] + pred = [[1, 0, 0], [1, 0, 0]] + truth = [[1, 0, 0], [0, 1, 0]] brier_obj = Brier(pred, truth) result = brier_obj.evaluate() - expected = 1. + expected = 1.0 assert np.isclose(result, expected) def test_brier_result_is_scalar(self): """ Verify output is scalar for input of NxM. """ - pred = [[1,0,0], [0,1,0], [0,0.5,0.5]] - truth = [[1,0,0], [0,1,0], [0,0,1]] + pred = [[1, 0, 0], [0, 1, 0], [0, 0.5, 0.5]] + truth = [[1, 0, 0], [0, 1, 0], [0, 0, 1]] brier_obj = Brier(pred, truth) result = brier_obj.evaluate() assert np.isscalar(result) @@ -33,8 +36,8 @@ def test_brier_base_with_non_integers(self): """ Verify output for non-integer prediction values. """ - pred = [[0.5,0.5,0]] - truth = [[1,0,0]] + pred = [[0.5, 0.5, 0]] + truth = [[1, 0, 0]] brier_obj = Brier(pred, truth) result = brier_obj.evaluate() expected = 0.5 @@ -45,11 +48,11 @@ def test_brier_max_result(self): Base case where prediction is completely wrong, should produce maximum possible result value, 2. """ - pred = [[0,1,0], [1,0,0]] - truth = [[1,0,0], [0,1,0]] + pred = [[0, 1, 0], [1, 0, 0]] + truth = [[1, 0, 0], [0, 1, 0]] brier_obj = Brier(pred, truth) result = brier_obj.evaluate() - expected = 2. + expected = 2.0 assert np.isclose(result, expected) def test_brier_min_result(self): @@ -57,19 +60,19 @@ def test_brier_min_result(self): Base case where prediction is perfect, should produce minimum possible result value, 0. """ - pred = [[1,0,0], [0,1,0]] - truth = [[1,0,0], [0,1,0]] + pred = [[1, 0, 0], [0, 1, 0]] + truth = [[1, 0, 0], [0, 1, 0]] brier_obj = Brier(pred, truth) result = brier_obj.evaluate() - expected = 0. + expected = 0.0 assert np.isclose(result, expected) def test_brier_input_arrays_different_sizes(self): """ Verify exception is raised when input arrays are different sizes. """ - pred = [[1,0,0], [0,1,0]] - truth = [[1,0,0], [0,1,0], [0,0,0]] + pred = [[1, 0, 0], [0, 1, 0]] + truth = [[1, 0, 0], [0, 1, 0], [0, 0, 0]] brier_obj = Brier(pred, truth) with self.assertRaises(ValueError): _ = brier_obj.evaluate() @@ -79,7 +82,7 @@ def test_brier_with_garbage_prediction_input(self): Verify exception is raised when prediction input is non-numeric. """ pred = ["foo", "bar"] - truth = [[1,0,0],[0,1,0]] + truth = [[1, 0, 0], [0, 1, 0]] brier_obj = Brier(pred, truth) with self.assertRaises(TypeError): _ = brier_obj.evaluate() @@ -88,7 +91,7 @@ def test_brier_with_garbage_truth_input(self): """ Verify exception is raised when truth input is non-numeric. 
""" - pred = [[1,0,0], [0,1,0]] + pred = [[1, 0, 0], [0, 1, 0]] truth = ["hello sky", "goodbye ground"] brier_obj = Brier(pred, truth) with self.assertRaises(TypeError): @@ -100,26 +103,30 @@ def test_brier_prediction_does_not_sum_to_one(self): also verifies that while the total sum of values in the prediction array sum to 2, the individual rows do not, and thus logs a warning """ - pred = [[1,0.0001,0], [0,0.9999,0]] - truth = [[1,0,0], [0,1,0]] - LOGGER.info('Testing now...') + pred = [[1, 0.0001, 0], [0, 0.9999, 0]] + truth = [[1, 0, 0], [0, 1, 0]] + LOGGER.info("Testing now...") brier_obj = Brier(pred, truth) with self.assertLogs() as captured: _ = brier_obj.evaluate() - self.assertEqual(captured.records[0].getMessage(), "Input predictions do not sum to 1.") + self.assertEqual( + captured.records[0].getMessage(), "Input predictions do not sum to 1." + ) def test_brier_1d_prediction_does_not_sum_to_one(self): """ Verify exception is raised when 1d prediction input rows don't sum to 1 """ - pred = [0.3,0.8,0] - truth = [1,0,0] - LOGGER.info('Testing now...') + pred = [0.3, 0.8, 0] + truth = [1, 0, 0] + LOGGER.info("Testing now...") brier_obj = Brier(pred, truth) with self.assertLogs(level=logging.WARNING) as captured: - # with caplog.at_level(logging.WARNING): + # with caplog.at_level(logging.WARNING): _ = brier_obj.evaluate() - self.assertEqual(captured.records[0].getMessage(), "Input predictions do not sum to 1.") + self.assertEqual( + captured.records[0].getMessage(), "Input predictions do not sum to 1." + ) def test_brier_1d(self): """ @@ -127,9 +134,9 @@ def test_brier_1d(self): condition in brier._calculate_metric that changes the axis upon which the np.mean operates. """ - pred = [1,0,0] - truth = [1,0,0] + pred = [1, 0, 0] + truth = [1, 0, 0] brier_obj = Brier(pred, truth) result = brier_obj.evaluate() - expected = 0. 
+ expected = 0.0 assert np.isclose(result, expected) diff --git a/tests/qp/test_ensemble.py b/tests/qp/test_ensemble.py index 5cc1415e..46a0cbe6 100644 --- a/tests/qp/test_ensemble.py +++ b/tests/qp/test_ensemble.py @@ -2,21 +2,19 @@ Unit tests for PDF class """ import copy -import logging -import numpy as np +import os import unittest + +import numpy as np + import qp from qp import test_data -import os -import sys - -from qp.test_funcs import assert_all_small, assert_all_close, build_ensemble from qp.plotting import init_matplotlib -from qp.quantile_pdf_constructors import AbstractQuantilePdfConstructor +from qp.test_funcs import assert_all_close, assert_all_small, build_ensemble class EnsembleTestCase(unittest.TestCase): - """ Class to test qp.Ensemble functionality """ + """Class to test qp.Ensemble functionality""" def setUp(self): """ @@ -35,20 +33,22 @@ def _run_ensemble_funcs(ens, xpts): logpdfs = ens.logpdf(xpts) logcdfs = ens.logcdf(xpts) - if hasattr(ens.gen_obj, 'npdf'): + if hasattr(ens.gen_obj, "npdf"): assert ens.npdf == ens.gen_obj.npdf - with np.errstate(all='ignore'): + with np.errstate(all="ignore"): assert np.allclose(np.log(pdfs), logpdfs, atol=1e-9) assert np.allclose(np.log(cdfs), logcdfs, atol=1e-9) binw = xpts[1:] - xpts[0:-1] - check_cdf = ((pdfs[:,0:-1] + pdfs[:,1:]) * binw /2).cumsum(axis=1) - cdfs[:,1:] + check_cdf = ((pdfs[:, 0:-1] + pdfs[:, 1:]) * binw / 2).cumsum(axis=1) - cdfs[ + :, 1: + ] assert_all_small(check_cdf, atol=5e-2, test_name="cdf") hist = ens.histogramize(xpts)[1] hist_check = ens.frozen.histogramize(xpts)[1] - assert_all_small(hist-hist_check, atol=1e-5, test_name="hist") + assert_all_small(hist - hist_check, atol=1e-5, test_name="hist") ppfs = ens.ppf(test_data.QUANTS) check_ppf = ens.cdf(ppfs) - test_data.QUANTS @@ -56,18 +56,18 @@ def _run_ensemble_funcs(ens, xpts): sfs = ens.sf(xpts) check_sf = sfs + cdfs - assert_all_small(check_sf-1, atol=2e-2, test_name="sf") + assert_all_small(check_sf - 1, atol=2e-2, test_name="sf") _ = ens.isf(test_data.QUANTS) check_isf = ens.cdf(ppfs) + test_data.QUANTS[::-1] - assert_all_small(check_isf-1, atol=2e-2, test_name="isf") + assert_all_small(check_isf - 1, atol=2e-2, test_name="isf") samples = ens.rvs(size=1000) assert samples.shape[0] == ens.frozen.npdf assert samples.shape[1] == 1000 median = ens.median() - mean = ens.mean() + mean = ens.mean() var = ens.var() std = ens.std() entropy = ens.entropy() @@ -90,15 +90,25 @@ def _run_ensemble_funcs(ens, xpts): assert interval[0].size == ens.npdf for N in range(3): - moment_partial = ens.moment_partial(N, limits=(test_data.XMIN, test_data.XMAX)) - calc_moment = qp.metrics.calculate_moment(ens, N, limits=(test_data.XMIN, test_data.XMAX)) - assert_all_close(moment_partial, calc_moment, rtol=5e-2, test_name="moment_partial_%i" % N) + moment_partial = ens.moment_partial( + N, limits=(test_data.XMIN, test_data.XMAX) + ) + calc_moment = qp.metrics.calculate_moment( + ens, N, limits=(test_data.XMIN, test_data.XMAX) + ) + assert_all_close( + moment_partial, + calc_moment, + rtol=5e-2, + test_name="moment_partial_%i" % N, + ) sps_moment = ens.moment(N) assert sps_moment.size == ens.npdf - #assert_all_close(sps_moment.flatten(), moment_partial.flatten(), rtol=5e-2, test_name="moment_%i" % N) - #pmf = ens.pmf(N) - #logpmf = ens.logpmf(N) + # assert_all_close(sps_moment.flatten(), moment_partial.flatten(), + # rtol=5e-2, test_name="moment_%i" % N) + # pmf = ens.pmf(N) + # logpmf = ens.logpmf(N) init_matplotlib() axes = ens.plot(xlim=(xpts[0], xpts[-1])) @@ -110,16 
+120,19 @@ def _run_ensemble_funcs(ens, xpts): check_red = red_pdf - pdfs[0:5] assert_all_small(check_red, atol=1e-5, test_name="red") - if hasattr(ens.gen_obj, 'npdf'): # skip scipy norm + if hasattr(ens.gen_obj, "npdf"): # skip scipy norm commList = [None] try: - import mpi4py.MPI - commList.append(mpi4py.MPI.COMM_WORLD) + import mpi4py.MPI # pylint: disable=import-outside-toplevel + + commList.append(mpi4py.MPI.COMM_WORLD) # pylint: disable=c-extension-no-member except ImportError: pass for comm in commList: try: - group, fout = ens.initializeHdf5Write("testwrite.hdf5", ens.npdf, comm) + group, fout = ens.initializeHdf5Write( + "testwrite.hdf5", ens.npdf, comm + ) except TypeError: continue ens.writeHdf5Chunk(group, 0, ens.npdf) @@ -129,7 +142,6 @@ def _run_ensemble_funcs(ens, xpts): assert readens.objdata().keys() == ens.objdata().keys() os.remove("testwrite.hdf5") - @staticmethod def _run_merge_tests(ens, xpts): npdf = ens.npdf @@ -142,72 +154,74 @@ def _run_merge_tests(ens, xpts): modes = np.array([xpts[idx] for idx in np.squeeze(np.argmax(pdf_cat, axis=1))]) - ens_cat.set_ancil({"mode":modes}) + ens_cat.set_ancil({"mode": modes}) pdf_app = ens.pdf(xpts) - mask = np.concatenate([np.ones((npdf), 'bool'), np.zeros((npdf), 'bool')]) + mask = np.concatenate([np.ones((npdf), "bool"), np.zeros((npdf), "bool")]) ens_check = ens_cat[mask] pdf_check = ens_check.pdf(xpts) assert_all_close(pdf_cat, pdf_app, atol=5e-8, test_name="merge_1") assert_all_close(pdf_orig, pdf_check, atol=5e-8, test_name="merge_2") - assert_all_close(ens_cat.ancil['mode'][mask], modes[mask], atol=5e-8, test_name="mode") - + assert_all_close( + ens_cat.ancil["mode"][mask], modes[mask], atol=5e-8, test_name="mode" + ) def test_norm(self): - """ Run the ensemble tests on an ensemble of scipy.stats.norm distributions """ - key = 'norm' - cls_test_data = qp.stats.norm_gen.test_data[key] #pylint: disable=no-member + """Run the ensemble tests on an ensemble of scipy.stats.norm distributions""" + key = "norm" + cls_test_data = qp.stats.norm_gen.test_data[key] # pylint: disable=no-member ens_norm = build_ensemble(cls_test_data) - assert hasattr(ens_norm, 'gen_func') - assert isinstance(ens_norm.gen_obj, qp.stats.norm_gen) #pylint: disable=no-member - assert 'loc' in ens_norm.frozen.kwds - self._run_ensemble_funcs(ens_norm, cls_test_data['test_xvals']) - self._run_merge_tests(ens_norm, cls_test_data['test_xvals']) - + assert hasattr(ens_norm, "gen_func") + assert isinstance( + ens_norm.gen_obj, qp.stats.norm_gen # pylint: disable=no-member + ) + assert "loc" in ens_norm.frozen.kwds + self._run_ensemble_funcs(ens_norm, cls_test_data["test_xvals"]) + self._run_merge_tests(ens_norm, cls_test_data["test_xvals"]) def test_hist(self): - """ Run the ensemble tests on an ensemble of qp.hist distributions """ - key = 'hist' + """Run the ensemble tests on an ensemble of qp.hist distributions""" + key = "hist" qp.hist_gen.make_test_data() cls_test_data = qp.hist_gen.test_data[key] ens_h = build_ensemble(cls_test_data) assert isinstance(ens_h.gen_obj, qp.hist_gen) - self._run_ensemble_funcs(ens_h, cls_test_data['test_xvals']) - self._run_merge_tests(ens_h, cls_test_data['test_xvals']) + self._run_ensemble_funcs(ens_h, cls_test_data["test_xvals"]) + self._run_merge_tests(ens_h, cls_test_data["test_xvals"]) pdfs_mod = copy.copy(ens_h.dist.pdfs) - pdfs_mod[:,7] = 0.5*pdfs_mod[:,7] + pdfs_mod[:, 7] = 0.5 * pdfs_mod[:, 7] ens_h.update_objdata(dict(pdfs=pdfs_mod)) def test_interp(self): - """ Run the ensemble tests on an ensemble of qp.interp 
distributions """ - key = 'interp' + """Run the ensemble tests on an ensemble of qp.interp distributions""" + key = "interp" qp.interp_gen.make_test_data() cls_test_data = qp.interp_gen.test_data[key] ens_i = build_ensemble(cls_test_data) assert isinstance(ens_i.gen_obj, qp.interp_gen) - self._run_ensemble_funcs(ens_i, cls_test_data['test_xvals']) + self._run_ensemble_funcs(ens_i, cls_test_data["test_xvals"]) def test_packed_interp(self): - """ Run the ensemble tests on an ensemble of qp.packed_interp distributions """ - key = 'lin_packed_interp' + """Run the ensemble tests on an ensemble of qp.packed_interp distributions""" + key = "lin_packed_interp" qp.packed_interp_gen.make_test_data() cls_test_data = qp.packed_interp_gen.test_data[key] ens_i = build_ensemble(cls_test_data) assert isinstance(ens_i.gen_obj, qp.packed_interp_gen) - self._run_ensemble_funcs(ens_i, cls_test_data['test_xvals']) + self._run_ensemble_funcs(ens_i, cls_test_data["test_xvals"]) assert np.isfinite(ens_i.dist.yvals).all() def test_iterator(self): - """ Test the iterated read """ + """Test the iterated read""" QP_DIR = os.path.abspath(os.path.dirname(qp.__file__)) - data_file = os.path.join(QP_DIR, 'data', 'test.hdf5') + data_file = os.path.join(QP_DIR, "data", "test.hdf5") ens = qp.read(data_file) data_length = qp.data_length(data_file) assert data_length == ens.npdf itr = qp.iterator(data_file, 10) - test_grid = np.linspace(0., 1., 11) + test_grid = np.linspace(0.0, 1.0, 11) for start, end, ens_i in itr: check_vals = ens[start:end].pdf(test_grid) test_vals = ens_i.pdf(test_grid) @@ -215,11 +229,12 @@ def test_iterator(self): def test_mixmod_with_negative_weights(self): """Verify that an exception is raised when setting up a mixture model with negative weights""" - means = np.array([0.5,1.1, 2.9]) - sigmas = np.array([0.15,0.13,0.14]) - weights = np.array([1,0.5,-0.25]) + means = np.array([0.5, 1.1, 2.9]) + sigmas = np.array([0.15, 0.13, 0.14]) + weights = np.array([1, 0.5, -0.25]) with self.assertRaises(ValueError): _ = qp.mixmod(weights=weights, means=means, stds=sigmas) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/qp/test_eval_funcs.py b/tests/qp/test_eval_funcs.py index ecc59c31..c3b967a8 100644 --- a/tests/qp/test_eval_funcs.py +++ b/tests/qp/test_eval_funcs.py @@ -1,25 +1,31 @@ """ Unit tests for PDF class """ -import numpy as np import unittest + +import numpy as np + import qp -class EvalFuncsTestCase(unittest.TestCase): - """ Tests of evaluations and interpolation functions """ +class EvalFuncsTestCase(unittest.TestCase): # pylint: disable=too-many-instance-attributes + """Tests of evaluations and interpolation functions""" def setUp(self): """ Make any objects that are used in multiple tests. """ self.xpts = np.linspace(0, 3, 7) - self.hpdfs = np.random.random((10, 50)) #pylint: disable=no-member + self.hpdfs = np.random.random((10, 50)) # pylint: disable=no-member self.hbins = np.linspace(0, 5, 51) - self.hbins2 = np.linspace(0, 5, 51) + np.expand_dims(np.linspace(0.1, 1., 10), -1) + self.hbins2 = np.linspace(0, 5, 51) + np.expand_dims( + np.linspace(0.1, 1.0, 10), -1 + ) self.xvals = np.linspace(0, 5, 50) - self.xvals2 = np.linspace(0, 5, 50) + np.expand_dims(np.linspace(0.1, 1., 10), -1) + self.xvals2 = np.linspace(0, 5, 50) + np.expand_dims( + np.linspace(0.1, 1.0, 10), -1 + ) self.yvals1d = self.hpdfs[0] self.rows = np.expand_dims(np.arange(10), -1) @@ -31,7 +37,6 @@ def tearDown(self): "Clean up any mock data files created by the tests." 
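The `_check_interface_function` helper below calls each evaluation routine with flat, broadcast, and 2-d argument layouts and asserts that all three agree. A minimal sketch of the shared-grid convention being exercised, mirroring the `single_row`/`np.vectorize` pattern of `interpolate_x_multi_y_flat` in src/qp/utils.py; the `_sketch` name is illustrative, and the toy shapes match the fixtures above:

    import numpy as np
    from scipy.interpolate import interp1d

    xvals = np.linspace(0, 5, 50)        # shared x grid
    yvals = np.random.random((10, 50))   # one row of y values per PDF

    def interpolate_x_multi_y_flat_sketch(x, row, xvals, yvals, **kwargs):
        # every (x, row) pair is evaluated with the interpolator for that row
        def single_row(xv, rv):
            return interp1d(xvals, yvals[rv], **kwargs)(xv)

        return np.vectorize(single_row)(x, row)

    xpts = np.linspace(0, 3, 7)
    rows = np.expand_dims(np.arange(10), -1)  # (10, 1) broadcasts against (7,)
    vals = interpolate_x_multi_y_flat_sketch(
        xpts, rows, xvals, yvals, bounds_error=False, fill_value=0
    )
    print(vals.shape)  # (10, 7): one interpolated curve per PDF row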
def _check_interface_function(self, ifunc, xvals, yvals, **kwargs): - v0 = ifunc(self.xpts, self.rows, xvals, yvals, **kwargs) v1 = ifunc(self.grid.flatten(), self.rows.flatten(), xvals, yvals, **kwargs) v2 = ifunc(self.grid, self.rows, xvals, yvals, **kwargs) @@ -41,31 +46,47 @@ def _check_interface_function(self, ifunc, xvals, yvals, **kwargs): assert np.allclose(v0, v2) def test_evaluate_hist_x_multi_y(self): - """ Test the evaluate_hist_x_multi_y function """ - self._check_interface_function(qp.utils.evaluate_hist_x_multi_y, - self.hbins, self.hpdfs) + """Test the evaluate_hist_x_multi_y function""" + self._check_interface_function( + qp.utils.evaluate_hist_x_multi_y, self.hbins, self.hpdfs + ) def test_evaluate_hist_multi_x_multi_y(self): - """ Test the evaluate_hist_multi_x_multi_y function """ - self._check_interface_function(qp.utils.evaluate_hist_multi_x_multi_y, - self.hbins2, self.hpdfs) + """Test the evaluate_hist_multi_x_multi_y function""" + self._check_interface_function( + qp.utils.evaluate_hist_multi_x_multi_y, self.hbins2, self.hpdfs + ) def test_interpolate_x_multi_y(self): - """ Test the interpolate_x_multi_y """ - self._check_interface_function(qp.utils.interpolate_x_multi_y, - self.xvals, self.hpdfs, bounds_error=False, fill_value=0) + """Test the interpolate_x_multi_y""" + self._check_interface_function( + qp.utils.interpolate_x_multi_y, + self.xvals, + self.hpdfs, + bounds_error=False, + fill_value=0, + ) def test_interpolate_multi_x_multi_y(self): - """ Test the interpolate_multi_x_multi_y """ - self._check_interface_function(qp.utils.interpolate_multi_x_multi_y, - self.xvals2, self.hpdfs, bounds_error=False, fill_value=0) + """Test the interpolate_multi_x_multi_y""" + self._check_interface_function( + qp.utils.interpolate_multi_x_multi_y, + self.xvals2, + self.hpdfs, + bounds_error=False, + fill_value=0, + ) def test_interpolate_multi_x_y(self): - """ Test the interpolate_multi_x_y """ - self._check_interface_function(qp.utils.interpolate_multi_x_y, - self.xvals2, self.yvals1d, bounds_error=False, fill_value=0) - + """Test the interpolate_multi_x_y""" + self._check_interface_function( + qp.utils.interpolate_multi_x_y, + self.xvals2, + self.yvals1d, + bounds_error=False, + fill_value=0, + ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/qp/test_infrastructure.py b/tests/qp/test_infrastructure.py index cf6536c1..bd578505 100644 --- a/tests/qp/test_infrastructure.py +++ b/tests/qp/test_infrastructure.py @@ -3,13 +3,14 @@ """ import os import unittest + import qp -from qp.test_funcs import build_ensemble from qp import test_data +from qp.test_funcs import build_ensemble class InfrastructureTestCase(unittest.TestCase): - """ Class with tests of infrastructure """ + """Class with tests of infrastructure""" def setUp(self): """ @@ -24,45 +25,46 @@ def tearDown(self): @staticmethod def test_print_factory(): - """ Test the print_factory method """ + """Test the print_factory method""" qp.instance().pretty_print() @staticmethod def test_slice_dict(): - """ Test the slice_dict method """ + """Test the slice_dict method""" orig_dict = dict(loc=test_data.LOC, scale=test_data.SCALE, scalar=1) sliced = qp.dict_utils.slice_dict(orig_dict, 1) - assert sliced['loc'] == test_data.LOC[1] - assert sliced['scale'] == test_data.SCALE[1] - assert sliced['scalar'] == 1 + assert sliced["loc"] == test_data.LOC[1] + assert sliced["scale"] == test_data.SCALE[1] + assert sliced["scalar"] == 1 @staticmethod def test_print_dict_shape(): - """ Test the 
print_dict_shape method """ + """Test the print_dict_shape method""" test_dict = dict(loc=test_data.LOC, scale=test_data.SCALE) qp.dict_utils.print_dict_shape(test_dict) @staticmethod def test_get_val_or_default(): - """ Test the get_val_or_default method """ + """Test the get_val_or_default method""" test_dict = dict(key=1) test_dict[None] = 2 - assert qp.dict_utils.get_val_or_default(test_dict, 'key') == 1 - assert qp.dict_utils.get_val_or_default(test_dict, 'nokey') == 2 + assert qp.dict_utils.get_val_or_default(test_dict, "key") == 1 + assert qp.dict_utils.get_val_or_default(test_dict, "nokey") == 2 assert qp.dict_utils.get_val_or_default(test_dict, None) == 2 - assert qp.dict_utils.set_val_or_default(test_dict, 'key', 5) == 1 + assert qp.dict_utils.set_val_or_default(test_dict, "key", 5) == 1 test_dict.pop(None) - assert qp.dict_utils.get_val_or_default(test_dict, 'nokey') is None + assert qp.dict_utils.get_val_or_default(test_dict, "nokey") is None def test_is_qp_file(self): - fname = 'norm_ensemble.hdf5' - norm_test_data = qp.stats.norm_gen.test_data['norm'] + fname = "norm_ensemble.hdf5" + norm_test_data = qp.stats.norm_gen.test_data["norm"] # pylint: disable=no-member ens_norm = build_ensemble(norm_test_data) ens_norm.write_to(fname) self.files.append(fname) assert qp.instance().is_qp_file(fname) - assert not qp.instance().is_qp_file('test_pit.py') + assert not qp.instance().is_qp_file("test_pit.py") + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/qp/test_metric_factory.py b/tests/qp/test_metric_factory.py index 768757ea..cf7ce208 100644 --- a/tests/qp/test_metric_factory.py +++ b/tests/qp/test_metric_factory.py @@ -5,7 +5,7 @@ Unit tests for PDF class """ import unittest -import numpy as np + import qp import qp.metrics @@ -18,13 +18,12 @@ def setUp(self): Setup an objects that are used in multiple tests. 
""" qp.metrics.update_metrics() - + def test_print_metrics(self): """Test printing the metrics.""" qp.metrics.print_metrics() qp.metrics.print_metrics(force_update=True) - def test_list_metrics(self): """Test printing the metrics.""" the_list = qp.metrics.list_metrics() @@ -39,10 +38,10 @@ def test_create_metrics(self): a_metric = qp.metrics.create_metric(metric_name) assert a_metric.metric_name == metric_name a_metric = qp.metrics.create_metric("outlier", force_update=True) - assert a_metric.metric_name == 'outlier' - + assert a_metric.metric_name == "outlier" + def test_bad_metric_name(self): - """ Catch error on making a bad metric """ + """Catch error on making a bad metric""" with self.assertRaises(KeyError): qp.metrics.create_metric("Bad Metric") diff --git a/tests/qp/test_metrics.py b/tests/qp/test_metrics.py index dcb58a8f..0ccb762e 100644 --- a/tests/qp/test_metrics.py +++ b/tests/qp/test_metrics.py @@ -5,33 +5,22 @@ Unit tests for PDF class """ import unittest + import numpy as np + import qp import qp.metrics - from qp import test_funcs from qp.metrics.array_metrics import quick_rbpe +from qp.metrics.concrete_metric_classes import (BrierMetric, KLDMetric, + MomentMetric, OutlierMetric, + RBPEMetric, RMSEMetric) from qp.metrics.metrics import ( - _calculate_grid_parameters, - _check_ensemble_is_not_nested, + _calculate_grid_parameters, _check_ensemble_is_not_nested, _check_ensembles_are_same_size, - _check_ensembles_contain_correct_number_of_distributions, - calculate_brier, - calculate_goodness_of_fit, - calculate_kld, - calculate_moment, - calculate_outlier_rate, - calculate_rbpe, - calculate_rmse, -) -from qp.metrics.concrete_metric_classes import ( - BrierMetric, - KLDMetric, - MomentMetric, - OutlierMetric, - RBPEMetric, - RMSEMetric, -) + _check_ensembles_contain_correct_number_of_distributions, calculate_brier, + calculate_goodness_of_fit, calculate_kld, calculate_moment, + calculate_outlier_rate, calculate_rbpe, calculate_rmse) from qp.utils import epsilon diff --git a/tests/qp/test_parallel.py b/tests/qp/test_parallel.py index fa415d79..b592c80f 100644 --- a/tests/qp/test_parallel.py +++ b/tests/qp/test_parallel.py @@ -1,24 +1,23 @@ """ Unit tests for PDF class """ -import copy -import numpy as np -import unittest -import qp -from qp import test_data import os -import sys -import pytest +import unittest -from qp.test_funcs import assert_all_small, assert_all_close, build_ensemble -from qp.plotting import init_matplotlib -from mpi4py import MPI import h5py +import numpy as np +import pytest +from mpi4py import MPI + +import qp +from qp.test_funcs import build_ensemble -@pytest.mark.skipif(not h5py.get_config().mpi, reason="Do not have parallel version of hdf5py") +@pytest.mark.skipif( + not h5py.get_config().mpi, reason="Do not have parallel version of hdf5py" +) class EnsembleTestCase(unittest.TestCase): - """ Class to test qp.Ensemble functionality """ + """Class to test qp.Ensemble functionality""" def setUp(self): """ @@ -29,54 +28,56 @@ def tearDown(self): "Clean up any mock data files created by the tests." 
     @staticmethod
-    def _run_ensemble_funcs(ens_type, ens, xpts):
+    def _run_ensemble_funcs(ens_type, ens, _xpts):
         """Run the test for a particular class"""
-        comm = MPI.COMM_WORLD
+        comm = MPI.COMM_WORLD  # pylint: disable=c-extension-no-member
         mpi_rank = comm.Get_rank()
         mpi_size = comm.Get_size()
-        zmode = ens.mode(grid=np.linspace(-3,3,100))
-        ens.set_ancil(dict(zmode=zmode,ones=np.ones(ens.npdf)))
-        group, fout = ens.initializeHdf5Write(f"testwrite_{ens_type}.hdf5", ens.npdf*mpi_size, comm)
-        ens.writeHdf5Chunk(group, mpi_rank*ens.npdf, (mpi_rank+1)*ens.npdf)
+        zmode = ens.mode(grid=np.linspace(-3, 3, 100))
+        ens.set_ancil(dict(zmode=zmode, ones=np.ones(ens.npdf)))
+        group, fout = ens.initializeHdf5Write(
+            f"testwrite_{ens_type}.hdf5", ens.npdf * mpi_size, comm
+        )
+        ens.writeHdf5Chunk(group, mpi_rank * ens.npdf, (mpi_rank + 1) * ens.npdf)
         ens.finalizeHdf5Write(fout)
         readens = qp.read(f"testwrite_{ens_type}.hdf5")
 
-        assert sum(readens.ancil['ones']) == mpi_size*ens.npdf
-        assert len(readens.ancil['zmode']) == mpi_size*ens.npdf
+        assert sum(readens.ancil["ones"]) == mpi_size * ens.npdf
+        assert len(readens.ancil["zmode"]) == mpi_size * ens.npdf
         assert readens.metadata().keys() == ens.metadata().keys()
         assert readens.objdata().keys() == ens.objdata().keys()
 
-        test_grid = grid=np.linspace(-3,3,100)
+        test_grid = np.linspace(-3, 3, 100)
         itr = qp.iterator(f"testwrite_{ens_type}.hdf5", 10, mpi_rank, mpi_size)
         for start, end, ens_i in itr:
             assert np.allclose(readens[start:end].pdf(test_grid), ens_i.pdf(test_grid))
-
+
         if mpi_rank == 0:
             os.remove(f"testwrite_{ens_type}.hdf5")
 
     def test_parallel_norm(self):
-        """ Run the ensemble tests on an ensemble of scipy.stats.norm distributions """
-        key = 'norm'
-        cls_test_data = qp.stats.norm_gen.test_data[key] #pylint: disable=no-member
+        """Run the ensemble tests on an ensemble of scipy.stats.norm distributions"""
+        key = "norm"
+        cls_test_data = qp.stats.norm_gen.test_data[key]  # pylint: disable=no-member
         ens_norm = build_ensemble(cls_test_data)
-        self._run_ensemble_funcs('norm', ens_norm, cls_test_data['test_xvals'])
+        self._run_ensemble_funcs("norm", ens_norm, cls_test_data["test_xvals"])
 
     def test_parallel_hist(self):
-        """ Run the ensemble tests on an ensemble of qp.hist distributions """
-        key = 'hist'
+        """Run the ensemble tests on an ensemble of qp.hist distributions"""
+        key = "hist"
         qp.hist_gen.make_test_data()
         cls_test_data = qp.hist_gen.test_data[key]
         ens_h = build_ensemble(cls_test_data)
-        self._run_ensemble_funcs('hist', ens_h, cls_test_data['test_xvals'])
+        self._run_ensemble_funcs("hist", ens_h, cls_test_data["test_xvals"])
 
     def test_parallel_interp(self):
-        """ Run the ensemble tests on an ensemble of qp.interp distributions """
-        key = 'interp'
+        """Run the ensemble tests on an ensemble of qp.interp distributions"""
+        key = "interp"
         qp.interp_gen.make_test_data()
         cls_test_data = qp.interp_gen.test_data[key]
         ens_i = build_ensemble(cls_test_data)
-        self._run_ensemble_funcs('interp', ens_i, cls_test_data['test_xvals'])
+        self._run_ensemble_funcs("interp", ens_i, cls_test_data["test_xvals"])
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/tests/qp/test_pit.py b/tests/qp/test_pit.py
index 4ce4e1f3..e8126bc9 100644
--- a/tests/qp/test_pit.py
+++ b/tests/qp/test_pit.py
@@ -2,13 +2,14 @@
 # pylint: disable=protected-access
 
 import unittest
+
 import numpy as np
+
 import qp
 from qp import interp_gen
 from qp.ensemble import Ensemble
-from qp.metrics.pit import PIT
 from qp.metrics.concrete_metric_classes import 
PITMetric - +from qp.metrics.pit import PIT # constants for tests NMAX = 2.5 diff --git a/tests/qp/test_point_metrics.py b/tests/qp/test_point_metrics.py index 8d72312f..ec803ffa 100644 --- a/tests/qp/test_point_metrics.py +++ b/tests/qp/test_point_metrics.py @@ -1,15 +1,12 @@ import numpy as np -import qp - -from qp.metrics.point_estimate_metric_classes import ( - PointStatsEz, - PointBias, - PointOutlierRate, - PointSigmaIQR, - PointSigmaMAD, -) +import qp from qp.metrics.concrete_metric_classes import CDELossMetric +from qp.metrics.point_estimate_metric_classes import (PointBias, + PointOutlierRate, + PointSigmaIQR, + PointSigmaMAD, + PointStatsEz) # values for metrics OUTRATE = 0.0 @@ -37,8 +34,7 @@ def construct_test_ensemble(): def test_point_metrics(): - """Basic tests for the various point estimate metrics - """ + """Basic tests for the various point estimate metrics""" zgrid, zspec, pdf_ens, true_ez = construct_test_ensemble() zb = pdf_ens.mode(grid=zgrid).flatten() @@ -60,8 +56,7 @@ def test_point_metrics(): def test_cde_loss_metric(): - """Basic test to ensure that the CDE Loss metric class is working. - """ + """Basic test to ensure that the CDE Loss metric class is working.""" zgrid, zspec, pdf_ens, _ = construct_test_ensemble() cde_loss_class = CDELossMetric(zgrid) result = cde_loss_class.evaluate(pdf_ens, zspec) diff --git a/tests/qp/test_scipy_vectorization.py b/tests/qp/test_scipy_vectorization.py index 66f99e90..ca5eaf9a 100644 --- a/tests/qp/test_scipy_vectorization.py +++ b/tests/qp/test_scipy_vectorization.py @@ -4,16 +4,13 @@ """ import time import unittest + import numpy as np + import qp import qp.metrics - from qp import test_funcs -from qp.metrics.concrete_metric_classes import ( - ADMetric, - CvMMetric, - KSMetric, -) +from qp.metrics.concrete_metric_classes import ADMetric, CvMMetric, KSMetric class ScipyVectorizationTests(unittest.TestCase): diff --git a/tests/qp/test_utils.py b/tests/qp/test_utils.py index 03830f3d..585ca11a 100644 --- a/tests/qp/test_utils.py +++ b/tests/qp/test_utils.py @@ -1,14 +1,16 @@ """ Unit tests for PDF class """ -import numpy as np import unittest -import qp +import numpy as np + +import qp from qp import test_data + class UtilsTestCase(unittest.TestCase): - """ Test the utility functions """ + """Test the utility functions""" def setUp(self): """ @@ -16,51 +18,74 @@ def setUp(self): """ def tearDown(self): - """ Clean up any mock data files created by the tests. 
""" + """Clean up any mock data files created by the tests.""" @staticmethod def test_profile(): - """ Test the utils.profile function """ + """Test the utils.profile function""" npdf = 100 x_bins = test_data.XBINS x_cents = qp.utils.edge_to_center(x_bins) nbin = x_cents.size - x_data = (np.ones((npdf, 1))*x_cents).T + x_data = (np.ones((npdf, 1)) * x_cents).T c_vals = np.linspace(0.5, 2.5, nbin) - y_data = np.expand_dims(c_vals, -1)*(0.95 + 0.1*np.random.uniform(size=(nbin, npdf))) + y_data = np.expand_dims(c_vals, -1) * ( + 0.95 + 0.1 * np.random.uniform(size=(nbin, npdf)) + ) pf_1 = qp.utils.profile(x_data.flatten(), y_data.flatten(), x_bins, std=False) pf_2 = qp.utils.profile(x_data.flatten(), y_data.flatten(), x_bins, std=True) - qp.test_funcs.assert_all_close(pf_1[0], c_vals, atol=0.02, test_name="profile_mean") + qp.test_funcs.assert_all_close( + pf_1[0], c_vals, atol=0.02, test_name="profile_mean" + ) qp.test_funcs.assert_all_close(pf_1[0], pf_2[0], test_name="profile_check") - qp.test_funcs.assert_all_close(pf_1[1], c_vals/npdf*np.sqrt(12), atol=0.2, test_name="profile_std") - qp.test_funcs.assert_all_close(pf_1[1], 0.1*pf_2[1], test_name="profile_err") + qp.test_funcs.assert_all_close( + pf_1[1], c_vals / npdf * np.sqrt(12), atol=0.2, test_name="profile_std" + ) + qp.test_funcs.assert_all_close(pf_1[1], 0.1 * pf_2[1], test_name="profile_err") def test_sparse(self): - """ Test the sparse representation """ + """Test the sparse representation""" - xvals = np.linspace(0,1,101) - #assert basic construction + xvals = np.linspace(0, 1, 101) + # assert basic construction A = qp.sparse_rep.create_voigt_basis(xvals, (0, 1), 11, (0.01, 0.5), 10, 10) self.assertEqual(A.shape, (101, 1100)) - #check consistency of a constrained case od voigt basis - pdf0 = np.exp(-((xvals - 0.5) ** 2) / (2.* 0.01)) / (np.sqrt(2*np.pi)*0.1) - pdf2 = qp.sparse_rep.shapes2pdf([1,], [0.5,], [0.1,], [0,], dict(xvals=xvals), cut=1.e-7) + # check consistency of a constrained case od voigt basis + pdf0 = np.exp(-((xvals - 0.5) ** 2) / (2.0 * 0.01)) / (np.sqrt(2 * np.pi) * 0.1) + pdf2 = qp.sparse_rep.shapes2pdf( + [ + 1, + ], + [ + 0.5, + ], + [ + 0.1, + ], + [ + 0, + ], + dict(xvals=xvals), + cut=1.0e-7, + ) self.assertTrue(np.allclose(pdf2, pdf0)) - A = qp.sparse_rep.create_voigt_basis(xvals, (0.5,0.5), 1, (0.1,0.1), 1, 1) + A = qp.sparse_rep.create_voigt_basis(xvals, (0.5, 0.5), 1, (0.1, 0.1), 1, 1) pdf1 = np.squeeze(A) * np.sqrt((pdf0**2).sum()) - self.assertTrue(np.allclose(pdf1,pdf0)) - #NSparse set to 2 so that unit testing goes through more code in sparse_basis - ALL, bigD, _ = qp.sparse_rep.build_sparse_representation(xvals, [pdf0], (0.5,0.5), 1, (0.1,0.1), 1, 1, 2) + self.assertTrue(np.allclose(pdf1, pdf0)) + # NSparse set to 2 so that unit testing goes through more code in sparse_basis + ALL, bigD, _ = qp.sparse_rep.build_sparse_representation( + xvals, [pdf0], (0.5, 0.5), 1, (0.1, 0.1), 1, 1, 2 + ) va, ma, sa, ga = qp.sparse_rep.indices2shapes(ALL, bigD) - self.assertEqual(va[:, 1], 0.) 
- self.assertEqual([va[:, 0], ma[:, 0], sa[:, 0], ga[:, 0]], [1., 0.5, 0.1, 0.]) - #check default values + self.assertEqual(va[:, 1], 0.0) + self.assertEqual([va[:, 0], ma[:, 0], sa[:, 0], ga[:, 0]], [1.0, 0.5, 0.1, 0.0]) + # check default values ALL, bigD, A = qp.sparse_rep.build_sparse_representation(xvals, [pdf0]) - self.assertEqual(bigD['mu'], [min(xvals), max(xvals)]) - self.assertEqual(bigD['dims'][0], len(xvals)) + self.assertEqual(bigD["mu"], [min(xvals), max(xvals)]) + self.assertEqual(bigD["dims"][0], len(xvals)) pdf_rec = qp.sparse_rep.pdf_from_sparse(ALL, A, xvals) self.assertTrue(np.allclose(pdf_rec[:, 0], pdf0, atol=1.5e-2)) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main()