Skip to content

Commit

Permalink
lazy pytdigest (#234)
Browse files Browse the repository at this point in the history
* lazy pytdigest

* fix quote in pyproject.toml

* fix deprecation warnings

* pin numpy<2.0.0

* Use module imports rather than relative imports

---------

Co-authored-by: Sidney Mau <[email protected]>
  • Loading branch information
eacharles and sidneymau authored Aug 2, 2024
1 parent 03a3c3b commit f27f36b
Show file tree
Hide file tree
Showing 8 changed files with 19 additions and 17 deletions.
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@ classifiers = [
dynamic = ["version"]

dependencies = [
"numpy",
"numpy<2.0.0",
"scipy",
"tables-io",
"deprecated",
"pytdigest",
]

# On a Mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes)
Expand All @@ -37,6 +36,7 @@ dev = [
"packaging",
"pillow",
"cycler",
"pytdigest",
"python-dateutil",
"kiwisolver",
"joblib",
Expand All @@ -48,11 +48,13 @@ dev = [
]
full = [
"tables-io[full]",
"pytdigest",
"matplotlib",
"scikit-learn",
]
all = [
"tables-io[full]",
"pytdigest",
"matplotlib",
"scikit-learn",
]
Expand Down
4 changes: 2 additions & 2 deletions src/qp/conversion_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def extract_voigt_xy_sparse(in_dist, **kwargs): # pragma: no cover
newz = np.linspace(minz, maxz, nz)
interp = sciinterp.interp1d(z, yvals, assume_sorted=True)
newpdf = interp(newz)
newpdf = newpdf / sciint.trapz(newpdf, newz).reshape(-1, 1)
newpdf = newpdf / sciint.trapezoid(newpdf, newz).reshape(-1, 1)
ALL, bigD, _ = build_sparse_representation(newz, newpdf)
return dict(indices=ALL, metadata=bigD)

Expand Down Expand Up @@ -410,7 +410,7 @@ def extract_xy_sparse(in_dist, **kwargs): # pragma: no cover
# normalize and sum the weighted pdfs
x = sparse_meta["z"]
y = pdf_y.sum(axis=-1)
norms = sciint.trapz(y.T, x)
norms = sciint.trapezoid(y.T, x)
y /= norms
# super(sparse_gen, self).__init__(x, y.T, *args, **kwargs)
xvals = x
Expand Down
1 change: 1 addition & 0 deletions src/qp/lazy_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
mpl = lazyImport("matplotlib")
plt = lazyImport("matplotlib.pyplot")
mixture = lazyImport("sklearn.mixture")
pytdigest = lazyImport("pytdigest")
6 changes: 3 additions & 3 deletions src/qp/metrics/concrete_metric_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
)
from qp.metrics.pit import PIT

from pytdigest import TDigest
from qp.lazy_modules import pytdigest
from functools import reduce
from operator import add

Expand Down Expand Up @@ -54,7 +54,7 @@ def finalize(self, centroids: np.ndarray = []):
`compute_from_digest` method.
"""
digests = (
TDigest.of_centroids(np.array(centroid), compression=self._tdigest_compression)
pytdigest.TDigest.of_centroids(np.array(centroid), compression=self._tdigest_compression)
for centroid in centroids
)
digest = reduce(add, digests)
Expand Down Expand Up @@ -277,7 +277,7 @@ def evaluate(self, estimate, reference):

def accumulate(self, estimate, reference):
pit_samples = PIT(estimate, reference, self._eval_grid)._gather_pit_samples(estimate, reference)
digest = TDigest.compute(pit_samples, compression=self._tdigest_compression)
digest = pytdigest.TDigest.compute(pit_samples, compression=self._tdigest_compression)
centroids = digest.get_centroids()
return centroids

Expand Down
7 changes: 3 additions & 4 deletions src/qp/metrics/point_estimate_metric_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
MetricOutputType,
PointToPointMetric,
)
from pytdigest import TDigest
from functools import reduce
from operator import add

from qp.lazy_modules import pytdigest

class PointToPointMetricDigester(PointToPointMetric):

Expand Down Expand Up @@ -35,7 +34,7 @@ def accumulate(self, estimate, reference):
centroid locations and weights.
"""
ez = (estimate - reference) / (1.0 + reference)
digest = TDigest.compute(ez, compression=self._tdigest_compression)
digest = pytdigest.TDigest.compute(ez, compression=self._tdigest_compression)
centroids = digest.get_centroids()
return centroids

Expand All @@ -56,7 +55,7 @@ def finalize(self, centroids: np.ndarray = []):
`compute_from_digest` method.
"""
digests = (
TDigest.of_centroids(np.array(centroid), compression=self._tdigest_compression)
pytdigest.TDigest.of_centroids(np.array(centroid), compression=self._tdigest_compression)
for centroid in centroids
)
digest = reduce(add, digests)
Expand Down
6 changes: 3 additions & 3 deletions src/qp/sparse_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def __init__(self, xvals, mu, sig, dims, sparse_indices, *args, **kwargs): # py
# normalize and sum the weighted pdfs
x = sparse_meta["xvals"]
y = pdf_y.sum(axis=-1)
norms = sciint.trapz(y.T, x)
norms = sciint.trapezoid(y.T, x)
y /= norms
kwargs.setdefault("xvals", x)
kwargs.setdefault("yvals", y.T)
Expand Down Expand Up @@ -97,15 +97,15 @@ def build_test_data():
P = np.load(filein)
z = P[-1]
P = P[:NPDF]
P = P / sciint.trapz(P, z).reshape(-1, 1)
P = P / sciint.trapezoid(P, z).reshape(-1, 1)
minz = np.min(z)
nz = 301
_, j = np.where(P > 0)
maxz = np.max(z[j + 1])
newz = np.linspace(minz, maxz, nz)
interp = sciinterp.interp1d(z, P, assume_sorted=True)
newpdf = interp(newz)
newpdf = newpdf / sciint.trapz(newpdf, newz).reshape(-1, 1)
newpdf = newpdf / sciint.trapezoid(newpdf, newz).reshape(-1, 1)
sparse_idx, meta, _ = sparse_rep.build_sparse_representation(
newz, newpdf, verbose=False
)
Expand Down
4 changes: 2 additions & 2 deletions src/qp/sparse_rep.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def shapes2pdf(wa, ma, sa, ga, meta, cut=1.0e-5): # pylint: disable=too-many-ar
pdft = w * pdft / sla.norm(pdft)
pdf += pdft
pdf = np.where(pdf >= cut, pdf, 0.0)
return pdf / sciint.trapz(pdf, x)
return pdf / sciint.trapezoid(pdf, x)


def create_basis(metadata, cut=1.0e-5):
Expand Down Expand Up @@ -301,6 +301,6 @@ def pdf_from_sparse(sparse_indices, A, xvals, cut=1.0e-5):
pdf_y = (A[:, indices] * vals).sum(axis=-1)
pdf_y = np.where(pdf_y >= cut, pdf_y, 0.0)
pdf_x = xvals
norms = sciint.trapz(pdf_y.T, pdf_x)
norms = sciint.trapezoid(pdf_y.T, pdf_x)
pdf_y /= norms
return pdf_y
2 changes: 1 addition & 1 deletion src/qp/test_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def run_pdf_func_tests(test_class, test_data, short=False, check_props=True):

alloc_kwds = pdf.dist.get_allocation_kwds(pdf.npdf, **test_data["ctor_data"])
for key, val in alloc_kwds.items():
assert np.product(val[0]) == np.size(test_data["ctor_data"][key])
assert np.prod(val[0]) == np.size(test_data["ctor_data"][key])

return pdf_func_tests(pdf, test_data, short=short, check_props=check_props)

Expand Down

0 comments on commit f27f36b

Please sign in to comment.