Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce BetaGeoNBD Random Variable #1317

Merged
merged 42 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
817514d
Add BGNBD distribution and BGNBD Random Variable
PabloRoque Dec 22, 2024
4f6fd14
Add BGNBD excel test
PabloRoque Dec 25, 2024
715d5c4
Remove logp in terms of Potential
PabloRoque Dec 27, 2024
e2a0607
Rename BGNBD -> BetaGeoNBD
PabloRoque Dec 27, 2024
ca62118
Add logp and test matching lifetimes
PabloRoque Dec 28, 2024
ed4a0f0
Add logp param.type.ndim > 1. Add logp pt.switch. Related tests
PabloRoque Dec 30, 2024
e65aa6a
Add test_bg_nbd_sample_prior
PabloRoque Dec 31, 2024
b1b891e
Add _distribution_new_customers and related test. Rename population_d…
PabloRoque Jan 2, 2025
866ef8e
Adjust test_model_repr expected result to BetaGeoNBD instead of BGNBD
PabloRoque Jan 2, 2025
df04039
Improve distribution_new_customer, distribution_new_customer_purchase…
PabloRoque Jan 2, 2025
86364d8
Revert @pytest.mark.slow to in test_model_convergence
PabloRoque Jan 2, 2025
256b607
Revert distribution changes related to #1269
PabloRoque Jan 2, 2025
5bf47d7
Revert more changes related to #1269
PabloRoque Jan 2, 2025
7aa31f7
Revert BetaGeoBetaBinomRV changes
PabloRoque Jan 2, 2025
ef55998
Revert ParetoNBDRV changes
PabloRoque Jan 2, 2025
4f91de5
Docstring cleanup
PabloRoque Jan 2, 2025
3c5260c
Revert changes in ContContract dist
PabloRoque Jan 2, 2025
21cb076
Clean ContContract changes
PabloRoque Jan 2, 2025
55da4d1
Revert deletion _supp_shape_from_params
PabloRoque Jan 2, 2025
8beb7fc
Remove commented chunk on fit_result. Opted for data to standardize w…
PabloRoque Jan 2, 2025
4c46ae8
BetaGeoNBDRV as pre-#1269 definition
PabloRoque Jan 2, 2025
9356052
Fix test_numerically_stable_logp
PabloRoque Jan 3, 2025
14d3770
Merge branch 'main' into BGNBDRV
PabloRoque Jan 8, 2025
fc912d6
Overrride ModifiedBetaGeoModel.distribution_new_customer method
PabloRoque Jan 8, 2025
7b60fcf
Modify test_bg_nbd tensor parametrization to only vectors in test_bg_…
PabloRoque Jan 8, 2025
04952f3
Silence mypy in 2 lines
PabloRoque Jan 8, 2025
60c35d3
Merge branch 'main' into BGNBDRV
PabloRoque Jan 9, 2025
bcc8fa7
Adapt BetaGeoNBDRV to #1269
PabloRoque Jan 9, 2025
17818ae
Fix test_clv_fit_mcmc
PabloRoque Jan 9, 2025
15503ec
Modify sim_data to reflect the beta-distributed dropout process
PabloRoque Jan 10, 2025
db8c796
Add reference to BetaGeoNBD
PabloRoque Jan 10, 2025
378220e
Delete _logp
PabloRoque Jan 10, 2025
7b9508f
Delete commented weights param in test_bg_nbd
PabloRoque Jan 10, 2025
c4edfa9
Ammend BetaGeoNBD docstring
PabloRoque Jan 10, 2025
5d0db19
Fix BetaGeoNBD math
PabloRoque Jan 10, 2025
7d05269
Fix test_posterior_distributions to include dropout distributions
PabloRoque Jan 10, 2025
df6a304
Merge branch 'main' into BGNBDRV
PabloRoque Jan 10, 2025
ba3625c
Fix #NUM! docstring
PabloRoque Jan 10, 2025
911f8a0
Tweak sim_data
PabloRoque Jan 10, 2025
0db4e14
Merge branch 'main' into BGNBDRV
PabloRoque Jan 10, 2025
bb4e82b
Add co-author.
PabloRoque Jan 10, 2025
e9fe1ac
Merge branch 'main' into BGNBDRV
PabloRoque Jan 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 174 additions & 2 deletions pymc_marketing/clv/distributions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2024 The PyMC Labs Developers
# Copyright 2025 The PyMC Labs Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -23,7 +23,13 @@
from pytensor.graph import vectorize_graph
from pytensor.tensor.random.op import RandomVariable

__all__ = ["BetaGeoBetaBinom", "ContContract", "ContNonContract", "ParetoNBD"]
__all__ = [
"BetaGeoBetaBinom",
"BetaGeoNBD",
"ContContract",
"ContNonContract",
"ParetoNBD",
]


class ContNonContractRV(RandomVariable):
Expand Down Expand Up @@ -662,3 +668,169 @@ def logp_customer_died(t_x_i, x_i, alpha_i, beta_i, gamma_i, delta_i, T_i):
delta > 0,
msg="alpha > 0, beta > 0, gamma > 0, delta > 0",
)


class BetaGeoNBDRV(RandomVariable):
name = "bg_nbd"
ndim_supp = 1
ndims_params = [0, 0, 0, 0, 0]

dtype = "floatX"
_print_name = ("BetaGeoNBD", "\\operatorname{BetaGeoNBD}")

wd60622 marked this conversation as resolved.
Show resolved Hide resolved
def make_node(self, rng, size, dtype, a, b, r, alpha, T):
a = pt.as_tensor_variable(a)
b = pt.as_tensor_variable(b)
r = pt.as_tensor_variable(r)
alpha = pt.as_tensor_variable(alpha)
T = pt.as_tensor_variable(T)

return super().make_node(rng, size, dtype, a, b, r, alpha, T)

def __call__(self, a, b, r, alpha, T, size=None, **kwargs):
return super().__call__(a, b, r, alpha, T, size=size, **kwargs)

@classmethod
def rng_fn(cls, rng, a, b, r, alpha, T, size):
if size is None:
size = np.broadcast_shapes(a.shape, b.shape, r.shape, alpha.shape, T.shape)

a = np.asarray(a)
b = np.asarray(b)
r = np.asarray(r)
alpha = np.asarray(alpha)
T = np.asarray(T)

if size == ():
size = np.broadcast_shapes(a.shape, b.shape, r.shape, alpha.shape, T.shape)

a = np.broadcast_to(a, size)
b = np.broadcast_to(b, size)
r = np.broadcast_to(r, size)
alpha = np.broadcast_to(alpha, size)
T = np.broadcast_to(T, size)

output = np.zeros(shape=size + (2,)) # noqa:RUF005

lam = rng.gamma(shape=r, scale=1 / alpha, size=size)
p = rng.beta(a=a, b=b, size=size)

def sim_data(lam, p, T):
t = 0
n = 0

dropout_time = rng.exponential(scale=1 / p)
wait = rng.exponential(scale=1 / lam)

final_t = min(dropout_time, T)
while (t + wait) < final_t:
t += wait
n += 1
wait = rng.exponential(scale=1 / lam)

return np.array(
[
t,
n,
],
)

ColtAllen marked this conversation as resolved.
Show resolved Hide resolved
for index in np.ndindex(*size):
output[index] = sim_data(lam[index], p[index], T[index])

return output

def _supp_shape_from_params(*args, **kwargs):
return (2,)


bg_nbd = BetaGeoNBDRV()


class BetaGeoNBD(PositiveContinuous):
r"""Population-level distribution class for a discrete, non-contractual, Beta-Geometric/Negative-Binomial process.

It is based on Fader, et al. in [1]_ and [2]_.

ColtAllen marked this conversation as resolved.
Show resolved Hide resolved
.. math::

\mathbb{L}(\r, \alpha, \a, \b | x, t_x, T) &=
A_1 \cdot A_2 \cdot (A_3 + \delta_{x>0} A_4)

where:
A_1 &= \frac{\Gamma(r+x)\alpha^r}{\Gamma(r)} \\
A_2 &= \frac{\Gamma(a+b)\Gamma(b+x)}{\Gamma(b)\Gamma(a+b+x)} \\
A_3 &= (\frac{1}{\alpha + T})^{r+x} \\
A_4 &= (\frac{a}{b+x-1})(\frac{1}{\alpha + t_x})^{r+x} \\

======== ===============================================
Support :math:`t_j >= 0` for :math:`j = 1, \dots,x`
Mean :math:`\mathbb{E}[X(n) | r, \alpha, a, b] = \frac{a+b-1}{a-1} \left[ 1 - (\frac{\alpha}{\alpha + T})^r \leftidx{_2}{F}{_1}(r,b;a+b-1;\frac{t}{\alpha + t}) \right] `
======== ===============================================

ColtAllen marked this conversation as resolved.
Show resolved Hide resolved
References
----------
.. [1] Fader, Peter S., Bruce G.S. Hardie, and Jen Shang (2010),
"Counting Your Customers" the Easy Way: An Alternative to the Pareto/NBD Model

Marketing Science, 24 (Spring), 275-284

.. [2] Implementing the BG/NBD Model for Customer Base Analysis in Excel http://brucehardie.com/notes/004/bgnbd_spreadsheet_note.pdf

""" # noqa: E501

rv_op = bg_nbd

@classmethod
def dist(cls, a, b, r, alpha, T, **kwargs):
"""Get the distribution from the parameters."""
return super().dist([a, b, r, alpha, T], **kwargs)

def logp(value, a, b, r, alpha, T):
"""Log-likelihood of the distribution."""
t_x = pt.atleast_1d(value[..., 0])
x = pt.atleast_1d(value[..., 1])

for param in (t_x, x, a, b, r, alpha, T):
if param.type.ndim > 1:
raise NotImplementedError(
f"BetaGeoNBD logp only implemented for vector parameters, got ndim={param.type.ndim}"
)

x_non_zero = x > 0

d1 = (
pt.gammaln(r + x)
- pt.gammaln(r)
+ pt.gammaln(a + b)
+ pt.gammaln(b + x)
- pt.gammaln(b)
- pt.gammaln(a + b + x)
)

d2 = r * pt.log(alpha) - (r + x) * pt.log(alpha + t_x)
c3 = ((alpha + t_x) / (alpha + T)) ** (r + x)
c4 = a / (b + x - 1)

logp = d1 + d2 + pt.log(c3 + pt.switch(x_non_zero, c4, 0))

logp = pt.switch(
pt.or_(
pt.or_(
pt.lt(t_x, 0),
pt.lt(x, 0),
),
pt.gt(t_x, T),
),
-np.inf,
logp,
)

return check_parameters(
logp,
a > 0,
b > 0,
alpha > 0,
r > 0,
msg="a > 0, b > 0, alpha > 0, r > 0",
)
Loading
Loading