Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce BetaGeoNBD Random Variable #1317

Merged
merged 42 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
817514d
Add BGNBD distribution and BGNBD Random Variable
PabloRoque Dec 22, 2024
4f6fd14
Add BGNBD excel test
PabloRoque Dec 25, 2024
715d5c4
Remove logp in terms of Potential
PabloRoque Dec 27, 2024
e2a0607
Rename BGNBD -> BetaGeoNBD
PabloRoque Dec 27, 2024
ca62118
Add logp and test matching lifetimes
PabloRoque Dec 28, 2024
ed4a0f0
Add logp param.type.ndim > 1. Add logp pt.switch. Related tests
PabloRoque Dec 30, 2024
e65aa6a
Add test_bg_nbd_sample_prior
PabloRoque Dec 31, 2024
b1b891e
Add _distribution_new_customers and related test. Rename population_d…
PabloRoque Jan 2, 2025
866ef8e
Adjust test_model_repr expected result to BetaGeoNBD instead of BGNBD
PabloRoque Jan 2, 2025
df04039
Improve distribution_new_customer, distribution_new_customer_purchase…
PabloRoque Jan 2, 2025
86364d8
Revert @pytest.mark.slow to in test_model_convergence
PabloRoque Jan 2, 2025
256b607
Revert distribution changes related to #1269
PabloRoque Jan 2, 2025
5bf47d7
Revert more changes related to #1269
PabloRoque Jan 2, 2025
7aa31f7
Revert BetaGeoBetaBinomRV changes
PabloRoque Jan 2, 2025
ef55998
Revert ParetoNBDRV changes
PabloRoque Jan 2, 2025
4f91de5
Docstring cleanup
PabloRoque Jan 2, 2025
3c5260c
Revert changes in ContContract dist
PabloRoque Jan 2, 2025
21cb076
Clean ContContract changes
PabloRoque Jan 2, 2025
55da4d1
Revert deletion _supp_shape_from_params
PabloRoque Jan 2, 2025
8beb7fc
Remove commented chunk on fit_result. Opted for data to standardize w…
PabloRoque Jan 2, 2025
4c46ae8
BetaGeoNBDRV as pre-#1269 definition
PabloRoque Jan 2, 2025
9356052
Fix test_numerically_stable_logp
PabloRoque Jan 3, 2025
14d3770
Merge branch 'main' into BGNBDRV
PabloRoque Jan 8, 2025
fc912d6
Overrride ModifiedBetaGeoModel.distribution_new_customer method
PabloRoque Jan 8, 2025
7b60fcf
Modify test_bg_nbd tensor parametrization to only vectors in test_bg_…
PabloRoque Jan 8, 2025
04952f3
Silence mypy in 2 lines
PabloRoque Jan 8, 2025
60c35d3
Merge branch 'main' into BGNBDRV
PabloRoque Jan 9, 2025
bcc8fa7
Adapt BetaGeoNBDRV to #1269
PabloRoque Jan 9, 2025
17818ae
Fix test_clv_fit_mcmc
PabloRoque Jan 9, 2025
15503ec
Modify sim_data to reflect the beta-distributed dropout process
PabloRoque Jan 10, 2025
db8c796
Add reference to BetaGeoNBD
PabloRoque Jan 10, 2025
378220e
Delete _logp
PabloRoque Jan 10, 2025
7b9508f
Delete commented weights param in test_bg_nbd
PabloRoque Jan 10, 2025
c4edfa9
Ammend BetaGeoNBD docstring
PabloRoque Jan 10, 2025
5d0db19
Fix BetaGeoNBD math
PabloRoque Jan 10, 2025
7d05269
Fix test_posterior_distributions to include dropout distributions
PabloRoque Jan 10, 2025
df6a304
Merge branch 'main' into BGNBDRV
PabloRoque Jan 10, 2025
ba3625c
Fix #NUM! docstring
PabloRoque Jan 10, 2025
911f8a0
Tweak sim_data
PabloRoque Jan 10, 2025
0db4e14
Merge branch 'main' into BGNBDRV
PabloRoque Jan 10, 2025
bb4e82b
Add co-author.
PabloRoque Jan 10, 2025
e9fe1ac
Merge branch 'main' into BGNBDRV
PabloRoque Jan 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 155 additions & 1 deletion pymc_marketing/clv/distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,13 @@
from pytensor.graph import vectorize_graph
from pytensor.tensor.random.op import RandomVariable

__all__ = ["BetaGeoBetaBinom", "ContContract", "ContNonContract", "ParetoNBD"]
__all__ = [
"BetaGeoBetaBinom",
"BetaGeoNBD",
"ContContract",
"ContNonContract",
"ParetoNBD",
]


class ContNonContractRV(RandomVariable):
Expand Down Expand Up @@ -587,3 +593,151 @@ def logp_customer_died(t_x_i, x_i, alpha_i, beta_i, gamma_i, delta_i, T_i):
delta > 0,
msg="alpha > 0, beta > 0, gamma > 0, delta > 0",
)


class BetaGeoNBDRV(RandomVariable):
name = "bg_nbd"
signature = "(),(),(),(),()->(2)"

dtype = "floatX"
_print_name = ("BetaGeoNBD", "\\operatorname{BetaGeoNBD}")

def __call__(self, a, b, r, alpha, T, size=None, **kwargs):
return super().__call__(a, b, r, alpha, T, size=size, **kwargs)

@classmethod
def rng_fn(cls, rng, a, b, r, alpha, T, size):
if size is None:
size = np.broadcast_shapes(a.shape, b.shape, r.shape, alpha.shape, T.shape)

a = np.asarray(a)
b = np.asarray(b)
r = np.asarray(r)
alpha = np.asarray(alpha)
T = np.asarray(T)

if size == ():
size = np.broadcast_shapes(a.shape, b.shape, r.shape, alpha.shape, T.shape)

a = np.broadcast_to(a, size)
b = np.broadcast_to(b, size)
r = np.broadcast_to(r, size)
alpha = np.broadcast_to(alpha, size)
T = np.broadcast_to(T, size)

output = np.zeros(shape=size + (2,)) # noqa:RUF005

lam = rng.gamma(shape=r, scale=1 / alpha, size=size)
p = rng.beta(a=a, b=b, size=size)

def sim_data(lam, p, T):
t = 0 # recency
n = 0 # frequency

churn = 0 # BG/NBD assumes all non-repeat customers are active
wait = rng.exponential(scale=1 / lam)

while t + wait < T and not churn:
churn = rng.random() < p
n += 1
t += wait
wait = rng.exponential(scale=1 / lam)

return np.array([t, n])

for index in np.ndindex(*size):
output[index] = sim_data(lam[index], p[index], T[index])

return output


bg_nbd = BetaGeoNBDRV()


class BetaGeoNBD(PositiveContinuous):
r"""Population-level distribution class for a discrete, non-contractual, Beta-Geometric/Negative-Binomial process.
It is based on Fader, et al. in [1]_, [2]_ and [3]_.
.. math::
\mathbb{LL}(r, \alpha, a, b | x, t_x, T) =
D_1 + D_2 + \ln(C_3 + \delta_{x>0} C_4) \text{, where:} \\
\begin{align}
D_1 &= \ln \left[ \Gamma(r+x) \right] - \ln \left[ \Gamma(r) \right] + \ln \left[ \Gamma(a+b) \right] + \ln \left[ \Gamma(b+x) \right] \\
D_2 &= r \ln(\alpha) - (r+x) \ln(\alpha + t_x) \\
C_3 &= \left(\frac{\alpha + t_x}{\alpha + T} \right)^{r+x} \\
C_4 &= \left(\frac{a}{b+x-1} \right) \\
\end{align}
ColtAllen marked this conversation as resolved.
Show resolved Hide resolved
======== ===============================================
Support :math:`t_j >= 0` for :math:`j = 1, \dots,x`
Mean :math:`\mathbb{E}[X(n) | r, \alpha, a, b] = \frac{a+b-1}{a-1} \left[ 1 - \left(\frac{\alpha}{\alpha + T}\right)^r {_2}{F}{_1}(r,b;a+b-1;\frac{t}{\alpha + t}) \right]`
======== ===============================================
References
----------
.. [1] Fader, Peter S., Bruce G.S. Hardie, and Jen Shang (2010),
"Counting Your Customers" the Easy Way: An Alternative to the Pareto/NBD Model
Marketing Science, 24 (Spring), 275-284
.. [2] Implementing the BG/NBD Model for Customer Base Analysis in Excel http://brucehardie.com/notes/004/bgnbd_spreadsheet_note.pdf
.. [3] Overcoming the BG/NBD Model's #NUM! Error Problem https://brucehardie.com/notes/027/bgnbd_num_error.pdf
""" # noqa: E501

rv_op = bg_nbd

@classmethod
def dist(cls, a, b, r, alpha, T, **kwargs):
"""Get the distribution from the parameters."""
return super().dist([a, b, r, alpha, T], **kwargs)

def logp(value, a, b, r, alpha, T):
"""Log-likelihood of the distribution."""
t_x = pt.atleast_1d(value[..., 0])
x = pt.atleast_1d(value[..., 1])

for param in (t_x, x, a, b, r, alpha, T):
if param.type.ndim > 1:
raise NotImplementedError(
f"BetaGeoNBD logp only implemented for vector parameters, got ndim={param.type.ndim}"
)

x_non_zero = x > 0

d1 = (
pt.gammaln(r + x)
- pt.gammaln(r)
+ pt.gammaln(a + b)
+ pt.gammaln(b + x)
- pt.gammaln(b)
- pt.gammaln(a + b + x)
)

d2 = r * pt.log(alpha) - (r + x) * pt.log(alpha + t_x)
c3 = ((alpha + t_x) / (alpha + T)) ** (r + x)
c4 = a / (b + x - 1)

logp = d1 + d2 + pt.log(c3 + pt.switch(x_non_zero, c4, 0))

logp = pt.switch(
pt.or_(
pt.or_(
pt.lt(t_x, 0),
pt.lt(x, 0),
),
pt.gt(t_x, T),
),
-np.inf,
logp,
)

return check_parameters(
logp,
a > 0,
b > 0,
alpha > 0,
r > 0,
msg="a > 0, b > 0, alpha > 0, r > 0",
)
Loading
Loading