Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Add sample_params method to effects and proper Lift test likelihood #149

Merged
merged 3 commits into from
Dec 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
278 changes: 278 additions & 0 deletions docs/mmm/lifttest/index.ipy
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
# %% [markdown]
# # Lift test

import matplotlib.pyplot as plt
import numpy as np
# %%
import pandas as pd

index = pd.period_range("2000-01-01", "2005-01-01", freq="D")
rng = np.random.default_rng(0)

X = pd.DataFrame(
{
"investment1": np.cumsum(rng.normal(0, 1, size=len(index))),
"investment2": np.cumsum(rng.normal(0, 1, size=len(index))),
},
index=index,
)
X -= X.min()
X /= X.max()
X += 0.05

X["investment2"] = X["investment1"] + 0.1 + rng.normal(0, 0.01, size=len(index))
X.plot.line(alpha=0.9)

import numpyro.distributions as dist

# %%
from prophetverse.effects import (HillEffect, LinearEffect,
LinearFourierSeasonality)
from prophetverse.effects.trend import PiecewiseLinearTrend
from prophetverse.engine import MAPInferenceEngine
from prophetverse.engine.optimizer import LBFGSSolver
from prophetverse.sktime import Prophetverse
from prophetverse.utils.regex import exact, no_input_columns

model = Prophetverse(
trend=PiecewiseLinearTrend(
changepoint_interval=100,
changepoint_prior_scale=0.001,
changepoint_range=-100,
),
exogenous_effects=[
(
"seasonality",
LinearFourierSeasonality(
freq="D",
sp_list=[365.25],
fourier_terms_list=[3],
prior_scale=0.1,
effect_mode="multiplicative",
),
no_input_columns,
),
(
"investment1",
HillEffect(
half_max_prior=dist.HalfNormal(0.2),
slope_prior=dist.Gamma(2,1),
max_effect_prior=dist.HalfNormal(1.5),
effect_mode="additive",
),
exact("investment1"),
),
(
"investment2",
LinearEffect(
prior=dist.HalfNormal(0.5),
effect_mode="additive",
),
exact("investment2"),
),
],
inference_engine=MAPInferenceEngine(num_steps=1),
)


from prophetverse.experimental.simulate import simulate

samples = simulate(
model=model,
fh=X.index,
X=X,
)

# %%

y = pd.DataFrame(data={"sales" : samples["obs"][0].flatten()}, index=index)
true_effect = pd.DataFrame(
data={
"investment1": samples["investment1"][0].flatten(),
"investment2": samples["investment2"][0].flatten(),
},
index=index,

)

fig, ax = plt.subplots(figsize=(10, 5))
y.plot.line(ax=ax)
fig.show()

fig, ax = plt.subplots(figsize=(10, 5))
true_effect.plot.line(ax=ax)
fig.show()


# %%

import logging

from prophetverse.engine.optimizer import LBFGSSolver
from prophetverse.logger import logger

logging.basicConfig()
logging.root.setLevel(logging.DEBUG)

model = model.set_params(
inference_engine=MAPInferenceEngine(
num_steps=1000,
optimizer=LBFGSSolver(memory_size=100,
max_linesearch_steps=100)
))
model.fit(y=y, X=X)

components = model.predict_components(fh=index, X=X)


# %%

fig, ax = plt.subplots(figsize=(10, 5))
components["obs"].plot.line(ax=ax)
y.plot.line(ax=ax, color="black")
# %%

plt.figure()
plt.scatter(X["investment1"], components["investment1"])
plt.scatter(X["investment1"], true_effect["investment1"], color="black")


plt.figure()
plt.scatter(X["investment2"], components["investment2"])
plt.scatter(X["investment2"], true_effect["investment2"], color="black")


# %%
def get_simulated_lift_test(X, n=10):
X_b = X.copy()

for col in ["investment1", "investment2"]:


X_b[col] = X_b[col]*rng.uniform(0.1, 0.9, size=X.shape[0])

samples_b = simulate(
model=model.clone().set_params(inference_engine__num_steps=1),
fh=X.index,
X=X_b,
do={k: v[0] for k, v in samples.items()},
)

true_effect_b = pd.DataFrame(
index=X_b.index,
data={
"investment1": samples_b["investment1"][0].flatten(),
"investment2": samples_b["investment2"][0].flatten(),
},
)

lift = np.abs(true_effect_b - true_effect)

outs = []

for col in ["investment1", "investment2"]:
lift_test_dataframe = pd.DataFrame(
index=X.index,
data={
"lift": lift[col],
"x_start": X.loc[:, col],
"x_end": X_b.loc[:, col],
"y_start": true_effect.loc[:, col],
"y_end": true_effect_b.loc[:, col],
},
)
outs.append(lift_test_dataframe.sample(n=n, replace=False))

return tuple(outs)


lift_test_dataframe1, lift_test_dataframe2 = get_simulated_lift_test(X, n=7)
# %%

from prophetverse.effects.lift_likelihood import LiftExperimentLikelihood

lift_experiment_effect1 = LiftExperimentLikelihood(
effect=model.get_params()["investment1"],
lift_test_results=lift_test_dataframe1,
prior_scale=1e-2,
likelihood_scale=1,
)

lift_experiment_effect2 = LiftExperimentLikelihood(
effect=model.get_params()["investment2"],
lift_test_results=lift_test_dataframe2,
prior_scale=1e-2,
likelihood_scale=1,
)

# %%

from numpyro.infer.initialization import init_to_feasible, init_to_sample

from prophetverse.engine.optimizer import LBFGSSolver

new_model = model.clone()
new_model.set_params(
investment1=lift_experiment_effect1,
investment2=lift_experiment_effect2,
)
new_model.fit(y=y, X=X)

# %%
new_components = new_model.predict_components(fh=index, X=X)

# %%

fig, ax = plt.subplots(figsize=(10, 5))
components["obs"].plot.line(ax=ax)
y.plot.line(ax=ax, color="black")
new_components["obs"].plot.line(ax=ax)

# %%

plt.figure()
plt.scatter(X["investment1"], components["investment1"])
plt.scatter(X["investment1"], true_effect["investment1"], color="black")
plt.scatter(X["investment1"], new_components["investment1"], color="tab:orange")

for date in lift_test_dataframe1.index:
baseline = true_effect.loc[date, "investment1"]
plt.plot(
[
lift_test_dataframe1.loc[date, "x_start"],
lift_test_dataframe1.loc[date, "x_end"],
],
[
lift_test_dataframe1.loc[date, "y_start"],
lift_test_dataframe1.loc[date, "y_end"],
],
color="red",
alpha=0.8,
)
plt.show()


plt.figure()
plt.scatter(X["investment2"], components["investment2"])
plt.scatter(X["investment2"], true_effect["investment2"], color="black")
plt.scatter(X["investment2"], new_components["investment2"], color="tab:orange")


for date in lift_test_dataframe2.index:
baseline = true_effect.loc[date, "investment1"]
plt.plot(
[
lift_test_dataframe2.loc[date, "x_start"],
lift_test_dataframe2.loc[date, "x_end"],
],
[
lift_test_dataframe2.loc[date, "y_start"],
lift_test_dataframe2.loc[date, "y_end"],
],
color="red",
alpha=0.9,
)
plt.show()


# %%
81 changes: 75 additions & 6 deletions extension_templates/effect.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,66 @@
from prophetverse.utils.frame_to_array import series_to_tensor_or_array


class MySimpleEffectName(BaseEffect):
"""Base class for effects."""

_tags = {
# Supports multivariate data? Can this
# Effect be used with Multiariate prophet?
"supports_multivariate": False,
# If no columns are found, should
# _predict be skipped?
"skip_predict_if_no_match": True,
# Should only the indexes related to the forecasting horizon be passed to
# _transform?
"filter_indexes_with_forecating_horizon_at_transform": True,
}

def __init__(self, param1: Any, param2: Any):
self.param1 = param1
self.param2 = param2

def _sample_params(self, data, predicted_effects):
# call numpyro.sample to sample the parameters of the effect
# return a dictionary with the sampled parameters, where
# key is the name of the parameter and value is the sampled value
return {}

def _predict(
self,
data: Any,
predicted_effects: Dict[str, jnp.ndarray],
params: Dict[str, jnp.ndarray],
) -> jnp.ndarray:
"""Apply and return the effect values.

Parameters
----------
data : Any
Data obtained from the transformed method.

predicted_effects : Dict[str, jnp.ndarray], optional
A dictionary containing the predicted effects, by default None.

params : Dict[str, jnp.ndarray]
A dictionary containing the sampled parameters of the effect.

Returns
-------
jnp.ndarray
An array with shape (T,1) for univariate timeseries, or (N, T, 1) for
multivariate timeseries, where T is the number of timepoints and N is the
number of series.
"""
# predicted effects come with the following shapes:
# (T, 1) shaped array for univariate timeseries
# (N, T, 1) shaped array for multivariate timeseries, where N is the number of
# series

# Here you use the params to compute the effect.
raise NotImplementedError("Subclasses must implement _predict()")


class MyEffectName(BaseEffect):
"""Base class for effects."""

Expand Down Expand Up @@ -76,10 +136,17 @@ def _transform(self, X: pd.DataFrame, fh: pd.Index) -> Any:
array = series_to_tensor_or_array(X)
return array

def predict(
def _sample_params(self, data, predicted_effects):
# call numpyro.sample to sample the parameters of the effect
# return a dictionary with the sampled parameters, where
# key is the name of the parameter and value is the sampled value
return {}

def _predict(
self,
data: Dict,
data: Any,
predicted_effects: Dict[str, jnp.ndarray],
params: Dict[str, jnp.ndarray],
) -> jnp.ndarray:
"""Apply and return the effect values.

Expand All @@ -91,18 +158,20 @@ def predict(
predicted_effects : Dict[str, jnp.ndarray], optional
A dictionary containing the predicted effects, by default None.

params : Dict[str, jnp.ndarray]
A dictionary containing the sampled parameters of the effect.

Returns
-------
jnp.ndarray
An array with shape (T,1) for univariate timeseries, or (N, T, 1) for
multivariate timeseries, where T is the number of timepoints and N is the
number of series.
"""
# Get the trend
# predicted effects come with the following shapes:
# (T, 1) shaped array for univariate timeseries
# (N, T, 1) shaped array for multivariate timeseries, where N is the number of
# series
# trend: jnp.ndarray = predicted_effects["trend"]
# Or user predicted_effects.get("trend") to return None if the trend is
# not found

# Here you use the params to compute the effect.
raise NotImplementedError("Subclasses must implement _predict()")
Loading
Loading