diff --git a/preliz/__init__.py b/preliz/__init__.py index 34298cb8..169b27e9 100644 --- a/preliz/__init__.py +++ b/preliz/__init__.py @@ -3,7 +3,6 @@ Tools to help you pick a prior """ -import logging from os import path as os_path from matplotlib import rcParams @@ -18,13 +17,6 @@ __version__ = "0.11.0" -_log = logging.getLogger("preliz") - -if not logging.root.handlers: - _log.setLevel(logging.INFO) - if len(_log.handlers) == 0: - handler = logging.StreamHandler() - _log.addHandler(handler) # Allow legend outside plot in maxent to be included when saving a figure # We may want to make this more explicit by having preliz.rcParams @@ -37,4 +29,4 @@ style.core.reload_library() # clean namespace -del logging, os_path, rcParams, _preliz_style_path, _log +del os_path, rcParams, _preliz_style_path diff --git a/preliz/internal/distribution_helper.py b/preliz/internal/distribution_helper.py index eda8cc19..93dcae9e 100644 --- a/preliz/internal/distribution_helper.py +++ b/preliz/internal/distribution_helper.py @@ -12,7 +12,7 @@ def from_precision(precision): def to_precision(sigma): - precision = 1 / sigma**2 + precision = 1 / (eps + sigma**2) return precision @@ -148,38 +148,15 @@ def num_kurtosis(dist): } -def get_distributions(dist_names=None, exclude=None): +def get_distributions(dist_names=None): if dist_names is None: all_distributions = modules["preliz.distributions"].__all__ else: all_distributions = dist_names - if exclude is None: - exclude = [] - if exclude == "auto": - exclude = [ - "Beta", - "BetaScaled", - "Triangular", - "TruncatedNormal", - "Uniform", - "VonMises", - "Categorical", - "DiscreteUniform", - "HyperGeometric", - "zeroInflatedBinomial", - "ZeroInflatedNegativeBinomial", - "ZeroInflatedPoisson", - "MvNormal", - "Mixture", - ] - distributions = [] for a_dist in all_distributions: dist = getattr(modules["preliz.distributions"], a_dist)() - if dist.__class__.__name__ not in exclude: - distributions.append(dist) - if exclude: - return exclude, distributions + distributions.append(dist) return distributions diff --git a/preliz/internal/logging.py b/preliz/internal/logging.py deleted file mode 100644 index 9f248ff4..00000000 --- a/preliz/internal/logging.py +++ /dev/null @@ -1,13 +0,0 @@ -import logging - -from contextlib import contextmanager - - -@contextmanager -def disable_pymc_sampling_logs(logger: logging.Logger = logging.getLogger("pymc")): - effective_level = logger.getEffectiveLevel() - logger.setLevel(logging.ERROR) - try: - yield - finally: - logger.setLevel(effective_level) diff --git a/preliz/internal/parser.py b/preliz/internal/parser.py deleted file mode 100644 index 43e1cd51..00000000 --- a/preliz/internal/parser.py +++ /dev/null @@ -1,208 +0,0 @@ -import importlib -import inspect -import re -from sys import modules - -import numpy as np - -from preliz import distributions -from .distribution_helper import init_vals - - -def inspect_source(fmodel): - source = inspect.getsource(fmodel) - signature = inspect.signature(fmodel) - source = re.sub(r"#.*$|^#.*$", "", source, flags=re.MULTILINE) - default_params = { - name: (param.default if param.default is not inspect.Parameter.empty else np.nan) - for name, param in signature.parameters.items() - } - model = fmodel(**default_params) - return source, signature, get_engine(model) - - -def get_engine(model): - if getattr(model, "basic_RVs", False): - return "pymc" - elif getattr(model, "formula", False): - return "bambi" - return "preliz" - - -def parse_function_for_pred_textboxes(source, signature, engine="preliz"): - model = {} - - slidify = list(signature.parameters.keys()) - regex = r"\b" + r"\b|\b".join(slidify) + r"\b" - - all_dist_str = dist_as_str() - matches = match_preliz_dist(all_dist_str, source, engine) - - for match in matches: - if engine == "bambi": - dist_name_str = match.group(1) - else: - dist_name_str = match.group(2) - if engine == "bambi": - arguments = [s.strip() for s in match.group(2).split(",")] - else: - arguments = [s.strip() for s in match.group(3).split(",")] - if engine == "pymc": - args = pymc_parse_arguments(arguments, regex) - else: - args = parse_arguments(arguments, regex) - for arg in args: - if arg: - func, var, idx = arg - dist = getattr(distributions, dist_name_str) - model[var] = (dist(**init_vals[dist_name_str]), idx, func) - - return model - - -def parse_arguments(lst, regex): - result = [] - for idx, item in enumerate(lst): - match = re.search(regex, item) - if match: - if item.isidentifier(): - result.append((None, match.group(0), idx)) - else: - if "**" in item: - power = item.split("**")[1].strip() - result.append((power, match.group(0), idx)) - else: - func = item.split("(")[0].split(".")[-1] - result.append((func, match.group(0), idx)) - return result - - -def pymc_parse_arguments(lst, regex): - result = [] - for idx, item in enumerate(lst): - match = re.search(regex, item) - if match: - if item.isidentifier(): - result.append((None, match.group(0), idx - 1)) - else: - if "**" in item: - power = item.split("**")[1].strip() - result.append((power, match.group(0), idx - 1)) - else: - func = item.split("(")[0].split(".")[-1] - result.append((func, match.group(0), idx - 1)) - return result - - -def get_prior_pp_samples(fmodel, variables, draws, engine=None, values=None): - if values is None: - values = [] - - if engine == "preliz": - obs_rv = variables[-1] # only one observed for the moment - pp_samples_ = [] - prior_samples_ = {name: [] for name in variables[:-1]} - for _ in range(draws): - for name, value in zip(variables, fmodel(*values)): - if name == obs_rv: - pp_samples_.append(value) - else: - prior_samples_[name].append(value) - - pp_samples = np.stack(pp_samples_) - prior_samples = {key: np.array(val) for key, val in prior_samples_.items()} - elif engine == "bambi": - *prior_samples_, pp_samples = fmodel(*values) - prior_samples = {name: np.array(val) for name, val in zip(variables[:-1], prior_samples_)} - - return pp_samples, prior_samples - - -def from_preliz(fmodel): - source = inspect.getsource(fmodel) - variables = match_return_variables(source) - # Find the priors we want to change - all_dist_str = dist_as_str() - matches = match_preliz_dist(all_dist_str, source, "preliz") - # Create a dictionary with the priors - model = dict_model(matches, variables) - - return variables, model - - -def from_bambi(fmodel, draws): - module_name = fmodel.__module__ - module = importlib.import_module(module_name) - - # Get the source code of the original function - original_source = inspect.getsource(fmodel) - - # Define a pattern to find the line where the model is built - pattern = re.compile(r"(\s+)([a-zA-Z_]\w*)\s*=\s*.*?Model(.*)") - - # Find the match in the source code - match = pattern.search(original_source) - - # Extract the indentation and variable name - indentation = match.group(1) - variable_name = match.group(2) - - # Find the variables after the return statement - return_variables = match_return_variables(original_source) - - if return_variables: - # Build the new source code - new_source = original_source.replace( - match.group(0), - f"{match.group(0)}" - f"{indentation}{variable_name}.build()\n" - f"{indentation}variables = [{variable_name}.backend.model.named_vars[v] " - f"for v in {return_variables}]\n" - f'{indentation}{", ".join(return_variables)} = pm.draw(variables, draws={draws})', - ) - - # Find the priors we want to change - all_dist_str = dist_as_str() - matches = match_preliz_dist(all_dist_str, new_source, "bambi") - # Create a dictionary with the priors - model = dict_model(matches, return_variables) - - # Execute the new source code to redefine the function - exec(new_source, module.__dict__) # pylint: disable=exec-used - modified_fmodel = getattr(module, fmodel.__name__) - - return modified_fmodel, return_variables, model - - -def match_preliz_dist(all_dist_str, source, engine): - # remove comments - source = re.sub(r"#.*$|^#.*$", "", source, flags=re.MULTILINE) - - if engine in ["preliz", "pymc"]: - regex = rf"(.*?({all_dist_str}).*?)\(([^()]*(?:\([^()]*\)[^()]*)*)\)" - if engine == "bambi": - regex = rf'\s*(?:\w+\.)?Prior\("({all_dist_str})",\s*((?:\w+=\w+(?:,?\s*)?)*)\s*\)' - matches = re.finditer(regex, source) - return matches - - -def match_return_variables(source): - match = re.search(r"return (\w+(\s*,\s*\w+)*)", source) - return [var.strip() for var in match.group(1).split(",")] - - -def dist_as_str(): - all_distributions = modules["preliz.distributions"].__all__ - return "|".join(all_distributions) - - -def dict_model(matches, return_variables): - model = {} - obs_rv = return_variables[-1] - for match in matches: - var_name = match.group(0).split("=")[0].strip() - if var_name != obs_rv: - dist = getattr(modules["preliz.distributions"], match.group(2)) - model[var_name] = dist() - - return model diff --git a/preliz/internal/plot_helper.py b/preliz/internal/plot_helper.py index 6b30947f..dad94eee 100644 --- a/preliz/internal/plot_helper.py +++ b/preliz/internal/plot_helper.py @@ -5,7 +5,6 @@ try: from IPython import get_ipython from ipywidgets import FloatSlider, IntSlider, FloatText, IntText, Checkbox, ToggleButton - from pymc import sample_prior_predictive except ImportError: pass @@ -13,8 +12,7 @@ import matplotlib.pyplot as plt from matplotlib import _pylab_helpers, get_backend from matplotlib.ticker import MaxNLocator -from .logging import disable_pymc_sampling_logs -from .narviz import hdi, kde +from preliz.internal.narviz import hdi, kde def plot_pointinterval(distribution, interval="hdi", levels=None, rotated=False, ax=None): @@ -425,63 +423,6 @@ def looper(*args, **kwargs): return looper -def bambi_plot_decorator(func, iterations, kind_plot, references, plot_func): - def looper(*args, **kwargs): - kwargs.pop("__resample__") - x_min = kwargs.pop("__x_min__") - x_max = kwargs.pop("__x_max__") - if not kwargs.pop("__set_xlim__"): - x_min = None - x_max = None - auto = True - else: - auto = False - - model = func(*args, **kwargs) - model.build() - with disable_pymc_sampling_logs(): - idata = model.prior_predictive(iterations) - results = ( - idata["prior_predictive"].stack(sample=("chain", "draw"))[model.response_name].values.T - ) - - _, ax = plt.subplots() - ax.set_xlim(x_min, x_max, auto=auto) - if plot_func is None: - plot_repr(results, kind_plot, references, iterations, ax) - else: - plot_func(results, ax) - - return looper - - -def pymc_plot_decorator(func, iterations, kind_plot, references, plot_func): - def looper(*args, **kwargs): - kwargs.pop("__resample__") - x_min = kwargs.pop("__x_min__") - x_max = kwargs.pop("__x_max__") - if not kwargs.pop("__set_xlim__"): - x_min = None - x_max = None - auto = True - else: - auto = False - with func(*args, **kwargs) as model: - obs_name = model.observed_RVs[0].name - with disable_pymc_sampling_logs(): - idata = sample_prior_predictive(samples=iterations) - results = idata["prior_predictive"].stack(sample=("chain", "draw"))[obs_name].values.T - - _, ax = plt.subplots() - ax.set_xlim(x_min, x_max, auto=auto) - if plot_func is None: - plot_repr(results, kind_plot, references, iterations, ax) - else: - plot_func(results, ax) - - return looper - - def plot_repr(results, kind_plot, references, iterations, ax): alpha = max(0.01, 1 - iterations * 0.009) diff --git a/preliz/internal/predictive_helper.py b/preliz/internal/predictive_helper.py index b7b3a1c2..16248e54 100644 --- a/preliz/internal/predictive_helper.py +++ b/preliz/internal/predictive_helper.py @@ -1,12 +1,9 @@ import numpy as np -from .plot_helper import ( - repr_to_matplotlib, -) - -from ..unidimensional import mle -from .distribution_helper import get_distributions +from preliz.internal.plot_helper import repr_to_matplotlib +from preliz.internal.distribution_helper import get_distributions +from preliz.unidimensional import mle def back_fitting_ppa(model, subset, new_families=True): @@ -22,14 +19,17 @@ def back_fitting_ppa(model, subset, new_families=True): if new_families: string += "\nYour selection is consistent with the priors (new families):\n" - exclude, distributions = get_distributions(None, exclude="auto") + # We should store this in a central place + # So we use the same families for other functions + common_cont = ["Gamma", "Exponential"] + common_disc = ["Poisson", "NegativeBinomial"] for name, dist in model.items(): - if dist.__class__.__name__ in exclude: - dist._fit_mle(subset[name]) - else: - idx, _ = mle(distributions, subset[name], plot=False) - dist = distributions[idx[0]] - string += f"{name} = {repr_to_matplotlib(dist)}\n" + if dist.kind == "continuous": + distributions = get_distributions(set([dist.__class__.__name__] + common_cont)) + elif dist.kind == "discrete": + distributions = get_distributions(set([dist.__class__.__name__] + common_disc)) + idx, _ = mle(distributions, subset[name], plot=False) + string += f"{name} = {repr_to_matplotlib(distributions[idx[0]])}\n" return string, np.concatenate([dist.params for dist in model.values()]) diff --git a/preliz/multidimensional/dirichlet_mode.py b/preliz/multidimensional/dirichlet_mode.py index e781afd4..ecc1d9a6 100644 --- a/preliz/multidimensional/dirichlet_mode.py +++ b/preliz/multidimensional/dirichlet_mode.py @@ -1,13 +1,9 @@ -import logging import warnings import numpy as np from preliz.distributions import Dirichlet, Beta from preliz.internal.optimization import optimize_dirichlet_mode -_log = logging.getLogger("preliz") - - def dirichlet_mode(mode, mass=0.90, bound=0.01, plot=True, plot_kwargs=None, ax=None): """ Returns a Dirichlet distribution where the marginals have the specified mode @@ -62,8 +58,8 @@ def dirichlet_mode(mode, mass=0.90, bound=0.01, plot=True, plot_kwargs=None, ax= calculated_mode = (alpha_np - 1) / (alpha_np.sum() - len(alpha_np)) if np.any((np.array(mode) - calculated_mode) > 0.01): - _log.warning( - "The requested mode %s is different from the calculated mode %s.", mode, calculated_mode + warnings.warn( + f"The requested mode {mode} is different from the calculated mode {calculated_mode}." ) dirichlet_distribution = Dirichlet(alpha) diff --git a/preliz/ppls/agnostic.py b/preliz/ppls/agnostic.py index 03f07460..7b439b6e 100644 --- a/preliz/ppls/agnostic.py +++ b/preliz/ppls/agnostic.py @@ -1,18 +1,38 @@ """Functions to communicate with PPLs.""" + +from contextlib import contextmanager +import inspect import logging +import re +import warnings + + +import matplotlib.pyplot as plt +import numpy as np -from preliz.internal.parser import get_engine +from preliz import distributions +from preliz.internal.distribution_helper import init_vals +from preliz.internal.plot_helper import plot_repr from preliz.distributions import Gamma, Normal, HalfNormal from preliz.unidimensional.mle import mle from preliz.ppls.pymc_io import get_model_information, write_pymc_string -from preliz.ppls.bambi_io import get_pymc_model, write_bambi_string +from preliz.ppls.bambi_io import ( + get_pymc_model, + write_bambi_string, + dist_as_str, + match_return_variables, + dict_model, +) -_log = logging.getLogger("preliz") +try: + from pymc import sample_prior_predictive +except ImportError: + pass -def posterior_to_prior(model, idata, alternative=None, engine="auto"): +def posterior_to_prior(model, idata, new_families=None, engine="auto"): """ Fit a posterior from a model to its prior @@ -25,7 +45,7 @@ def posterior_to_prior(model, idata, alternative=None, engine="auto"): model : A PyMC or a Bambi Model idata : InferenceData InferenceData with a posterior group. - alternative : "auto", list or dict + new_families : "auto", list or dict Defaults to None, the samples are fit to the original prior distribution. If "auto", the method evaluates the fit to the original prior plus a set of predefined distributions. @@ -39,7 +59,7 @@ def posterior_to_prior(model, idata, alternative=None, engine="auto"): The function will automatically select the appropriate library to use based on the model provided. """ - _log.info(""""This is an experimental method under development, use with caution.""") + warnings.warn(""""This is an experimental method under development, use with caution.""") engine = get_engine(model) if engine == "auto" else engine if engine == "bambi": @@ -47,7 +67,7 @@ def posterior_to_prior(model, idata, alternative=None, engine="auto"): _, _, preliz_model, _, untransformed_var_info, *_ = get_model_information(model) - new_priors = back_fitting_idata(idata, preliz_model, alternative) + new_priors = back_fitting_idata(idata, preliz_model, new_families) if engine == "bambi": new_model = write_bambi_string(new_priors, untransformed_var_info) @@ -57,11 +77,11 @@ def posterior_to_prior(model, idata, alternative=None, engine="auto"): return new_model -def back_fitting_idata(idata, model_info, alternative): +def back_fitting_idata(idata, model_info, new_families): new_priors = {} posterior = idata.posterior.stack(sample=("chain", "draw")) - if alternative is None: + if new_families is None: for var, dist in model_info.items(): idx, _ = mle([dist], posterior[var].values, plot=False) new_priors[var] = dist @@ -69,14 +89,193 @@ def back_fitting_idata(idata, model_info, alternative): for var, dist in model_info.items(): dists = [dist] - if alternative == "auto": + if new_families == "auto": alt = [Normal(), HalfNormal(), Gamma()] dists += [a for a in alt if dist.__class__.__name__ != a.__class__.__name__] - elif isinstance(alternative, list): - dists += alternative - elif isinstance(alternative, dict): - dists += alternative.get(var, []) + elif isinstance(new_families, list): + dists += new_families + elif isinstance(new_families, dict): + dists += new_families.get(var, []) idx, _ = mle(dists, posterior[var].values, plot=False) new_priors[var] = dists[idx[0]] return new_priors + + +def inspect_source(fmodel): + source = inspect.getsource(fmodel) + signature = inspect.signature(fmodel) + source = re.sub(r"#.*$|^#.*$", "", source, flags=re.MULTILINE) + default_params = { + name: (param.default if param.default is not inspect.Parameter.empty else np.nan) + for name, param in signature.parameters.items() + } + model = fmodel(**default_params) + return source, signature, get_engine(model) + + +def get_engine(model): + if getattr(model, "basic_RVs", False): + return "pymc" + elif getattr(model, "formula", False): + return "bambi" + return "preliz" + + +def parse_function_for_pred_textboxes(source, signature, engine="preliz"): + model = {} + + slidify = list(signature.parameters.keys()) + regex = r"\b" + r"\b|\b".join(slidify) + r"\b" + + all_dist_str = dist_as_str() + matches = match_preliz_dist(all_dist_str, source, engine) + + for match in matches: + if engine == "bambi": + dist_name_str = match.group(1) + else: + dist_name_str = match.group(2) + + if engine == "bambi": + arguments = [s.strip() for s in match.group(2).split(",")] + else: + arguments = [s.strip() for s in match.group(3).split(",")] + + args = parse_arguments(arguments, regex, engine) + for arg in args: + if arg: + func, var, idx = arg + dist = getattr(distributions, dist_name_str) + model[var] = (dist(**init_vals[dist_name_str]), idx, func) + + return model + + +def parse_arguments(lst, regex, engine): + result = [] + if engine == "pymc": + offset = 1 + else: + offset = 0 + for idx, item in enumerate(lst): + match = re.search(regex, item) + if match: + if item.isidentifier(): + result.append((None, match.group(0), idx - offset)) + else: + if "**" in item: + power = item.split("**")[1].strip() + result.append((power, match.group(0), idx - offset)) + else: + func = item.split("(")[0].split(".")[-1] + result.append((func, match.group(0), idx - offset)) + return result + + +def get_prior_pp_samples(fmodel, variables, draws, engine=None, values=None): + if values is None: + values = [] + + if engine == "preliz": + obs_rv = variables[-1] # only one observed for the moment + pp_samples_ = [] + prior_samples_ = {name: [] for name in variables[:-1]} + for _ in range(draws): + for name, value in zip(variables, fmodel(*values)): + if name == obs_rv: + pp_samples_.append(value) + else: + prior_samples_[name].append(value) + + pp_samples = np.stack(pp_samples_) + prior_samples = {key: np.array(val) for key, val in prior_samples_.items()} + elif engine == "bambi": + *prior_samples_, pp_samples = fmodel(*values) + prior_samples = {name: np.array(val) for name, val in zip(variables[:-1], prior_samples_)} + + return pp_samples, prior_samples + + +def from_preliz(fmodel): + source = inspect.getsource(fmodel) + variables = match_return_variables(source) + # Find the priors we want to change + all_dist_str = dist_as_str() + matches = match_preliz_dist(all_dist_str, source, "preliz") + # Create a dictionary with the priors + model = dict_model(matches, variables) + + return variables, model + + +def match_preliz_dist(all_dist_str, source, engine): + # remove comments + source = re.sub(r"#.*$|^#.*$", "", source, flags=re.MULTILINE) + + if engine in ["preliz", "pymc"]: + regex = rf"(.*?({all_dist_str}).*?)\(([^()]*(?:\([^()]*\)[^()]*)*)\)" + if engine == "bambi": + regex = rf'\s*(?:\w+\.)?Prior\("({all_dist_str})",\s*((?:\w+=\w+(?:,?\s*)?)*)\s*\)' + matches = re.finditer(regex, source) + return matches + + +def ppl_plot_decorator(func, iterations, kind_plot, references, plot_func, engine): + def looper(*args, **kwargs): + kwargs.pop("__resample__") + x_min = kwargs.pop("__x_min__") + x_max = kwargs.pop("__x_max__") + if not kwargs.pop("__set_xlim__"): + x_min = None + x_max = None + auto = True + else: + auto = False + + if engine == "preliz": + results = [] + for _ in range(iterations): + val = func(*args, **kwargs) + if not any(np.isnan(val)): + results.append(val) + results = np.array(results) + + elif engine == "bambi": + model = func(*args, **kwargs) + model.build() + with disable_pymc_sampling_logs(): + idata = model.prior_predictive(iterations) + results = ( + idata["prior_predictive"] + .stack(sample=("chain", "draw"))[model.response_component.response.name] + .values.T + ) + + elif engine == "pymc": + with func(*args, **kwargs) as model: + obs_name = model.observed_RVs[0].name + with disable_pymc_sampling_logs(): + idata = sample_prior_predictive(samples=iterations) + results = ( + idata["prior_predictive"].stack(sample=("chain", "draw"))[obs_name].values.T + ) + + _, ax = plt.subplots() + ax.set_xlim(x_min, x_max, auto=auto) + if plot_func is None: + plot_repr(results, kind_plot, references, iterations, ax) + else: + plot_func(results, ax) + + return looper + + +@contextmanager +def disable_pymc_sampling_logs(logger: logging.Logger = logging.getLogger("pymc")): + effective_level = logger.getEffectiveLevel() + logger.setLevel(logging.ERROR) + try: + yield + finally: + logger.setLevel(effective_level) diff --git a/preliz/ppls/bambi_io.py b/preliz/ppls/bambi_io.py index 5208591b..e31fcf5a 100644 --- a/preliz/ppls/bambi_io.py +++ b/preliz/ppls/bambi_io.py @@ -1,5 +1,10 @@ """Functions to communicate with Bambi.""" +import importlib +import inspect +import re +from sys import modules + def get_pymc_model(model): if not model.built: @@ -25,3 +30,77 @@ def write_bambi_string(new_priors, var_info): header = header.rstrip(", ") + "}" return header + + +def from_bambi(fmodel, draws): + module_name = fmodel.__module__ + module = importlib.import_module(module_name) + + # Get the source code of the original function + original_source = inspect.getsource(fmodel) + + # Define a pattern to find the line where the model is built + pattern = re.compile(r"(\s+)([a-zA-Z_]\w*)\s*=\s*.*?Model(.*)") + + # Find the match in the source code + match = pattern.search(original_source) + + # Extract the indentation and variable name + indentation = match.group(1) + variable_name = match.group(2) + + # Find the variables after the return statement + return_variables = match_return_variables(original_source) + + if return_variables: + # Build the new source code + new_source = original_source.replace( + match.group(0), + f"{match.group(0)}" + f"{indentation}{variable_name}.build()\n" + f"{indentation}variables = [{variable_name}.backend.model.named_vars[v] " + f"for v in {return_variables}]\n" + f'{indentation}{", ".join(return_variables)} = pm.draw(variables, draws={draws})', + ) + + # Find the priors we want to change + all_dist_str = dist_as_str() + matches = match_preliz_dist(all_dist_str, new_source) + # Create a dictionary with the priors + model = dict_model(matches, return_variables) + + # Execute the new source code to redefine the function + exec(new_source, module.__dict__) # pylint: disable=exec-used + modified_fmodel = getattr(module, fmodel.__name__) + + return modified_fmodel, return_variables, model + + +def match_preliz_dist(all_dist_str, source): + # remove comments + source = re.sub(r"#.*$|^#.*$", "", source, flags=re.MULTILINE) + regex = rf'\s*(?:\w+\.)?Prior\("({all_dist_str})",\s*((?:\w+=\w+(?:,?\s*)?)*)\s*\)' + matches = re.finditer(regex, source) + return matches + + +def match_return_variables(source): + match = re.search(r"return (\w+(\s*,\s*\w+)*)", source) + return [var.strip() for var in match.group(1).split(",")] + + +def dist_as_str(): + all_distributions = modules["preliz.distributions"].__all__ + return "|".join(all_distributions) + + +def dict_model(matches, return_variables): + model = {} + obs_rv = return_variables[-1] + for match in matches: + var_name = match.group(0).split("=")[0].strip() + if var_name != obs_rv: + dist = getattr(modules["preliz.distributions"], match.group(2)) + model[var_name] = dist() + + return model diff --git a/preliz/ppls/pymc_io.py b/preliz/ppls/pymc_io.py index 89f262eb..47a0cbdf 100644 --- a/preliz/ppls/pymc_io.py +++ b/preliz/ppls/pymc_io.py @@ -1,8 +1,8 @@ """Methods to communicate with PyMC.""" # pylint: disable=protected-access -from sys import modules from copy import copy +from sys import modules import numpy as np diff --git a/preliz/predictive/ppa.py b/preliz/predictive/ppa.py index dde8f2fd..0df0e48e 100644 --- a/preliz/predictive/ppa.py +++ b/preliz/predictive/ppa.py @@ -1,8 +1,8 @@ """Prior predictive check assistant.""" -import logging import ast from random import shuffle +import warnings try: import ipywidgets as widgets @@ -13,21 +13,26 @@ from scipy.spatial import KDTree -from ..internal.plot_helper import ( +from preliz.internal.plot_helper import ( check_inside_notebook, plot_pp_samples, plot_pp_mean, ) -from ..internal.parser import get_prior_pp_samples, from_preliz, from_bambi -from ..internal.predictive_helper import back_fitting_ppa, select_prior_samples -from ..distributions import Normal -from ..distributions.distributions import Distribution - -_log = logging.getLogger("preliz") +from preliz.ppls.agnostic import get_prior_pp_samples, from_preliz +from preliz.ppls.bambi_io import from_bambi +from preliz.internal.predictive_helper import back_fitting_ppa, select_prior_samples +from preliz.distributions import Normal +from preliz.distributions.distributions import Distribution def ppa( - fmodel, draws=2000, references=0, boundaries=(-np.inf, np.inf), target=None, engine="preliz" + fmodel, + draws=2000, + references=0, + boundaries=(-np.inf, np.inf), + target=None, + new_families=True, + engine="preliz", ): """ Prior predictive check assistant. @@ -49,14 +54,19 @@ def ppa( Target distribution. The first shown distributions will be selected to be as close as possible to `target`. Available options are, a PreliZ distribution or a 2-tuple with the first element representing the mean and the second the standard deviation. + new_families : bool + If True, the method will return the best fitting distribution from a set of common + distributions engine : str Library used to define the model. Either `preliz` or `bambi`. Defaults to `preliz` """ check_inside_notebook(need_widget=True) - _log.info(""""This is an experimental method under development, use with caution.""") + warnings.warn(""""This is an experimental method under development, use with caution.""") - filter_dists = FilterDistribution(fmodel, draws, references, boundaries, target, engine) + filter_dists = FilterDistribution( + fmodel, draws, references, boundaries, target, new_families, engine + ) filter_dists() output = widgets.Output() @@ -140,13 +150,14 @@ def click(event): class FilterDistribution: # pylint:disable=too-many-instance-attributes - def __init__(self, fmodel, draws, references, boundaries, target, engine): + def __init__(self, fmodel, draws, references, boundaries, target, new_families, engine): self.fmodel = fmodel self.source = "" # string representation of the model self.draws = draws self.references = references self.boundaries = boundaries self.target = target + self.new_families = new_families self.engine = engine self.pp_samples = None # prior predictive samples self.prior_samples = None # prior samples used for backfitting @@ -173,6 +184,7 @@ def __call__(self): elif self.engine == "bambi": self.fmodel, variables, self.model = from_bambi(self.fmodel, self.draws) + print(variables, self.model) self.pp_samples, self.prior_samples = get_prior_pp_samples( self.fmodel, variables, self.draws, self.engine ) @@ -386,7 +398,7 @@ def on_return_prior(self): if len(selected) > 4: subsample = select_prior_samples(selected, self.prior_samples, self.model) - string, _ = back_fitting_ppa(self.model, subsample, new_families=False) + string, _ = back_fitting_ppa(self.model, subsample, new_families=self.new_families) self.fig.clf() plt.text(0.05, 0.5, string, fontsize=14) diff --git a/preliz/predictive/ppe.py b/preliz/predictive/ppe.py index ed4d9ad0..4828893f 100644 --- a/preliz/predictive/ppe.py +++ b/preliz/predictive/ppe.py @@ -5,8 +5,7 @@ from preliz.internal.optimization import optimize_pymc_model from preliz.ppls.bambi_io import get_pymc_model, write_bambi_string -from preliz.ppls.agnostic import back_fitting_idata -from preliz.internal.parser import get_engine +from preliz.ppls.agnostic import back_fitting_idata, get_engine from preliz.ppls.pymc_io import ( get_model_information, get_initial_guess, @@ -109,7 +108,7 @@ def ppe(model, target, method="projective", engine="auto", random_state=0): with model: idata = fit(method="pathfinder", num_samples=1000) - new_priors = back_fitting_idata(idata, preliz_model, alternative=False) + new_priors = back_fitting_idata(idata, preliz_model, new_families=False) if engine == "bambi": new_model = write_bambi_string(new_priors, untransformed_var_info) elif engine == "pymc": diff --git a/preliz/predictive/predictive_explorer.py b/preliz/predictive/predictive_explorer.py index c47c10ef..3eb6feb1 100644 --- a/preliz/predictive/predictive_explorer.py +++ b/preliz/predictive/predictive_explorer.py @@ -3,13 +3,12 @@ from ipywidgets import VBox, HBox, interactive_output except ImportError: pass -from preliz.internal.parser import inspect_source, parse_function_for_pred_textboxes -from preliz.internal.plot_helper import ( - get_textboxes, - plot_decorator, - pymc_plot_decorator, - bambi_plot_decorator, +from preliz.ppls.agnostic import ( + inspect_source, + parse_function_for_pred_textboxes, + ppl_plot_decorator, ) +from preliz.internal.plot_helper import get_textboxes def predictive_explorer( @@ -46,12 +45,7 @@ def predictive_explorer( source, signature, engine = inspect_source(fmodel) model = parse_function_for_pred_textboxes(source, signature, engine) textboxes = get_textboxes(signature, model) - if engine == "pymc": - new_fmodel = pymc_plot_decorator(fmodel, samples, kind_plot, references, plot_func) - elif engine == "bambi": - new_fmodel = bambi_plot_decorator(fmodel, samples, kind_plot, references, plot_func) - else: - new_fmodel = plot_decorator(fmodel, samples, kind_plot, references, plot_func) + new_fmodel = ppl_plot_decorator(fmodel, samples, kind_plot, references, plot_func, engine) out = interactive_output(new_fmodel, textboxes) default_names = ["__set_xlim__", "__x_min__", "__x_max__", "__resample__"] default_controls = [textboxes[name] for name in default_names] diff --git a/preliz/tests/test_posterior_to_prior.py b/preliz/tests/test_posterior_to_prior.py index 55541051..8f13dde2 100644 --- a/preliz/tests/test_posterior_to_prior.py +++ b/preliz/tests/test_posterior_to_prior.py @@ -17,10 +17,10 @@ def test_p2p_pymc(): pz.posterior_to_prior(model, idata) - assert 'Gamma\x1b[0m("b", alpha=' in pz.posterior_to_prior(model, idata, alternative="auto") - pz.posterior_to_prior(model, idata, alternative=[pz.LogNormal()]) + assert 'Gamma\x1b[0m("b", alpha=' in pz.posterior_to_prior(model, idata, new_families="auto") + pz.posterior_to_prior(model, idata, new_families=[pz.LogNormal()]) assert 'Gamma\x1b[0m("b", mu=' in pz.posterior_to_prior( - model, idata, alternative={"b": [pz.Gamma(mu=0)]} + model, idata, new_families={"b": [pz.Gamma(mu=0)]} ) @@ -39,9 +39,9 @@ def test_p2p_pymc(): def test_p2p_bambi(): pz.posterior_to_prior(bmb_model, bmb_idata) assert 'Gamma\x1b[0m", alpha=' in pz.posterior_to_prior( - bmb_model, bmb_idata, alternative="auto" + bmb_model, bmb_idata, new_families="auto" ) - pz.posterior_to_prior(bmb_model, bmb_idata, alternative=[pz.LogNormal()]) + pz.posterior_to_prior(bmb_model, bmb_idata, new_families=[pz.LogNormal()]) assert 'Normal\x1b[0m", mu=' in pz.posterior_to_prior( - bmb_model, bmb_idata, alternative={"Intercept": [pz.Normal(mu=1, sigma=1)]} + bmb_model, bmb_idata, new_families={"Intercept": [pz.Normal(mu=1, sigma=1)]} ) diff --git a/preliz/unidimensional/beta_mode.py b/preliz/unidimensional/beta_mode.py index 182fd46d..8cdbb564 100644 --- a/preliz/unidimensional/beta_mode.py +++ b/preliz/unidimensional/beta_mode.py @@ -1,9 +1,5 @@ -import logging - -from ..distributions import Beta -from ..internal.optimization import optimize_beta_mode - -_log = logging.getLogger("preliz") +from preliz.distributions.beta import Beta +from preliz.internal.optimization import optimize_beta_mode def beta_mode(lower, upper, mode, mass=0.94, plot=True, plot_kwargs=None, ax=None): diff --git a/preliz/unidimensional/combine_roulette.py b/preliz/unidimensional/combine_roulette.py index 7a7b67f0..0a2b0bf3 100644 --- a/preliz/unidimensional/combine_roulette.py +++ b/preliz/unidimensional/combine_roulette.py @@ -1,6 +1,7 @@ import numpy as np -from preliz.internal.distribution_helper import process_extra, get_distributions +from preliz.internal.distribution_helper import get_distributions +from preliz.internal.distribution_helper import process_extra from preliz.internal.optimization import fit_to_epdf diff --git a/preliz/unidimensional/maxent.py b/preliz/unidimensional/maxent.py index 1faf50b7..8e75c734 100644 --- a/preliz/unidimensional/maxent.py +++ b/preliz/unidimensional/maxent.py @@ -1,10 +1,8 @@ -import logging +import warnings -from ..distributions import Normal -from ..internal.distribution_helper import valid_distribution -from ..internal.optimization import relative_error, optimize_max_ent, get_fixed_params - -_log = logging.getLogger("preliz") +from preliz.distributions.normal import Normal +from preliz.internal.distribution_helper import valid_distribution +from preliz.internal.optimization import relative_error, optimize_max_ent, get_fixed_params def maxent( @@ -98,9 +96,9 @@ def maxent( if distribution.kind == "discrete": if not end_points_ints(lower, upper): - _log.info( - "%s distribution is discrete, but the provided bounds are not integers", - distribution.__class__.__name__, + warnings.warn( + f"\n{distribution.__class__.__name__} distribution is discrete, " + "but the provided bounds are not integers" ) # Find which parameters has been fixed @@ -124,10 +122,9 @@ def maxent( r_error, computed_mass = relative_error(distribution, lower, upper, mass) if r_error > 0.01: - _log.info( - " The requested mass is %.3g, but the computed one is %.3g", - mass, - computed_mass, + warnings.warn( + f"\nThe requested mass is {mass:.3g},\n" f"but the computed one is {computed_mass:.3g}", + stacklevel=2, ) if plot: diff --git a/preliz/unidimensional/quartile.py b/preliz/unidimensional/quartile.py index 12a6f956..c29d965f 100644 --- a/preliz/unidimensional/quartile.py +++ b/preliz/unidimensional/quartile.py @@ -1,12 +1,9 @@ -import logging - +import warnings import numpy as np -from ..distributions import Normal -from ..internal.distribution_helper import valid_distribution -from ..internal.optimization import relative_error, optimize_quartile, get_fixed_params - -_log = logging.getLogger("preliz") +from preliz.distributions.normal import Normal +from preliz.internal.distribution_helper import valid_distribution +from preliz.internal.optimization import relative_error, optimize_quartile, get_fixed_params def quartile( @@ -112,9 +109,12 @@ def quartile( r_error, _ = relative_error(distribution, q1, q3, 0.5) if r_error > 0.01: - _log.info( - "The expected masses are 0.25, 0.5, 0.75\n The computed ones are: %.2g, %.2g, %.2g", - *distribution.cdf(quartiles) + computed_masses = distribution.cdf(quartiles).astype(float) + warnings.warn( + f"\nThe expected masses are 0.25, 0.5, 0.75\n" + f"The computed ones are: {computed_masses[0]:.2g}, " + f"{computed_masses[1]:.2g}, {computed_masses[2]:.2g}", + stacklevel=2, ) if plot: diff --git a/preliz/unidimensional/quartile_int.py b/preliz/unidimensional/quartile_int.py index 7ae3ac3a..99310ca8 100644 --- a/preliz/unidimensional/quartile_int.py +++ b/preliz/unidimensional/quartile_int.py @@ -4,13 +4,13 @@ pass from preliz.internal.optimization import fit_to_quartile -from ..internal.plot_helper import ( +from preliz.internal.plot_helper import ( create_figure, check_inside_notebook, representations, reset_dist_panel, ) -from ..internal.distribution_helper import process_extra, get_distributions +from preliz.internal.distribution_helper import process_extra, get_distributions class QuartileInt: diff --git a/preliz/unidimensional/roulette.py b/preliz/unidimensional/roulette.py index d7bae9f9..0eb08225 100644 --- a/preliz/unidimensional/roulette.py +++ b/preliz/unidimensional/roulette.py @@ -9,10 +9,10 @@ except ImportError: pass -from ..internal.optimization import fit_to_epdf -from ..internal.plot_helper import check_inside_notebook, representations -from ..internal.distribution_helper import process_extra, get_distributions -from ..distributions import all_discrete, all_continuous +from preliz.internal.optimization import fit_to_epdf +from preliz.internal.plot_helper import check_inside_notebook, representations +from preliz.internal.distribution_helper import process_extra, get_distributions +from preliz.distributions import all_discrete, all_continuous class Roulette: