chore: minor docs updates; minor renamings
dynobo committed Jun 25, 2024
1 parent b99a851 commit e2c70ac
Showing 10 changed files with 53 additions and 32 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG
@@ -2,6 +2,11 @@

# Changelog

## 0.4.1 (2024-06-25)

- Add `lmdiag.style.use("default")` for reverting to the default style.
- Minor documentation updates.

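For illustration only (not part of this CHANGELOG hunk), switching styles back and forth could look roughly like this; the `black_and_red` name is taken from the README example further down:

```python
import lmdiag

lmdiag.style.use(style="black_and_red")  # mimic R's plot.lm look
lmdiag.style.use("default")              # new in 0.4.1: revert to the default style
```
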
## 0.4.0 (2024-06-24)

- Breaking changes:
24 changes: 11 additions & 13 deletions README.md
@@ -21,7 +21,7 @@ lmdiag generates plots for _fitted_ linear regression models from
[`linearmodels`](https://bashtage.github.io/linearmodels/doc/index.html) and
[`scikit-learn`](https://scikit-learn.org/stable/).

You can find many examples in
You can find some usage examples in
[this jupyter notebook](https://github.com/dynobo/lmdiag/blob/master/example.ipynb).

### Example
@@ -33,10 +33,10 @@ import lmdiag

# Fit model with random sample data
np.random.seed(20)
predictor = np.random.normal(size=30, loc=20, scale=3)
response = 5 + 5 * predictor + np.random.normal(size=30)
X = sm.add_constant(predictor)
lm = sm.OLS(response, X).fit()
X = np.random.normal(size=30, loc=20, scale=3)
y = 5 + 5 * X + np.random.normal(size=30)
X = sm.add_constant(X)  # intercept required by statsmodels
lm = sm.OLS(y, X).fit()

# Plot lmdiag facet chart
lmdiag.style.use(style="black_and_red") # Mimic R's plot.lm style
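The facet-plot call itself is collapsed in this hunk. Judging from the `fig.show()` context line in the next hunk, the hidden part presumably looks roughly like the sketch below; the `lmdiag.plot` name is an assumption, not shown in this diff:

```python
fig = lmdiag.plot(lm)  # assumed entry point drawing all four diagnostic charts
fig.show()
```
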
@@ -64,14 +64,14 @@ fig.show()

- Print description to aid plot interpretation:

`lmdiag.info()` (for all plots)
`lmdiag.help()` (for all plots)

`lmdiag.info('<method name>')` (for individual plot)
`lmdiag.help('<method name>')` (for individual plot)

### Performance
### Increase performance

Plotting models fitted on large datasets can be slow. There are some things you can try
to speed it up:
Plotting models fitted on large datasets might be slow. There are some things you can
try to speed it up:

#### 1. Tune LOWESS-parameters

@@ -98,9 +98,7 @@ import matplotlib
matplotlib.use('agg')
```

## Development

### Setup environment
### Setup development environment

```sh
python -m venv .venv
2 changes: 1 addition & 1 deletion lmdiag/help.py
@@ -128,7 +128,7 @@ def help( # noqa: A001 # shadowing built-in
if m in plot_descriptions:
_print_desc(m)
else:
print( # noqa: T201
raise ValueError(
f"Unknown plotting method '{method}'."
"Run lmdiag.help() to print all available method descriptions."
)
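With this change, an unknown method name fails loudly instead of being silently printed. A minimal sketch of the resulting behavior, assuming lmdiag >= 0.4.1 (`not_a_plot` is a made-up name):

```python
import lmdiag

lmdiag.help()             # prints descriptions for all plots
lmdiag.help("resid_fit")  # prints the description for one plot

try:
    lmdiag.help("not_a_plot")
except ValueError as exc:
    print(exc)  # Unknown plotting method 'not_a_plot'. ...
```
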
16 changes: 12 additions & 4 deletions lmdiag/plots.py
@@ -40,6 +40,8 @@ def resid_fit(
For detailed explanation of the lowess parameters, see:
https://www.statsmodels.org/stable/generated/statsmodels.nonparametric.smoothers_lowess.lowess.html
Run `lmdiag.help("resid_fit")` for hints on chart interpretation.
Args:
lm: A fitted linear model of a supported type.
x: X (predictor) of training data. Only for `sklearn` models!
@@ -54,7 +56,7 @@
Returns:
Figure of the plot.
"""
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y)
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm=lm, x=x, y=y)

fitted = lm_stats.fitted_values
residuals = lm_stats.residuals
@@ -82,6 +84,8 @@ def q_q(
) -> mpl.figure.Figure:
"""Draw Q-Q-Plot.
Run `lmdiag.help("q_q")` for hints on chart interpretation.
Args:
lm: A fitted linear model of a supported type.
x: X (predictor) of training data. Only for `sklearn` models!
@@ -92,7 +96,7 @@
Returns:
Figure of the plot.
"""
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y)
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm=lm, x=x, y=y)

std_resid = lm_stats.standard_residuals
quantiles = lm_stats.normalized_quantiles
@@ -135,6 +139,8 @@ def scale_loc(
For detailed explanation of the lowess parameters, see:
https://www.statsmodels.org/stable/generated/statsmodels.nonparametric.smoothers_lowess.lowess.html
Run `lmdiag.help("scale_loc")` for hints on chart interpretation.
Args:
lm: A fitted linear model of a supported type.
x: X (predictor) of training data. Only for `sklearn` models!
@@ -149,7 +155,7 @@
Returns:
Figure of the plot.
"""
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y)
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm=lm, x=x, y=y)

fitted_vals = lm_stats.fitted_values
sqrt_abs_res = lm_stats.sqrt_abs_residuals
@@ -185,6 +191,8 @@ def resid_lev(
For detailed explanation of the lowess parameters, see:
https://www.statsmodels.org/stable/generated/statsmodels.nonparametric.smoothers_lowess.lowess.html
Run `lmdiag.help("resid_lev")` for hints on chart interpretation.
Args:
lm: A fitted linear model of a supported type.
x: X (predictor) of training data. Only for `sklearn` models!
@@ -199,7 +207,7 @@
Returns:
Figure of the plot.
"""
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y)
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm=lm, x=x, y=y)

std_resid = lm_stats.standard_residuals
cooks_d = lm_stats.cooks_d
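All four plot functions changed here follow the same pattern: accept a fitted model (plus `x`/`y` for scikit-learn models) and return a matplotlib figure. A hedged sketch, assuming the functions are re-exported at package level as `lmdiag.resid_fit` etc.:

```python
import lmdiag
import numpy as np
import statsmodels.api as sm

np.random.seed(20)
X = sm.add_constant(np.random.normal(size=30, loc=20, scale=3))
y = 5 + 5 * X[:, 1] + np.random.normal(size=30)
lm = sm.OLS(y, X).fit()

fig = lmdiag.resid_fit(lm)  # likewise: q_q, scale_loc, resid_lev
fig.savefig("resid_fit.png")
```
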
14 changes: 11 additions & 3 deletions lmdiag/statistics/base.py
@@ -7,11 +7,12 @@


def optionally_cached_property(func: Callable) -> property:
"""Cache property of a StatsBase instance, if caching is enabled."""
cached = cached_property(func)

@wraps(func)
def wrapper(cls: Any) -> Any:
if getattr(cls, "_cache_properties", False):
if getattr(cls, "_use_cache", False):
if not hasattr(cached, "__wrapped__"):
cached.__set_name__(cls, func.__name__)
return cached.__get__(cls)
@@ -21,9 +22,13 @@ def wrapper(cls: Any) -> Any:


class StatsBase(ABC):
_use_cache: bool

@optionally_cached_property
@abstractmethod
def residuals(self) -> np.ndarray: ...
def residuals(self) -> np.ndarray:
"""Distance of actual y from predicted y-hat."""
...

@optionally_cached_property
@abstractmethod
@@ -45,10 +50,13 @@ def leverage(self) -> np.typing.ArrayLike: ...

@optionally_cached_property
@abstractmethod
def parameter_count(self) -> int: ...
def parameter_count(self) -> int:
"""Degrees of freedom of the model; Count of variables + intercept."""
...

@optionally_cached_property
def sqrt_abs_residuals(self) -> np.ndarray:
"""Square root of absolute standardized residuals."""
return np.sqrt(np.abs(self.standard_residuals))

@optionally_cached_property
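The renamed `_use_cache` flag is what `optionally_cached_property` checks before deciding whether to memoize. A toy sketch of the intended behavior, assuming the decorator is importable from `lmdiag.statistics.base` and that the collapsed branch computes the value uncached:

```python
from lmdiag.statistics.base import optionally_cached_property


class Demo:
    _use_cache = True  # set to False to recompute on every access

    def __init__(self) -> None:
        self.calls = 0

    @optionally_cached_property
    def value(self) -> int:
        self.calls += 1
        return 42


d = Demo()
_ = (d.value, d.value)
print(d.calls)  # expected: 1 with caching enabled, 2 if _use_cache is False
```
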
4 changes: 2 additions & 2 deletions lmdiag/statistics/linearmodels_stats.py
@@ -8,11 +8,11 @@ class LinearmodelsStats(StatsBase):
def __init__(
self,
lm: linearmodels.iv.results.OLSResults,
cache: bool = True,
use_cache: bool = True,
) -> None:
super().__init__()
self._lm = lm
self._cache_properties = cache
self._use_cache = use_cache

@optionally_cached_property
def residuals(self) -> np.ndarray:
6 changes: 3 additions & 3 deletions lmdiag/statistics/select.py
@@ -51,19 +51,19 @@ def get_stats(
if isinstance(lm, (RegressionResultsWrapper, GLMResults, RLMResults)):
if x or y:
_warn_x_y()
model_stats = _init_statsmodels_stats(lm)
model_stats = _init_statsmodels_stats(lm=lm)

elif linearmodels and isinstance(
lm, (linearmodels.iv.results.OLSResults, linearmodels.iv.results.IVResults)
):
if x or y:
_warn_x_y()
model_stats = _init_linearmodels_stats(lm)
model_stats = _init_linearmodels_stats(lm=lm)

elif sklearn and isinstance(lm, sklearn.linear_model.LinearRegression):
if x is None or y is None:
raise ValueError("x and y args must be provided this model type!")
model_stats = _init_sklearn_stats(lm, x, y)
model_stats = _init_sklearn_stats(lm=lm, x=x, y=y)

else:
raise TypeError(
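The keyword-only call style introduced here matches how `get_stats` is most naturally used. A minimal sketch for the scikit-learn branch, the only one that requires `x` and `y` (import path assumed from the file location):

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from lmdiag.statistics.select import get_stats

rng = np.random.default_rng(20)
X = rng.normal(loc=20, scale=3, size=(30, 1))
y = 5 + 5 * X[:, 0] + rng.normal(size=30)

lm = LinearRegression().fit(X, y)
stats = get_stats(lm=lm, x=X, y=y)  # omitting x/y raises ValueError for sklearn models
print(stats.standard_residuals[:3])
```
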
4 changes: 2 additions & 2 deletions lmdiag/statistics/sklearn_stats.py
@@ -10,13 +10,13 @@ def __init__(
lm: LinearRegression,
x: np.ndarray,
y: np.ndarray,
cache: bool = True,
use_cache: bool = True,
) -> None:
super().__init__()
self._lm = lm
self._X = x
self._y = y
self._cache_properties = cache
self._use_cache = use_cache

@optionally_cached_property
def residuals(self) -> np.ndarray:
4 changes: 2 additions & 2 deletions lmdiag/statistics/statsmodels_stats.py
@@ -11,11 +11,11 @@ class StatsmodelsStats(StatsBase):
def __init__(
self,
lm: sm.regression.linear_model.RegressionResultsWrapper,
cache: bool = True,
use_cache: bool = True,
) -> None:
super().__init__()
self._lm = lm
self._cache_properties = cache
self._use_cache = use_cache
self.__ols_influence: Union[OLSInfluence, None] = None

@property
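The same `cache` -> `use_cache` rename applies to all three stats wrappers. A hedged sketch of constructing one directly with caching disabled (normally `get_stats` does this for you; import path assumed from the file location):

```python
import numpy as np
import statsmodels.api as sm
from lmdiag.statistics.statsmodels_stats import StatsmodelsStats

np.random.seed(20)
X = sm.add_constant(np.random.normal(size=30, loc=20, scale=3))
y = 5 + 5 * X[:, 1] + np.random.normal(size=30)
lm = sm.OLS(y, X).fit()

stats = StatsmodelsStats(lm, use_cache=False)  # recompute statistics on every access
print(stats.residuals[:3])
```
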
6 changes: 4 additions & 2 deletions pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "lmdiag"
version = "0.4.0"
version = "0.4.1"
description = "Diagnostic Plots for Lineare Regression Models. Similar to plot.lm in R."
keywords = [
"lm",
@@ -9,7 +9,9 @@ keywords = [
"diagnostics",
"plot",
"chart",
"matplotlib",
"linearmodels",
"statsmodels",
"scikit-learn",
]
readme = "README.md"
authors = [{ name = "dynobo", email = "[email protected]" }]
