chore: minor docs updates; minor renamings
dynobo committed Jun 25, 2024
1 parent b99a851 commit e2c70ac
Showing 10 changed files with 53 additions and 32 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG
@@ -2,6 +2,11 @@

# Changelog

## 0.4.1 (2024-06-25)

- Add `lmdiag.style.use("default")` for reverting to the default style.
- Minor documentation updates.

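For illustration only (not part of this CHANGELOG hunk), switching styles back and forth could look roughly like this; the `black_and_red` name is taken from the README example further down:

```python
import lmdiag

lmdiag.style.use(style="black_and_red")  # mimic R's plot.lm look
lmdiag.style.use("default")              # new in 0.4.1: revert to the default style
```
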
## 0.4.0 (2024-06-24)

- Breaking changes:
24 changes: 11 additions & 13 deletions README.md
@@ -21,7 +21,7 @@ lmdiag generates plots for _fitted_ linear regression models from
[`linearmodels`](https://bashtage.github.io/linearmodels/doc/index.html) and
[`scikit-learn`](https://scikit-learn.org/stable/).

You can find many examples in
You can find some usage examples in
[this jupyter notebook](https://github.com/dynobo/lmdiag/blob/master/example.ipynb).

### Example
@@ -33,10 +33,10 @@ import lmdiag

# Fit model with random sample data
np.random.seed(20)
predictor = np.random.normal(size=30, loc=20, scale=3)
response = 5 + 5 * predictor + np.random.normal(size=30)
X = sm.add_constant(predictor)
lm = sm.OLS(response, X).fit()
X = np.random.normal(size=30, loc=20, scale=3)
y = 5 + 5 * X + np.random.normal(size=30)
X = sm.add_constant(X)  # intercept required by statsmodels
lm = sm.OLS(y, X).fit()

# Plot lmdiag facet chart
lmdiag.style.use(style="black_and_red") # Mimic R's plot.lm style
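The facet-plot call itself is collapsed in this hunk. Judging from the `fig.show()` context line in the next hunk, the hidden part presumably looks roughly like the sketch below; the `lmdiag.plot` name is an assumption, not shown in this diff:

```python
fig = lmdiag.plot(lm)  # assumed entry point drawing all four diagnostic charts
fig.show()
```
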
@@ -64,14 +64,14 @@ fig.show()

- Print description to aid plot interpretation:

`lmdiag.info()` (for all plots)
`lmdiag.help()` (for all plots)

`lmdiag.info('<method name>')` (for individual plot)
`lmdiag.help('<method name>')` (for individual plot)

### Performance
### Increase performance

Plotting models fitted on large datasets can be slow. There are some things you can try
to speed it up:
Plotting models fitted on large datasets might be slow. There are some things you can
try to speed it up:

#### 1. Tune LOWESS-parameters

@@ -98,9 +98,7 @@ import matplotlib
matplotlib.use('agg')
```

## Development

### Setup environment
### Setup development environment

```sh
python -m venv .venv
2 changes: 1 addition & 1 deletion lmdiag/help.py
@@ -128,7 +128,7 @@ def help( # noqa: A001 # shadowing built-in
if m in plot_descriptions:
_print_desc(m)
else:
print( # noqa: T201
raise ValueError(
f"Unknown plotting method '{method}'."
"Run lmdiag.help() to print all available method descriptions."
)
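With this change, an unknown method name fails loudly instead of being silently printed. A minimal sketch of the resulting behavior, assuming lmdiag >= 0.4.1 (`not_a_plot` is a made-up name):

```python
import lmdiag

lmdiag.help()             # prints descriptions for all plots
lmdiag.help("resid_fit")  # prints the description for one plot

try:
    lmdiag.help("not_a_plot")
except ValueError as exc:
    print(exc)  # Unknown plotting method 'not_a_plot'. ...
```
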
16 changes: 12 additions & 4 deletions lmdiag/plots.py
@@ -40,6 +40,8 @@ def resid_fit(
For detailed explanation of the lowess parameters, see:
https://www.statsmodels.org/stable/generated/statsmodels.nonparametric.smoothers_lowess.lowess.html
Run `lmdiag.help("resid_fit")` for hints on chart interpretation.
Args:
lm: A fitted linear model of a supported type.
x: X (predictor) of training data. Only for `sklearn` models!
@@ -54,7 +56,7 @@
Returns:
Figure of the plot.
"""
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y)
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm=lm, x=x, y=y)

fitted = lm_stats.fitted_values
residuals = lm_stats.residuals
@@ -82,6 +84,8 @@ def q_q(
) -> mpl.figure.Figure:
"""Draw Q-Q-Plot.
Run `lmdiag.help("q_q")` for hints on chart interpretation.
Args:
lm: A fitted linear model of a supported type.
x: X (predictor) of training data. Only for `sklearn` models!
@@ -92,7 +96,7 @@
Returns:
Figure of the plot.
"""
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y)
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm=lm, x=x, y=y)

std_resid = lm_stats.standard_residuals
quantiles = lm_stats.normalized_quantiles
@@ -135,6 +139,8 @@ def scale_loc(
For detailed explanation of the lowess parameters, see:
https://www.statsmodels.org/stable/generated/statsmodels.nonparametric.smoothers_lowess.lowess.html
Run `lmdiag.help("scale_loc")` for hints on chart interpretation.
Args:
lm: A fitted linear model of a supported type.
x: X (predictor) of training data. Only for `sklearn` models!
@@ -149,7 +155,7 @@
Returns:
Figure of the plot.
"""
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y)
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm=lm, x=x, y=y)

fitted_vals = lm_stats.fitted_values
sqrt_abs_res = lm_stats.sqrt_abs_residuals
@@ -185,6 +191,8 @@ def resid_lev(
For detailed explanation of the lowess parameters, see:
https://www.statsmodels.org/stable/generated/statsmodels.nonparametric.smoothers_lowess.lowess.html
Run `lmdiag.help("resid_lev")` for hints on chart interpretation.
Args:
lm: A fitted linear model of a supported type.
x: X (predictor) of training data. Only for `sklearn` models!
@@ -199,7 +207,7 @@
Returns:
Figure of the plot.
"""
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm, x=x, y=y)
lm_stats = lm if isinstance(lm, StatsBase) else get_stats(lm=lm, x=x, y=y)

std_resid = lm_stats.standard_residuals
cooks_d = lm_stats.cooks_d
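All four plot functions changed here follow the same pattern: accept a fitted model (plus `x`/`y` for scikit-learn models) and return a matplotlib figure. A hedged sketch, assuming the functions are re-exported at package level as `lmdiag.resid_fit` etc.:

```python
import lmdiag
import numpy as np
import statsmodels.api as sm

np.random.seed(20)
X = sm.add_constant(np.random.normal(size=30, loc=20, scale=3))
y = 5 + 5 * X[:, 1] + np.random.normal(size=30)
lm = sm.OLS(y, X).fit()

fig = lmdiag.resid_fit(lm)  # likewise: q_q, scale_loc, resid_lev
fig.savefig("resid_fit.png")
```
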
14 changes: 11 additions & 3 deletions lmdiag/statistics/base.py
@@ -7,11 +7,12 @@


def optionally_cached_property(func: Callable) -> property:
"""Cache property of a StatsBase instance, if caching is enabled."""
cached = cached_property(func)

@wraps(func)
def wrapper(cls: Any) -> Any:
if getattr(cls, "_cache_properties", False):
if getattr(cls, "_use_cache", False):
if not hasattr(cached, "__wrapped__"):
cached.__set_name__(cls, func.__name__)
return cached.__get__(cls)
@@ -21,9 +22,13 @@ def wrapper(cls: Any) -> Any:


class StatsBase(ABC):
_use_cache: bool

@optionally_cached_property
@abstractmethod
def residuals(self) -> np.ndarray: ...
def residuals(self) -> np.ndarray:
"""Distance of actual y from predicted y-hat."""
...

@optionally_cached_property
@abstractmethod
@@ -45,10 +50,13 @@ def leverage(self) -> np.typing.ArrayLike: ...

@optionally_cached_property
@abstractmethod
def parameter_count(self) -> int: ...
def parameter_count(self) -> int:
"""Degrees of freedom of the model; Count of variables + intercept."""
...

@optionally_cached_property
def sqrt_abs_residuals(self) -> np.ndarray:
"""Square root of absolute standardized residuals."""
return np.sqrt(np.abs(self.standard_residuals))

@optionally_cached_property
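The renamed `_use_cache` flag is what `optionally_cached_property` checks before deciding whether to memoize. A toy sketch of the intended behavior, assuming the decorator is importable from `lmdiag.statistics.base` and that the collapsed branch computes the value uncached:

```python
from lmdiag.statistics.base import optionally_cached_property


class Demo:
    _use_cache = True  # set to False to recompute on every access

    def __init__(self) -> None:
        self.calls = 0

    @optionally_cached_property
    def value(self) -> int:
        self.calls += 1
        return 42


d = Demo()
_ = (d.value, d.value)
print(d.calls)  # expected: 1 with caching enabled, 2 if _use_cache is False
```
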
4 changes: 2 additions & 2 deletions lmdiag/statistics/linearmodels_stats.py
@@ -8,11 +8,11 @@ class LinearmodelsStats(StatsBase):
def __init__(
self,
lm: linearmodels.iv.results.OLSResults,
cache: bool = True,
use_cache: bool = True,
) -> None:
super().__init__()
self._lm = lm
self._cache_properties = cache
self._use_cache = use_cache

@optionally_cached_property
def residuals(self) -> np.ndarray:
6 changes: 3 additions & 3 deletions lmdiag/statistics/select.py
@@ -51,19 +51,19 @@ def get_stats(
if isinstance(lm, (RegressionResultsWrapper, GLMResults, RLMResults)):
if x or y:
_warn_x_y()
model_stats = _init_statsmodels_stats(lm)
model_stats = _init_statsmodels_stats(lm=lm)

elif linearmodels and isinstance(
lm, (linearmodels.iv.results.OLSResults, linearmodels.iv.results.IVResults)
):
if x or y:
_warn_x_y()
model_stats = _init_linearmodels_stats(lm)
model_stats = _init_linearmodels_stats(lm=lm)

elif sklearn and isinstance(lm, sklearn.linear_model.LinearRegression):
if x is None or y is None:
raise ValueError("x and y args must be provided this model type!")
model_stats = _init_sklearn_stats(lm, x, y)
model_stats = _init_sklearn_stats(lm=lm, x=x, y=y)

else:
raise TypeError(
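The keyword-only call style introduced here matches how `get_stats` is most naturally used. A minimal sketch for the scikit-learn branch, the only one that requires `x` and `y` (import path assumed from the file location):

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from lmdiag.statistics.select import get_stats

rng = np.random.default_rng(20)
X = rng.normal(loc=20, scale=3, size=(30, 1))
y = 5 + 5 * X[:, 0] + rng.normal(size=30)

lm = LinearRegression().fit(X, y)
stats = get_stats(lm=lm, x=X, y=y)  # omitting x/y raises ValueError for sklearn models
print(stats.standard_residuals[:3])
```
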
4 changes: 2 additions & 2 deletions lmdiag/statistics/sklearn_stats.py
@@ -10,13 +10,13 @@ def __init__(
lm: LinearRegression,
x: np.ndarray,
y: np.ndarray,
cache: bool = True,
use_cache: bool = True,
) -> None:
super().__init__()
self._lm = lm
self._X = x
self._y = y
self._cache_properties = cache
self._use_cache = use_cache

@optionally_cached_property
def residuals(self) -> np.ndarray:
4 changes: 2 additions & 2 deletions lmdiag/statistics/statsmodels_stats.py
@@ -11,11 +11,11 @@ class StatsmodelsStats(StatsBase):
def __init__(
self,
lm: sm.regression.linear_model.RegressionResultsWrapper,
cache: bool = True,
use_cache: bool = True,
) -> None:
super().__init__()
self._lm = lm
self._cache_properties = cache
self._use_cache = use_cache
self.__ols_influence: Union[OLSInfluence, None] = None

@property
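The same `cache` -> `use_cache` rename applies to all three stats wrappers. A hedged sketch of constructing one directly with caching disabled (normally `get_stats` does this for you; import path assumed from the file location):

```python
import numpy as np
import statsmodels.api as sm
from lmdiag.statistics.statsmodels_stats import StatsmodelsStats

np.random.seed(20)
X = sm.add_constant(np.random.normal(size=30, loc=20, scale=3))
y = 5 + 5 * X[:, 1] + np.random.normal(size=30)
lm = sm.OLS(y, X).fit()

stats = StatsmodelsStats(lm, use_cache=False)  # recompute statistics on every access
print(stats.residuals[:3])
```
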
6 changes: 4 additions & 2 deletions pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "lmdiag"
version = "0.4.0"
version = "0.4.1"
description = "Diagnostic Plots for Lineare Regression Models. Similar to plot.lm in R."
keywords = [
"lm",
@@ -9,7 +9,9 @@ keywords = [
"diagnostics",
"plot",
"chart",
"matplotlib",
"linearmodels",
"statsmodels",
"scikit-learn",
]
readme = "README.md"
authors = [{ name = "dynobo", email = "[email protected]" }]
