From 2bcd719a5ffff16a1285068097fd64b0158079b7 Mon Sep 17 00:00:00 2001
From: Michael Friendly <friendly@yorku.ca>
Date: Sat, 2 Nov 2024 18:48:12 -0400
Subject: [PATCH] pvPlot now uses car::dataEllipse

---
 R/pvPlot.R           | 184 ++++++++++++++++++++++++++++++++-----------
 child/03-network.qmd |   5 --
 2 files changed, 138 insertions(+), 51 deletions(-)

diff --git a/R/pvPlot.R b/R/pvPlot.R
index 1c51cd9..8171656 100644
--- a/R/pvPlot.R
+++ b/R/pvPlot.R
@@ -3,23 +3,89 @@
 # To understand the partial correlations, make scatterplots of the residuals from the
 # models where each x_i, x_j are predicted by all others. 
 
-# see also: pairs version
+# see also: stuff relevant to a pairs version
 # https://stackoverflow.com/questions/35591033/plot-scatterplot-matrix-with-partial-correlation-coefficients-in-r
 
-# TODO: make show.partial take a list(loc = c(x,y), cex = )
-# TODO: make id take a list of options
-
+# DONE: make show.partial take a list(loc = c(x,y), cex = )
+# DONE: make id take a list of options
+# DONE: use dataEllipse() instead of scatterplot()
+
+# TODO: be able to use ellipse = FALSE
+# BUG:  passing either ellipse or cex to dataEllipse() -> error
+#       Error in text.default(x, y, label, pos = pos, xpd = xpd, col = col, ...) : 
+#       formal argument "cex" matched by multiple actual arguments
+#       Trying to pass 
+
+#' Partial variables plot
+#' 
+#' @description
+#' A partial variable plot is a visualization of a partial correlation of two variables in the context
+#' of other variables in a dataset. For two variables \eqn{x_i} and \eqn{x_j}, 
+#' it is simply an enhanced scatterplot of the \emph{partial residuals},
+#' \eqn{e_i = (x_i - \hat{x}_i)} from a regression of \eqn{x_i} on all other variables \eqn{Z}
+#' against those \eqn{e_j = (x_j - \hat{x}_j)} for another variable $x_j$.
+#' The basic scatterplot of these residuals can be enhanced by also showing the data ellipse
+#' of these residuals and point labels to identify unusual data.
+#' (These plots are produced by SAS \code{PROC CORR}, with the \code{PARTIAL} and \code{SCATTER}
+#' statements.)
+#' 
+#' Partial variable plots are intimately related to an \emph{added-variable plot},
+#' such as produced by \code{\link[car]{avPlots}}. However, that implementation
+#' is designed for a linear model, rather than a data.frame.
+#' 
+#'
+#' @param X     a data.frame of numeric variables
+#' @param vars  either the character names of two variables in \code{X} or their indices
+#' @param labels id labels for the points. If not supplied, rownames of the dataset are used.
+#' @param id    controls point identification; if \code{FALSE} (the default), no points are identified; 
+#'              can be a list of named arguments to the \code{\link[car]{showLabels} function
+#' @param ellipse    logical; whether to draw the data ellipse. Not presently used
+#' @param ellipse.args  a list of other arguments for the ellipse
+#' @param draw     logical; if \code{TRUE} produce graphical output; if \code{FALSE}, only invisibly return 
+#'              coordinates of ellipse(s).
+#' @param col   color used for points, ...
+#' @param pch   the plotting character
+#' @param cex   Character expansion for points and labels. Not presently used
+#' @param axes  logical; if \code{TRUE} (the default), grey axes lines are drawn at 0 on both coordinates
+#' @param regline logical; if \code{TRUE} (the default), draws the regression line for the partial residuals
+#'        of \code{vars[2]} on those for \code{vars[1]}.
+#' @param show.partial controls whether the partial correlation value is displayed in the plot. If \code{FALSE}
+#'              the value is not shown. Otherwise, can be a list containing the location (\code{loc}) and 
+#'              character size (\code{cex}) of the label. 
+#' @param ...   other arguments passed to \code{\link[car]{dataEllipse}}
+#'
+#' @return      This functions is mainly used for their side effect of producing plots. For greater 
+#'              flexibility (e.g., adding plot annotations), it returns invisibly the coordinates of the residuals
+#'              plotted.
+#' @export
+#' @author Michael Friendly
+#' @importFrom car dataEllipse
+#' @examples
+#' data(crime, package = "ggbiplot")
+#' crime.num <- crime |>
+#'   tibble::column_to_rownames("st") |>
+#'   dplyr::select(where(is.numeric))
+#'   
+#' pvPlot(crime.num, vars = c("burglary", "larceny"))
+#' pvPlot(crime.num, vars = c("auto", "robbery"))
+#' 
+#' # bugged:
+#' pvPlot(crime.num, vars = c("burglary", "larceny"), ellipse=FALSE)
+#' 
 pvPlot <- function(
    X, 
    vars = 1:2,
    labels,
    id = FALSE, 
-   ellipse=FALSE, 
+   ellipse=TRUE,
+   ellipse.args = list(levels = 0.68, fill = TRUE, fill.alpha = 0.05, robust=FALSE, col="black"),
+   draw = TRUE,
    col = "black", 
    pch = 16, 
    cex = par("cex"),
    axes = TRUE,
-   show.partial = TRUE,
+   regline = TRUE,
+   show.partial = list(loc = c(0.025, 0.95), cex = 1.2),
    ...) {
 
   nv <- ncol(X)
@@ -28,6 +94,7 @@ pvPlot <- function(
   # variables, as names, even if vars is numeric
   all <- names(X)
   vars <- if(is.numeric(vars)) names(X)[vars] else vars
+  # TODO: make others an argument, so it's not necessary to partial _all_ others
   others <- setdiff(all, vars)
 
   # variables for this plot  
@@ -36,52 +103,53 @@ pvPlot <- function(
 
   # get the partial residuals fitting each var from all the others
   res <- X[, vars]
+  rownames(res) <- rownames(X)
   res[, 1] <- lsfit(X[, others], X[, v1])$residuals
   res[, 2] <- lsfit(X[, others], X[, v2])$residuals
-  # plot(res, 
-  #      col = col, pch = pch, cex = cex, ...)
 
-  xlab <- paste(vars[1], "residual")
-  ylab <- paste(vars[2], "residual")
+  xlab <- paste(vars[1], "| others")
+  ylab <- paste(vars[2], "| others")
   labels <- if (missing(labels)) rownames(X) else labels
   
   applyDefaults <- car:::applyDefaults
-  id <- applyDefaults(id, defaults=list(method="mahal", 
-                                       n=5, cex=1, 
-                                       col="black", 
-                                       location="lr"), type="id")
-  if (isFALSE(id)){
-    id.n <- 0
-    id.method <- "mahal"
-    labels <- id.cex <- id.col <- id.location <- NULL
-  }
-  else{
-    labels <- id$labels
-    id.method <- id$method
-    id.n <- if ("identify" %in% id.method) Inf else id$n
-    id.cex <- id$cex
-  #    id.col <- if (by.groups) id$col else id$col[1]
-    id.location <- id$location
-  }
+  id <- applyDefaults(id, 
+                      defaults=list(method="mahal", 
+                                    n=5, cex=1, 
+                                    col="black", 
+                                    location="lr"), type="id")
+
+  if (draw) {
+
+    # handle ellipse args
+    levels <- ellipse.args$levels %||% 0.68
+    fill <-   ellipse.args$fill %||% TRUE
+    fill.alpha <- ellipse.args$alpha %||% 0.05
+    robust <- ellipse.args$robust %||% FALSE
+  
+    dataEllipse(res[, 2] ~ res[, 1], data = res,
+                xlab = xlab, ylab = ylab,
+                pch = pch, col = col,
+ #               cex = cex,
+                ellipse = ellipse, # ellipse.label = "",
+                levels = levels, fill = fill, fill.alpha = fill.alpha, robust=robust,
+                grid = FALSE,
+                id = id,
+                ...)
+    
+    if (axes) abline(h = 0, v = 0, col = "gray")
+    if (regline) abline(lm(res[, 2] ~ res[, 1], data = res), lwd = 2)
   
-  car::scatterplot(res[, 1], res[, 2],
-    xlab = xlab, ylab = ylab,
-    pch = pch, col = col, cex = cex,
-    ellipse = ellipse,
-    smooth = FALSE, boxplots = FALSE,
-    grid = FALSE,
-    id = list(n=5, labels = labels),      # make it id = id, using our defaults
-    ...)
-
-  if (axes)
-    abline(h = 0, v = 0, col = "gray")
-
-  if (show.partial) {
-    pcor <- round(cor(res[,1], res[,2]), 3)
-    usr <- par("usr")        # save old user/default/system coordinates
-    par(usr = c(0, 1, 0, 1)) # new relative user coordinates
-    text(0.025, 0.95, label = paste("partial r =", pcor),  pos = 4, cex = 1.25)
-    par(usr = usr)           # restore original user coordinates
+    if (!isFALSE(show.partial)) {
+        loc <- show.partial$loc %||% c(0.025, 0.95)
+        cex <- show.partial$cex %||% 1.2
+        pos <- show.partial$pos %||% 4
+
+        pcor <- round(cor(res[,1], res[,2]), 3)
+        usr <- par("usr")        # save old user/default/system coordinates
+        par(usr = c(0, 1, 0, 1)) # new relative user coordinates
+        text(loc[1], loc[2], label = paste("partial r =", pcor),  pos = pos, cex = cex)
+        par(usr = usr)           # restore original user coordinates
+    }
   }
   
   invisible(res)
@@ -123,5 +191,29 @@ p2 <- image_read("images/crime-pvPlot2.png")
 p12 <- image_append(c(p1, p2), stack = FALSE)
 image_write(p12, path="images/crime-pvPlot-1-2.png", format="png")
 
-  
+#  try using dataEllipse instead
+
+res <- pvPlot(crime.num, vars = c("burglary", "larceny"), draw = FALSE)
+dataEllipse(larceny ~ burglary, data = res,
+            levels = 0.68, fill = TRUE, fill.alpha = 0.05, robust=FALSE,
+            grid = FALSE,
+            pch = 16,
+            id = list(n=5))
+abline(h = 0, v = 0, col = "gray")
+abline(lm(larceny ~ burglary, data = res), lwd=2)
+
+# now, using dataEllipse
+pvPlot(crime.num, vars = c("burglary", "larceny"), 
+       id = list(n=5),
+       cex.lab = 1.5)
+
+# now, combine side by side
+op <- par(mar = c(5, 5, 1, 0)+.5,
+          mfrow = c(1, 2))
+pvPlot(crime.num, vars = c("burglary", "larceny"), 
+       id = list(n=5), cex.lab = 1.5)
+pvPlot(crime.num, vars = c("robbery", "auto"), 
+       id = list(n=5), cex.lab = 1.5)
+par(op)
+
 }
\ No newline at end of file
diff --git a/child/03-network.qmd b/child/03-network.qmd
index 21c5682..caf5739 100644
--- a/child/03-network.qmd
+++ b/child/03-network.qmd
@@ -261,15 +261,10 @@ crime.num <- crime |>
   tibble::column_to_rownames("st") |>
   dplyr::select(where(is.numeric))
 
-ellipse.args <- list(levels = 0.68, 
-                     fill.alpha = 0.1, 
-                     robust = FALSE)
 pvPlot(crime.num, vars = c("burglary", "larceny"), 
-       ellipse = ellipse.args,
        id = list(n=5),
        cex.lab = 1.5)
 pvPlot(crime.num, vars = c("robbery", "auto"),
-       ellipse = list(levels = 0.68, fill.alpha = 0.1, robust = FALSE),
        id = list(n=5),
        cex.lab = 1.5)
 ```