Skip to content

Commit

Permalink
upgrade robustbase (0.99-4-1-1) unstable
Browse files Browse the repository at this point in the history
  • Loading branch information
Woomeeme authored and UTsweetyfish committed Feb 24, 2025
1 parent 9eb09de commit ddc5e32
Show file tree
Hide file tree
Showing 111 changed files with 3,800 additions and 2,005 deletions.
1 change: 0 additions & 1 deletion .gitignore

This file was deleted.

16 changes: 10 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
Package: robustbase
Version: 0.93-9
VersionNote: Released 0.93-8 on 2021-06-02 to CRAN
Date: 2021-09-27
Version: 0.99-4-1
VersionNote: Released 0.99-4 on 2024-08-19, 0.99-3 on 2024-07-01 and
0.99-2 on 2024-01-27 to CRAN
Date: 2024-09-24
Title: Basic Robust Statistics
Authors@R: c(person("Martin","Maechler", role=c("aut","cre"), email="[email protected]", comment = c(ORCID = "0000-0002-8685-9910"))
, person("Peter", "Rousseeuw", role="ctb", comment = "Qn and Sn")
Expand All @@ -20,7 +21,10 @@ Authors@R: c(person("Martin","Maechler", role=c("aut","cre"), email="maechler@st
comment = "MM-, tau-, CM-, and MTL- nlrob")
, person("Maria", "Anna di Palma", role = "ctb", comment = "initial version of Comedian")
)
URL: https://robustbase.R-forge.R-project.org/
URL: https://robustbase.R-forge.R-project.org/,
https://R-forge.R-project.org/R/?group_id=59,
https://R-forge.R-project.org/scm/viewvc.php/pkg/robustbase/?root=robustbase,
svn://svn.r-forge.r-project.org/svnroot/robustbase/pkg/robustbase
BugReports: https://R-forge.R-project.org/tracker/?atid=302&group_id=59
Description: "Essential" Robust Statistics.
Tools allowing to analyze data with robust methods. This includes
Expand All @@ -38,7 +42,7 @@ EnhancesNote: linked to in man/*.Rd
LazyData: yes
NeedsCompilation: yes
License: GPL (>= 2)
Packaged: 2021-09-27 07:40:12 UTC; maechler
Packaged: 2024-09-25 09:50:14 UTC; maechler
Author: Martin Maechler [aut, cre] (<https://orcid.org/0000-0002-8685-9910>),
Peter Rousseeuw [ctb] (Qn and Sn),
Christophe Croux [ctb] (Qn and Sn),
Expand All @@ -51,4 +55,4 @@ Author: Martin Maechler [aut, cre] (<https://orcid.org/0000-0002-8685-9910>),
Maria Anna di Palma [ctb] (initial version of Comedian)
Maintainer: Martin Maechler <[email protected]>
Repository: CRAN
Date/Publication: 2021-09-27 10:10:02 UTC
Date/Publication: 2024-09-27 15:30:02 UTC
207 changes: 107 additions & 100 deletions MD5

Large diffs are not rendered by default.

16 changes: 11 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ importFrom("graphics",
abline, axis,
box, legend, lines, matplot, mtext,
panel.smooth, par, plot, points, strheight, text, title)
importFrom("stats",
importFrom("stats", .lm.fit,
aggregate, alias, as.formula, binomial,
confint.lm, dummy.coef.lm, # want to register these as S3 {confint}{lmrob} and {dummy.coef}{..}
coef, cor, cov, cov.wt, cov2cor, delete.response, deviance, dnorm, dpois,
family, fitted, fivenum, formula,
gaussian, glm, glm.fit, hatvalues,
Expand All @@ -32,8 +33,6 @@ importFrom("stats",


## ^^^^ MASS has a bit more; take it as example
if(getRversion() >= "3.1.0")
importFrom("stats", .lm.fit, confint.lm, dummy.coef.lm)
if(getRversion() >= "3.3.0") {
importFrom("stats", sigma)
} else {
Expand Down Expand Up @@ -68,6 +67,7 @@ export(Sn, Qn, Qn.old,
## NO! ddplot, distplot, chi2qqplot
rrcov.control,## << RENAME --- FIXME
huberM,
huberize,
colMedians, rowMedians,
covOGK, covGK, hard.rejection, scaleTau2,
covComed,
Expand All @@ -92,6 +92,7 @@ export(Sn, Qn, Qn.old,
outlierStats,

mc, # Mia Hubers's medcouple
lmc, rmc, # left and right mc, robust measures of tail weight
adjbox,
adjboxStats,
adjOutlyingness,
Expand Down Expand Up @@ -124,9 +125,9 @@ S3method(anova, glmrob)

S3method(alias, lmrob)
S3method(case.names, lmrob)
S3method(confint, lmrob)
S3method(confint, lmrob, confint.lm)## -> R bug (see below) & in R/lmrob.R
S3method(confint, nlrob)
S3method(dummy.coef, lmrob)
S3method(dummy.coef, lmrob, dummy.coef.lm)# (R bug ...)
S3method(estimethod, nlrob)
S3method(family, lmrob)
S3method(hatvalues, lmrob)
Expand All @@ -137,7 +138,12 @@ S3method(nobs, lmrob)
S3method(nobs, lmrob.S, nobs.lmrob)# use the same as "lmrob"
S3method(nobs, mcd)
S3method(residuals, lmrob)
S3method(residuals, lmrob.S)
S3method(variable.names, lmrob)
S3method(print, lmrobCtrl)
S3method(update, lmrobCtrl)
## R Bug {fixed in svn rev 84463}: needs within.list in our NS:
S3method(within, lmrobCtrl, within.list)
S3method(weights, glmrob)
S3method(weights, lmrob)
S3method(weights, lmrob.S, weights.lmrob)# use the same as "lmrob"
Expand Down
7 changes: 6 additions & 1 deletion R/AAA.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ dummy.coef.lm <- function(object, use.na=FALSE, ...)

}# if R <= 3.1.0

## Not exported, and only used because CRAN checks must be faster
## Not exported; used for faster checking, e.g., on CRAN
doExtras <- function() {
interactive() || nzchar(Sys.getenv("R_robustbase_check_extra")) ||
identical("true", unname(Sys.getenv("R_PKG_CHECKING_doExtras")))
Expand Down Expand Up @@ -147,3 +147,8 @@ is.1num <- function(x) is.numeric(x) && length(x) == 1L
} else # default
vc
}


## e.g. for once-per-session warnings:
.optEnv <- new.env(parent = emptyenv(), hash = FALSE)

2 changes: 1 addition & 1 deletion R/BYlogreg.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ BYlogreg <- function(x0, y, initwml=TRUE, # w.x=NULL,
newobj <- mean(phiBY3(scores, y,const))
oldobj <- newobj
grad.BY <- colMeans((derphiBY3(scores,y,const) %*% matrix(1,ncol=p))*x)
h <- -grad.BY + (grad.BY %*% xistart) *xistart
h <- -grad.BY + c(grad.BY %*% xistart) * xistart
finalstep <- h/sqrt(sum(h^2))

if(trace.lev) {
Expand Down
28 changes: 12 additions & 16 deletions R/MTestimador2.R
Original file line number Diff line number Diff line change
Expand Up @@ -350,28 +350,24 @@ glmrobMT <- function(x,y, weights = NULL, start = NULL, offset = NULL,
n <- nrow(x)
p <- ncol(x)
if (is.null(weights))
weights <- rep.int(1, n)
weights <- 1 # rep.int(1, n) {are not stored, nor used apart from 'sni / *' }
else if(any(weights <= 0))
stop("All weights must be positive")
if(!is.null(offset)) stop("non-trivial 'offset' is not yet implemented")
## if (is.null(offset))
## offset <- rep.int(0, n) else if(!all(offset==0))
## warning("'offset' not fully implemented")

linkinv <- family$linkinv
variance <- family$variance

## Copy-paste from ./glmrobMqle.R [overkill currently: Poisson has sni == ni == 1]
ni <- as.vector(weights)
sni <- sqrt(ni)
comp.V.resid <- expression({
Vmu <- variance(mu)
if (any(is.na(Vmu))) stop("NAs in V(mu)")
if (any(Vmu == 0)) stop("0s in V(mu)")
sVF <- sqrt(Vmu) # square root of variance function
residP <- (y - mu)* sni/sVF # Pearson residuals
})

## Copy-paste from ./glmrobMqle.R [overkill currently: Poisson has sni == 1]
sni <- sqrt(as.vector(weights))
V_resid <- function(mu, y) {
Vmu <- family$variance(mu)
if (anyNA(Vmu)) stop("NAs in V(mu)")
if (any(Vmu == 0)) stop( "0s in V(mu)")
## return Pearson residuals :
(y - mu)* sni/sqrt(Vmu)
}

m.approx <- mk.m_rho(cw)
w <- robXweights(weights.on.x, x, intercept=intercept)
Expand Down Expand Up @@ -404,8 +400,8 @@ glmrobMT <- function(x,y, weights = NULL, start = NULL, offset = NULL,
beta <- estim2$par
cov <- covasin(x,y, beta=beta, cw=cw, m.approx=m.approx, w=w)
eta <- as.vector(x %*% beta) # + offset
mu <- linkinv(eta)
eval(comp.V.resid)#-> residP ==(here!) == residPS
mu <- family$linkinv(eta)
residP <- V_resid(mu, y)# residP == (here!) residPS

## As sumaConPesos() computes
## eta <- x %*% beta
Expand Down
64 changes: 44 additions & 20 deletions R/covMcd.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,11 @@ covMcd <- function(x,
logdet.Lrg <- 50 ## <-- FIXME add to rrcov.control() and then use that
## Analyze and validate the input parameters ...
if(length(seed) > 0) {
if(length(seed) < 3 || seed[1L] < 100)
if(length(seed) < 3L || seed[1L] < 100L)
stop("invalid 'seed'. Must be compatible with .Random.seed !")
if(exists(".Random.seed", envir=.GlobalEnv, inherits=FALSE)) {
seed.keep <- get(".Random.seed", envir=.GlobalEnv, inherits=FALSE)
on.exit(assign(".Random.seed", seed.keep, envir=.GlobalEnv))
}
assign(".Random.seed", seed, envir=.GlobalEnv)
if(!is.null(seed.keep <- get0(".Random.seed", envir = .GlobalEnv, inherits = FALSE)))
on.exit(assign(".Random.seed", seed.keep, envir = .GlobalEnv))
assign(".Random.seed", seed, envir = .GlobalEnv)
}

## For back compatibility, as some new args did not exist pre 2013-04,
Expand Down Expand Up @@ -238,7 +236,13 @@ covMcd <- function(x,
ans <- c(ans, cov.wt(x, wt = weights, cor=cor))

if(sum.w != n) {
cdelta.rew <- .MCDcons(p, sum.w/n) ## VT::19.3.2007
## VT::05.04.2023
## The correct consistency correction factor for the reweighted estimate
## would be .MCDcons(p, 0.975) and not .MCDcons(p, sum.w/n) - see mail from
## Andreas Alfons from 29.01.2020 and Croux and Haesbroeck (1999), equations 4.1 and 4.2.
## cdelta.rew <- .MCDcons(p, sum.w/n) ## VT::19.3.2007
cdelta.rew <- .MCDcons(p, 0.975)

correct.rew <- if(use.correction) .MCDcnp2.rew(p, n, alpha) else 1.
cnp2 <- c(cdelta.rew, correct.rew)
ans$cov <- cdelta.rew * correct.rew * ans$cov
Expand Down Expand Up @@ -270,7 +274,12 @@ covMcd <- function(x,
## the MCD location and scatter matrix, the latter being singular
## (as it should be), as well as the equation of the hyperplane.

dim(mcd$coeff) <- c(5, p)
## VT::31.08.2022 - raw.only was not implemeted for the case nsamp="deterministic"
## (if the FORTRAN code is not called mcd$coeff and mcd$weights do not exist).
## Reported by Aurore Archimbaud <[email protected]>
if(!is.null(mcd$coeff))
dim(mcd$coeff) <- c(5, p)

ans$cov <- ans$raw.cov <- mcd$initcovariance
ans$center <- ans$raw.center <- as.vector(mcd$initmean)

Expand All @@ -280,9 +289,7 @@ covMcd <- function(x,
}
ans$n.obs <- n

if(raw.only) {
ans$raw.only <- TRUE
} else {
if(mcd$exactfit != 0) {
## no longer relevant:
## if(mcd$exactfit == -1)
## stop("The program allows for at most ", mcd$kount, " observations.")
Expand All @@ -297,15 +304,27 @@ covMcd <- function(x,
ans$singularity <-
list(kind = "on.hyperplane", exactCode = mcd$exactfit,
p = p, h = h, count = mcd$kount, coeff = mcd$coeff[1,])

ans$crit <- -Inf # = log(0)
weights <- mcd$weights

} else {
## VT::31.08.2022 - raw.only was not implemeted for the case nsamp="deterministic"
ans$raw.only <- TRUE
ans$crit <- mcd$mcdestimate
weights <- mcd$weights
if(is.null(mcd$weights)) {
## FIXME? here, we assume that mcd$initcovariance is not singular:
mah <- mahalanobis(x, mcd$initmean, mcd$initcovariance, tol = tolSolve)
weights <- wgtFUN(mah)
}
}
ans$alpha <- alpha
ans$quan <- h
if(names && !is.null(nms <- dimn[[2]])) {
names(ans$raw.center) <- nms
dimnames(ans$raw.cov) <- list(nms,nms)
}
ans$crit <- -Inf # = log(0)
weights <- mcd$weights

} ## end (raw.only || exact fit)

Expand All @@ -322,7 +341,12 @@ covMcd <- function(x,
## Compute and apply the consistency correction factor for
## the reweighted cov
if(!sing.rewt && sum.w != n) {
cdelta.rew <- .MCDcons(p, sum.w/n) ## VT::19.3.2007
## VT::05.04.2023
## The correct consistency correction factor for the reweighted estimate
## would be .MCDcons(p, 0.975) and not .MCDcons(p, sum.w/n) - see mail from
## Andreas Alfons from 29.01.2020 and Croux and Haesbroeck (1999), equations 4.1 and 4.2.
## cdelta.rew <- .MCDcons(p, sum.w/n) ## VT::19.3.2007
cdelta.rew <- .MCDcons(p, 0.975)
correct.rew <- if(use.correction) .MCDcnp2.rew(p, n, alpha) else 1.
cnp2 <- c(cdelta.rew, correct.rew)
ans$cov <- cdelta.rew * correct.rew * ans$cov
Expand Down Expand Up @@ -519,11 +543,11 @@ print.mcd <- function(x, digits = max(3, getOption("digits") - 3), print.gap = 2
"\n", sep="")
## VT::29.03.2007 - solve a conflict with fastmcd() in package robust -
## also returning an object of class "mcd"
xx <- NA
if(!is.null(x$crit))
xx <- format(x$crit, digits = digits)
else if (!is.null(x$raw.objective))
xx <- format(log(x$raw.objective), digits = digits)
xx <- if(!is.null(x$crit))
format(x$crit, digits = digits)
else if (!is.null(x$raw.objective))
format(log(x$raw.objective), digits = digits)
else NA
cat("Log(Det.): ", xx , "\n\nRobust Estimate of Location:\n")
print(x$center, digits = digits, print.gap = print.gap, ...)
cat("Robust Estimate of Covariance:\n")
Expand Down Expand Up @@ -573,7 +597,7 @@ print.summary.mcd <-
##' (see calfa in Croux and Haesbroeck)
##' @param p
##' @param alpha alpha ~= h/n = quan/n
##' also use for the reweighted MCD, calling with alpha = 'sum(weights)/n'
##' also use for the reweighted MCD, calling with alpha = 0.975
MCDcons <- # <- *not* exported, but currently used in pkgs rrcov, rrcovNA
.MCDcons <- function(p, alpha)
{
Expand Down
7 changes: 5 additions & 2 deletions R/huber.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ huberM <-

if(se && !is.null(weights))
stop("Std.error computation not yet available for the case of 'weights'")
missS <- missing(s)
if(missS && is.na(s)) # e.g. for x = c(-Inf, 1)
s <- 0
if (s <= 0) {
if(s < 0) stop("negative scale 's'")
if(warn0scale && n > 1)
Expand All @@ -40,7 +43,7 @@ huberM <-
it <- it + 1L
y <- pmin(pmax(mu - k * s, x), mu + k * s)
mu1 <- wsum(y) / sum.w
if (abs(mu - mu1) < tol * s)
if (is.na(mu1) || abs(mu - mu1) < tol * s)
break
mu <- mu1
}
Expand All @@ -61,7 +64,7 @@ huber <- function (y, k = 1.5, tol = 1e-06)
if(n == 0) # e.g 'y' was all na
return(list(mu = NA, s = NA))# instead of error
mu <- median(y)
s <- mad(y)
s <- mad(y, center=mu)
if (s == 0) { # FIXME? make this warning optional
if(n > 1) warning("scale MAD is zero for this sample")
}
Expand Down
38 changes: 38 additions & 0 deletions R/huberize.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
## trimmed mad := trimmed *[M]ean* of [A]bsolute [D]eviations (from the median; more generally 'center')
## = === {no default for trim on purpose}
tmad <- function(x, center = median(x), trim, na.rm = FALSE)
{
stopifnot(is.numeric(trim), length(trim) == 1L, 0 <= trim, trim <= 0.5)
if(na.rm)
x <- x[!is.na(x)]
## TODO: consistency correction (for non-large) 'n' as a function of trim
## ---- not needed for huberize() though
## n <- length(x)
mean(abs(x - center), trim=trim)
}

## Estimates mu (optionally, via huberM(.)) and sigma of x
## x: without NA: na.rm=TRUE must have happened
## sets boundaries at M +/- c*sigma
## sets outliers to be equal to lower/upper boundaries
huberize <- function(x, M = huberM(x, k=k)$mu, c = k,
trim = (5:1)/16, # Lukas Graz' MSc thesis had c(0.25, 0.15, 0.075)
k = 1.5,
warn0 = getOption("verbose"), saveTrim = TRUE)
{
stopifnot(is.numeric(M), length(M) == 1,
length(trim) >= 1, trim >= 0, diff(trim) < 0) # trim must be strictly decreasing
qn. <- Qn(x)
j <- 0L
while(!is.na(qn.) && qn. == 0 && j < length(trim))
qn. <- tmad(x, center = M, trim = trim[j <- j+1L])
## ~~~~
if(qn. == 0 && warn0)
warning(sprintf("Qn(x) == 0 and tmad(x, trim=%g) == 0", trim[j]))
upper <- M + qn.*c # qnorm(c,lower.tail = F)
lower <- M - qn.*c
x[x > upper] <- upper
x[x < lower] <- lower
## store the final 'trim' used (if there was one) as attribute:
if(j && saveTrim) structure(x, trim = trim[j]) else x
}
Loading

0 comments on commit ddc5e32

Please sign in to comment.