Skip to content

Commit

Permalink
add lad optimization script
Browse files Browse the repository at this point in the history
  • Loading branch information
deepayan committed Nov 9, 2024
1 parent 96c51e6 commit 72161df
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/2024-01-DE/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ <h3>R scripts</h3>
<ul>
<li><p><a href="scripts/survey-script.R">Preliminary analysis of survey data</a></p></li>
<li><p><a href="scripts/bivariate.R">Summarizing bivariate data</a></p></li>
<li><p><a href="scripts/lad-optimization.R">Optimization for LAD regression</a></p></li>
</ul>

<h3>Resources</h3>
Expand Down
18 changes: 18 additions & 0 deletions docs/2024-01-DE/scripts/bivariate.R
Original file line number Diff line number Diff line change
Expand Up @@ -239,5 +239,23 @@ xyplot(BPSysAve ~ Age, data = nhSubF, grid = TRUE,
type = c("p", "smooth"), abline = abNH.FLAD, col.line = "red")


## Is R^2 == r^2

R2 <- function(x, y) {
if (length(x) != length(y)) stop("Lengths and x and y do not match")
skip <- is.na(x) | is.na(y)
x <- x[!skip]
y <- y[!skip]
ab <- lsfit(x, y)
r <- cor(x, y)
T2 <- sum((y - mean(y))^2)
S2 <- sum((y - ab[1] - ab[2] * x)^2)
R2 <- 1 - S2 / T2
c(r2 = r^2, R2 = R2)
}

R2(runif(20), runif(20))
R2(runif(20), rnorm(20))



85 changes: 85 additions & 0 deletions docs/2024-01-DE/scripts/lad-optimization.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@

## optimization for LAD regression

lad_loss_function <- function(x, y) {
if (length(x) != length(y)) stop("Lengths and x and y do not match")
skip <- is.na(x) | is.na(y)
x <- x[!skip]
y <- y[!skip]
lambda <- function(a, b) {
if (length(a) != 1) stop("a must be scalar")
if (length(b) != 1) stop("b must be scalar")
e <- abs(y - a - b * x)
sum(e)
}
lambda
}


DavisLoss <- with(Davis, lad_loss_function(x = height, y = weight))

DavisLoss
DavisLoss(2, 4)

## scope: x and y are found (in scope) because x and y are available
## in the function that defined this function. This kind of scoping
## is known as "lexical scoping".

## Question: How to find (a, b) which minimizes DavisLoss(a, b)?

## fix a=0, and vary b

bseq <- seq(0, 10, length.out = 21)

## loops (for vs sapply)

f <- numeric(length(bseq))
for (i in seq_along(f)) {
f[i] <- DavisLoss(a = 0, b = bseq[i])
}

## alternative using sapply() / vapply()


sapply(bseq, function(b) DavisLoss(a = 0, b = b))

sapply(bseq, DavisLoss, a = 0)

## safer version of sapply where expected result type is specified
vapply(bseq, DavisLoss, a = 0, FUN.VALUE = numeric(1))


bseq <- seq(0, 2, length.out = 201)
f <- vapply(bseq, DavisLoss, a = 0, FUN.VALUE = numeric(1))

plot(f ~ bseq, type = "l")

g <-
expand.grid(a = seq(-500, 500, length.out = 200),
b = seq(-3, 3, length.out = 200))

g$loss <- 0

for (i in seq_along(g$loss)) {
g$loss[i] <- DavisLoss(g$a[i], g$b[i])
}

g[which.min(g$loss), ]

## contour plots / image plots or level plots

library(lattice)

contourplot(loss ~ a + b, data = g)

levelplot(loss ~ a + b, data = g)

wireframe(loss ~ a + b, data = g, shade = TRUE)

plot(weight ~ height, Davis)
abline(abDavisLAD)
abline(-113.0653, 1.040201, col = "red")




0 comments on commit 72161df

Please sign in to comment.