-
Notifications
You must be signed in to change notification settings - Fork 0
/
gao_sample.txt
60 lines (47 loc) · 1.6 KB
/
gao_sample.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
### Professor X. Gao's Sample R Code
### Original Source can be found at https://xingao.info.yorku.ca/files/2020/01/math3333projectsamplecode.txt
### Photograph examples
## Read in the library and metadata
library(jpeg)
pm <- read.csv("C:\\Users\\xingao\\Documents\\teaching\\winter2020\\math3333\\photoMetaData.csv")
n <- nrow(pm)
trainFlag <- (runif(n) > 0.5)
y <- as.numeric(pm$category == "outdoor-day")
X <- matrix(NA, ncol=3, nrow=n)
for (j in 1:n) {
img <- readJPEG(paste0("C:\\Users\\xingao\\Documents\\teaching\\winter2020\\math3333\\columbiaImages\\columbiaImages\\",pm$name[j]))
X[j,] <- apply(img,3,median)
print(sprintf("%03d / %03d", j, n))
}
# build a glm model on these median values
out <- glm(y ~ X, family=binomial, subset=trainFlag)
out$iter
summary(out)
# How well did we do?
pred <- 1 / (1 + exp(-1 * cbind(1,X) %*% coef(out)))
y[order(pred)]
y[!trainFlag][order(pred[!trainFlag])]
mean((as.numeric(pred > 0.5) == y)[trainFlag])
mean((as.numeric(pred > 0.5) == y)[!trainFlag])
## ROC curve (see lecture 12)
roc <- function(y, pred) {
alpha <- quantile(pred, seq(0,1,by=0.01))
N <- length(alpha)
sens <- rep(NA,N)
spec <- rep(NA,N)
for (i in 1:N) {
predClass <- as.numeric(pred >= alpha[i])
sens[i] <- sum(predClass == 1 & y == 1) / sum(y == 1)
spec[i] <- sum(predClass == 0 & y == 0) / sum(y == 0)
}
return(list(fpr=1- spec, tpr=sens))
}
r <- roc(y[!trainFlag], pred[!trainFlag])
plot(r$fpr, r$tpr, xlab="false positive rate", ylab="true positive rate", type="l")
abline(0,1,lty="dashed")
# auc
auc <- function(r) {
sum((r$fpr) * diff(c(0,r$tpr)))
}
glmAuc <- auc(r)
glmAuc