-
Notifications
You must be signed in to change notification settings - Fork 0
/
assigment2_functions_macode.r
59 lines (55 loc) · 1.5 KB
/
assigment2_functions_macode.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
prop_na <- function(xv) {
  ## Proportion of missing entries in xv: the count of NA values
  ## divided by length(xv). Gives NaN when xv has length zero (0 / 0).
  n_missing <- sum(is.na(xv))
  n_total <- length(xv)
  n_missing / n_total
}
io_scale <- function(var_not=c(), data) {
  ## Min-max scale variables to the [0, 1] interval.
  ##
  ## var_not: names of the columns that must NOT be scaled; every other
  ##          column of data is scaled. Defaults to none (scale all).
  ## data:    dataset whose columns are selected by colnames(data).
  ##
  ## Returns data with each scaled column replaced by
  ## (x - min(x)) / (max(x) - min(x)).
  ##
  ## Missing values are not supported: a warning is raised when a scaled
  ## column contains one, and min()/max() then propagate NA through that
  ## whole column. A constant column comes back as NaN (0 / 0).
  no.dep <- !colnames(data) %in% var_not
  ## Nothing left to scale: hand the data back untouched instead of
  ## failing on an empty selection.
  if (!any(no.dep)) {
    return(data)
  }
  ## drop = FALSE keeps a single selected column two-dimensional, so the
  ## check behaves the same for one column as for several.
  if (any(!complete.cases(data[, no.dep, drop = FALSE]))) {
    warning('Incomplete cases')
  }
  ## Scale one column at a time; this avoids coercing the whole selection
  ## to a matrix and works when only one column is selected.
  for (j in which(no.dep)) {
    x <- data[, j]
    x_min <- min(x)
    data[, j] <- (x - x_min) / (max(x) - x_min)
  }
  return(data)
}
get_maxcors <- function(var_aimed, data, ncors=10, na.rm=TRUE) {
  ## Compute the correlation of var_aimed with every other variable in
  ## data and return the ncors largest ones.
  ##
  ## var_aimed: name of the target column in data.
  ## data:      data frame; each remaining column is passed to cor().
  ## ncors:     maximum number of correlations returned; capped at the
  ##            number of other columns.
  ## na.rm:     if TRUE, rows holding any missing value are dropped first.
  ##
  ## Returns a numeric vector sorted in decreasing order of the signed
  ## correlation (strong negative correlations therefore come last),
  ## named '<var_aimed>:<other column>'.
  if (na.rm) {
    data <- data[complete.cases(data), , drop = FALSE]
  }
  Y <- data[[var_aimed]]
  ## drop = FALSE keeps a data frame even when only one other column
  ## remains, so the columns can still be iterated one by one.
  Xcols <- data[, colnames(data) != var_aimed, drop = FALSE]
  corv <- vapply(Xcols, function(x) cor(Y, x), numeric(1))
  if (length(corv) > 0) {
    names(corv) <- paste0(var_aimed, ':', colnames(Xcols))
  }
  corv <- corv[order(-corv)]
  ## seq_len() yields an empty index for zero instead of c(1, 0).
  n_keep <- max(0, min(ncors, length(corv)))
  corv[seq_len(n_keep)]
}
is_outlier <- function(x, thres.mult=2.5) {
  ## Flag the values of x lying more than thres.mult interquartile
  ## ranges away from the median.
  ##
  ## x:          numeric vector; NA entries are ignored when computing the
  ##             quartiles and are never flagged.
  ## thres.mult: multiplier applied to the interquartile range (q3 - q1).
  ##
  ## Returns a logical vector of the same length as x (TRUE = outlier).
  ##
  ## names = FALSE keeps the quantile labels ('25%', '50%', ...) from
  ## leaking into the names of the result.
  qs <- quantile(x, c(0.25, 0.5, 0.75), na.rm=TRUE, names=FALSE)
  xmed <- qs[2]
  iqr <- qs[3] - qs[1]
  ## The distance is measured from the median, so unusually low values
  ## are detected as well as unusually high ones, whatever the sign or
  ## location of the data.
  (!is.na(x)) & (abs(x - xmed) > thres.mult * iqr)
}