---
title: "LinearDiscriminantAnalysis"
author: "Cyrus Tanade"
date: "10/27/2020"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r message=FALSE}
library(tidyverse)
library(ggplot2)
library(lubridate)
library(patchwork)
library(gridExtra)
library(psych)
library(corrplot)
library(ggfortify)
library(factoextra)
library(class) #knn
library(gmodels) # CrossTable()
library(caret) # createFolds()
library(caTools) #sample.split()
library(ROCR) # prediction(), performance()
library(MLeval)
library(MLmetrics)
library(sparseLDA)
set.seed(123)
```
Import the time-domain features (remove later with `train_df`). The dataset is standardized/scaled before PCA, which is used to find the top-contributing features (see the sketch after the next chunk), and the standardized data are then split into training and test sets.
```{r}
df <- read_csv("time_features.csv")

# Set the labels aside, then center and scale every feature column
targets <- df$Targets
standardized <- scale(df[, 2:ncol(df)], center = TRUE, scale = TRUE)
df_standard <- as_tibble(cbind(standardized, targets))

# 80/20 train/test split (caTools::sample.split), with a few rows dropped manually
sample <- sample.split(df_standard$mean_Falls_df.accX, SplitRatio = 0.8)
train_ti <- subset(df_standard, sample == TRUE)
train_ti <- train_ti[-1, ]          # drop the first training row
test_ti <- subset(df_standard, sample == FALSE)
test_ti <- test_ti[-c(1, 2, 3), ]   # drop the first three test rows

# Frequency-domain features (these files hold the overall dataframes, not true splits)
train_fq <- read_csv("Train_feat_df.csv")
test_fq <- read_csv("Test_feat_df.csv")
```
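
The notes above mention PCA for finding the top contributors, but no PCA step appears in this file. Below is a minimal sketch of what that could look like on the standardized features, using `prcomp()` with `fviz_eig()` and `fviz_contrib()` from factoextra (loaded earlier); treating every column of `df_standard` except `targets` as a feature is an assumption about the data layout.
```{r}
# Sketch only: PCA on the already-standardized features to see which variables
# contribute most to the leading components.
# Assumes every column of df_standard except `targets` is a numeric feature.
feature_cols <- setdiff(names(df_standard), "targets")
pca_fit <- prcomp(df_standard[, feature_cols], center = FALSE, scale. = FALSE)

fviz_eig(pca_fit)                                            # variance explained per component
fviz_contrib(pca_fit, choice = "var", axes = 1:2, top = 10)  # top contributors to PC1 and PC2
```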
```{r}
library(MASS) # classical LDA; note that MASS::select masks dplyr::select

# Tune a sparse LDA classifier with k-fold repeated cross-validation
build_LDA_CV2 <- function(k, train_df, test_df) {
  # test_df is not used here; see the held-out evaluation sketch below

  # Recode the 0/1 targets as a factor so caret treats this as two-class classification
  train_df$targets <- factor(ifelse(train_df$targets == 1, "yes", "no"),
                             levels = c("no", "yes"))

  myControl <- trainControl(
    method = "repeatedcv", number = k,
    summaryFunction = twoClassSummary,
    classProbs = TRUE,
    verboseIter = TRUE,
    savePredictions = TRUE
  )

  # Grid over the number of retained variables and the ridge penalty
  myGrid <- expand.grid(NumVars = 2:10,
                        lambda = c(0, 0.01, 0.1, 1, 10, 100))

  set.seed(33)
  model <- train(targets ~ .,
                 data = train_df,
                 method = "sparseLDA",
                 tuneGrid = myGrid,
                 metric = "ROC",
                 trControl = myControl)

  # Check the model (print() is needed for output from inside a function)
  print(model)
  plot(model)
}
build_LDA_CV2(5, train_ti, test_ti)
```
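
`build_LDA_CV2()` receives `test_df` but never touches it, so the tuned model is only assessed by cross-validation. The sketch below shows one way the held-out split could be scored, assuming the function is modified to end with `return(model)` (or the caret fit is otherwise captured as `lda_fit`, a name introduced here for illustration); it uses `confusionMatrix()` from caret and `prediction()`/`performance()` from ROCR, both loaded at the top of this file.
```{r}
# Sketch only: held-out evaluation of the tuned sparse LDA model.
# Assumes `lda_fit` holds the caret train object, e.g. after changing
# build_LDA_CV2() to return(model):
# lda_fit <- build_LDA_CV2(5, train_ti, test_ti)

evaluate_on_test <- function(fit, test_df) {
  # Recode the 0/1 targets the same way as the training labels
  truth <- factor(ifelse(test_df$targets == 1, "yes", "no"), levels = c("no", "yes"))

  pred_class <- predict(fit, newdata = test_df)                          # hard class labels
  pred_prob <- predict(fit, newdata = test_df, type = "prob")[, "yes"]   # probability of "yes"

  print(confusionMatrix(pred_class, truth, positive = "yes"))
  auc <- performance(prediction(pred_prob, truth), "auc")@y.values[[1]]
  cat("Held-out AUC:", auc, "\n")
}

# evaluate_on_test(lda_fit, test_ti)
```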