SVM.Rmd

---
title: "SVM"
author: "Nicole Zimmer"
date: "10/27/2020"
output: html_document
---


```{r setup, include=FALSE}
# Echo the source of every chunk in the rendered document by default.
knitr::opts_chunk$set(echo = TRUE)
```

```{r message=FALSE}
library(tidyverse)
library(ggplot2)
library(lubridate)
library(patchwork)
library(gridExtra)
library(psych)
library(corrplot)
library(ggfortify)
library(factoextra)
library(class) #knn
library(gmodels) # CrossTable()
library(caret) # creatFolds()
library(caTools) #sample.split()
library(ROCR) # prediction(), performance()
library(MLeval)
library(MLmetrics)
library(sparseLDA)
library(kernlab)
set.seed(123)
```
- Import time features (remove later with `train_df`).
- PCA to find top contributors.
- Standardize/scale the dataset before PCA.

```{r}
# Load the time-feature table. `Targets` is copied out before scaling and is
# later recoded as the 0/1 class label by build_SVM_CV2().
df <- read_csv("time_features.csv")
targets <- df$Targets

# Standardize (zero mean, unit variance) every column after the first.
# `Targets` is excluded by name so the label can never be scaled and left
# among the predictors, wherever it sits in the file.
# NOTE(review): column 1 is skipped exactly as before -- presumably it is an
# index column or `Targets` itself; confirm against the CSV.
feature_cols <- setdiff(names(df)[-1], "Targets")
standardized <- scale(df[, feature_cols], center = TRUE, scale = TRUE)

# Re-attach the unscaled label as the column `targets`.
standardized <- cbind(standardized, targets)
df_standard <- as_tibble(standardized)

# NOTE(review): scaling is done on the full table before the split, so the
# test rows contribute to the centering/scaling statistics.
# NOTE(review): caTools::sample.split() is documented to take the label
# vector so both subsets keep the same class ratio; here it receives a
# feature column. Left unchanged because the hard-coded row removals below
# may depend on this exact split -- consider splitting on
# `df_standard$targets` once those removals are understood.
sample <- sample.split(df_standard$mean_Falls_df.accX, SplitRatio = 0.8)

train_ti <- subset(df_standard, sample)
# NOTE(review): drops the first training row; the reason is not recorded.
train_ti <- train_ti[-c(1), ]

test_ti <- subset(df_standard, !sample)
# NOTE(review): drops the first three test rows; the reason is not recorded.
test_ti <- test_ti[-c(1, 2, 3), ]

train_fq <- read_csv("Train_feat_df.csv") # this is actually overall dataframe
test_fq <- read_csv("Test_feat_df.csv") # this is actually overall dataframe

```
```{r}
# Tune a polynomial-kernel SVM with k-fold cross-validation and report ROC.
#
# Args:
#   k:        Number of cross-validation folds passed to trainControl().
#   train_df: Data frame with a numeric 0/1 `targets` column; all remaining
#             columns are used as predictors (`targets ~ .`).
#   test_df:  Not used by this function; kept so existing calls still work.
#
# Returns:
#   The `roc` element of the MLeval::evalm() result. The fitted model, the
#   tuning plot, the best tuning parameters and the best cross-validated ROC
#   are printed along the way.
build_SVM_CV2 <- function(k, train_df, test_df) {
  stopifnot(all(train_df$targets %in% c(0, 1)))

  # classProbs = TRUE needs a factor outcome whose levels are valid R names.
  # "no" is the first level, the same order the previous character recoding
  # produced once the column was converted to a factor.
  train_df$targets <- factor(
    train_df$targets,
    levels = c(0, 1),
    labels = c("no", "yes")
  )

  # NOTE(review): `repeats` is not set, so caret's default applies -- confirm
  # whether more than one repeat of the k-fold CV was intended.
  myControl <- trainControl(
    method = "repeatedcv",
    number = k,
    summaryFunction = twoClassSummary,
    classProbs = TRUE,
    verboseIter = TRUE,
    savePredictions = TRUE
  )

  # Fixed seed so the resampling folds are reproducible.
  set.seed(33)
  model <- train(
    targets ~ .,
    data = train_df,
    method = "svmPoly",
    metric = "ROC",
    trControl = myControl,
    tuneLength = 3
  )

  # Check the model. Values are not auto-printed inside a function, and the
  # plot of a train object is only drawn when it is printed, so print
  # everything explicitly.
  print(model)
  print(plot(model))
  print(model$bestTune)
  print(max(model$results$ROC))

  # evalm() builds the ROC plot from the saved CV predictions. The plot is
  # stored in the evalm() result, not in the train object.
  res <- evalm(model)
  res$roc
}

# 5-fold CV on the time-feature training split. `test_ti` is passed to match
# the signature, but the function body does not use it.
build_SVM_CV2(5, train_ti, test_ti)

```


```{r}
# Tune a polynomial-kernel SVM over an explicit parameter grid with k-fold
# cross-validation and report ROC. This redefines the build_SVM_CV2() from
# the earlier chunk, replacing `tuneLength` with a fixed `tuneGrid`.
#
# Args:
#   k:        Number of cross-validation folds passed to trainControl().
#   train_df: Data frame with a numeric 0/1 `targets` column; all remaining
#             columns are used as predictors (`targets ~ .`).
#   test_df:  Not used by this function; kept so existing calls still work.
#
# Returns:
#   The `roc` element of the MLeval::evalm() result. The fitted model, the
#   tuning plot, the best tuning parameters and the best cross-validated ROC
#   are printed along the way.
build_SVM_CV2 <- function(k, train_df, test_df) {
  stopifnot(all(train_df$targets %in% c(0, 1)))

  # classProbs = TRUE needs a factor outcome whose levels are valid R names.
  # "no" is the first level, the same order the previous character recoding
  # produced once the column was converted to a factor.
  train_df$targets <- factor(
    train_df$targets,
    levels = c(0, 1),
    labels = c("no", "yes")
  )

  # NOTE(review): `repeats` is not set, so caret's default applies -- confirm
  # whether more than one repeat of the k-fold CV was intended.
  myControl <- trainControl(
    method = "repeatedcv",
    number = k,
    summaryFunction = twoClassSummary,
    classProbs = TRUE,
    verboseIter = TRUE,
    savePredictions = TRUE
  )

  # Candidate values for the three svmPoly tuning parameters (27 combinations).
  myGrid <- expand.grid(
    C = c(0.25, 0.5, 0.75),
    degree = c(2, 3, 4),
    scale = c(0.001, 0.01, 0.1)
  )

  # Fixed seed so the resampling folds are reproducible.
  set.seed(33)
  model <- train(
    targets ~ .,
    data = train_df,
    method = "svmPoly",
    tuneGrid = myGrid,
    metric = "ROC",
    trControl = myControl
  )

  # Check the model. Values are not auto-printed inside a function, and the
  # plot of a train object is only drawn when it is printed, so print
  # everything explicitly.
  print(model)
  print(plot(model))
  print(model$bestTune)
  print(max(model$results$ROC))

  # evalm() builds the ROC plot from the saved CV predictions. The plot is
  # stored in the evalm() result, not in the train object.
  res <- evalm(model)
  res$roc
}

# 5-fold CV with the grid-search variant defined in this chunk. `test_ti` is
# passed to match the signature, but the function body does not use it.
build_SVM_CV2(5, train_ti, test_ti)

```