forked from ed-wilkes/predictive-modelling
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredictBinaryClasses.R
44 lines (36 loc) · 1.82 KB
/
predictBinaryClasses.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#' @name predictBinaryClasses
#' @author Ed Wilkes
#'
#' @description Takes tuned thresholds from findProbCutoff and makes hard class calls on
#' outputs from performedNestedCVParallel
#'
#' @param predictions 'predictions' object from performNestedCVParallel output
#' @param thresholds Threshold data frame from findProbCutoff output
#' @param target String denoting the target, positive class
#'
#' @return List of data frames containing the predictions made using the tuned probability
#' thresholds from findProbCutoff
#'
predictBinaryClasses <- function(predictions, thresholds, target) {
## Split input data frame into separate lists for each fold
predictions <- split(predictions, predictions$fold)
## Find names of positive and negative classes
label_vector <- unique(predictions[[1]]$obs)
if(length(label_vector) > 2) {
stop("There are more than two levels in the outcome variable, binary classification
is not possible!")
}
neg_class <- as.character(label_vector[which(label_vector != target)])
pos_class <- as.character(label_vector[which(label_vector == target)])
## Loop across predictions and make hard prediction based on tuned threshold
for(rep in 1:length(predictions)) {
threshold <- thresholds$best_prob[which(thresholds$fold == names(predictions)[rep])]
# Predict class based on probability threshold
colnames(predictions[[rep]])[which(colnames(predictions[[rep]]) == "pred")] <- "pred_num"
predictions[[rep]]$pred <- factor(ifelse(predictions[[rep]]$pred_num >= threshold
,yes = pos_class
,no = neg_class)
,levels = levels(predictions[[rep]]$obs))
}
return(bind_rows(predictions))
}