
Fixes issue #36 #37

Closed
wants to merge 12 commits
3 changes: 2 additions & 1 deletion .Rbuildignore
@@ -4,4 +4,5 @@ proj$
^NEWS.*$
.travis.yml
^CODE_OF_CONDUCT\.md$
^CONTRIBUTING\.md$
^CONTRIBUTING\.md$
^.*\.Rproj$
4 changes: 4 additions & 0 deletions .gitignore
@@ -1,3 +1,6 @@
.RData
*.Rproj

# R History
*.Rhistory

@@ -6,3 +9,4 @@

# Project files
[Bb]uild/
.Rproj.user
5 changes: 3 additions & 2 deletions DESCRIPTION
@@ -9,11 +9,12 @@ Description: An implementation of evaluation metrics in R that are commonly
Authors@R: c(
person("Ben", "Hamner", role = c("aut", "cph"), email = "[email protected]"),
person("Michael", "Frasco", role = c("aut", "cre"), email = "[email protected]"),
person("Erin", "LeDell", role = c("ctb"), email = "[email protected]"))
person("Erin", "LeDell", role = c("ctb"), email = "[email protected]"),
person("Jesus", "Castagnetto", role = c("ctb"), email = "[email protected]"))
Maintainer: Michael Frasco <[email protected]>
Suggests:
testthat
URL: https://github.com/mfrasco/Metrics
BugReports: https://github.com/mfrasco/Metrics/issues
License: BSD_3_clause + file LICENSE
RoxygenNote: 6.1.0
RoxygenNote: 6.1.1
10 changes: 10 additions & 0 deletions NAMESPACE
@@ -12,16 +12,24 @@ export(ce)
export(explained_variation)
export(f1)
export(fbeta_score)
export(fdr)
export(fnr)
export(fomr)
export(fpr)
export(ll)
export(logLoss)
export(lrn)
export(lrp)
export(mae)
export(mape)
export(mapk)
export(mase)
export(mdae)
export(mse)
export(msle)
export(npv)
export(percent_bias)
export(ppv)
export(precision)
export(rae)
export(recall)
@@ -30,7 +38,9 @@ export(rmsle)
export(rrse)
export(rse)
export(se)
export(sensitivity)
export(sle)
export(smape)
export(specificity)
export(sse)
importFrom(stats,median)
277 changes: 260 additions & 17 deletions R/binary_classification.R
@@ -12,7 +12,7 @@ NULL
#' Area under the ROC curve (AUC)
#'
#' \code{auc} computes the area under the receiver-operator characteristic curve (AUC).
#'
#'
#' \code{auc} uses the fact that the area under the ROC curve is equal to the probability
#' that a randomly chosen positive observation has a higher predicted value than a
#' randomly chosen negative value. In order to compute this probability, we can
@@ -80,56 +80,58 @@ logLoss <- function(actual, predicted) {


#' Precision
#'
#'
#' \code{precision} computes proportion of observations predicted to be in the
#' positive class (i.e. the element in \code{predicted} equals 1)
#' that actually belong to the positive class (i.e. the element
#' that actually belong to the positive class (i.e. the element
#' in \code{actual} equals 1)
#'
#'
#' @inheritParams params_binary
#' @export
#' @seealso \code{\link{recall}} \code{\link{fbeta_score}}
#' @examples
#' @seealso \code{\link{recall}} \code{\link{fbeta_score}} \code{\link{ppv}}
#' @examples
#' actual <- c(1, 1, 1, 0, 0, 0)
#' predicted <- c(1, 1, 1, 1, 1, 1)
#' precision(actual, predicted)
precision <- function(actual, predicted) {
return(mean(actual[predicted == 1]))
cm <- confusion_matrix(actual, predicted)
cm$tp / (cm$tp + cm$fp)
This returns NaN if tp and fp are both 0. That is fine in itself, but it should be clearly documented. Also note that other toolkits decided to return either 0 or 1 in such a case; there is a discussion about it on Stack Exchange: https://stats.stackexchange.com/questions/1773/what-are-correct-values-for-precision-and-recall-in-edge-cases.

Owner
The existing implementation of the function returns NaN, so we should be intentional about making this change. It'd need to be consistent across the entire package.

I'd support returning 0 when cm$tp + cm$fp == 0 and raising a warning along the lines of: "Precision is called with no positive predictions. The proper definition of precision is undefined. Returning 0 instead."
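A minimal sketch of that guard, assuming the confusion_matrix() helper added in this PR (the warning text and the choice of 0 are illustrative, not final):

precision <- function(actual, predicted) {
    cm <- confusion_matrix(actual, predicted)
    if (cm$tp + cm$fp == 0) {
        # no positive predictions were made, so precision is undefined
        warning("precision is called with no positive predictions; returning 0 instead")
        return(0)
    }
    cm$tp / (cm$tp + cm$fp)
}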

}

#' Recall
#'
#'
#' \code{recall} computes proportion of observations in the positive class
#' (i.e. the element in \code{actual} equals 1) that are predicted
#' to be in the positive class (i.e. the element in \code{predicted}
#' equals 1)
#'
#'
#' @inheritParams params_binary
#' @export
#' @seealso \code{\link{precision}} \code{\link{fbeta_score}}
#' @examples
#' @examples
#' actual <- c(1, 1, 1, 0, 0, 0)
#' predicted <- c(1, 0, 1, 1, 1, 1)
#' recall(actual, predicted)
recall <- function(actual, predicted) {
return(mean(predicted[actual == 1]))
cm <- confusion_matrix(actual, predicted)
cm$tp / (cm$tp + cm$fn)
}

#' F-beta Score
#'
#'
#' \code{fbeta_score} computes a weighted harmonic mean of Precision and Recall.
#' The \code{beta} parameter controls the weighting.
#'
#'
#' @inheritParams params_binary
#' @param beta A non-negative real number controlling how close the F-beta score is to
#' either Precision or Recall. When \code{beta} is at the default of 1,
#' @param beta A non-negative real number controlling how close the F-beta score is to
#' either Precision or Recall. When \code{beta} is at the default of 1,
#' the F-beta Score is exactly an equally weighted harmonic mean.
#' The F-beta score will weight toward Precision when \code{beta} is less
#' The F-beta score will weight toward Precision when \code{beta} is less
#' than one. The F-beta score will weight toward Recall when \code{beta} is
#' greater than one.
#' @export
#' @seealso \code{\link{precision}} \code{\link{recall}}
#' @examples
#' @examples
#' actual <- c(1, 1, 1, 0, 0, 0)
#' predicted <- c(1, 0, 1, 1, 1, 1)
#' recall(actual, predicted)
@@ -138,3 +140,244 @@ fbeta_score <- function(actual, predicted, beta = 1) {
rec <- recall(actual, predicted)
return((1 + beta^2) * prec * rec / ((beta^2 * prec) + rec))
}
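
A quick usage sketch showing how beta shifts the score between precision and recall for the example vectors used above:

actual    <- c(1, 1, 1, 0, 0, 0)
predicted <- c(1, 0, 1, 1, 1, 1)

fbeta_score(actual, predicted, beta = 1)    # equally weighted harmonic mean of precision and recall
fbeta_score(actual, predicted, beta = 0.5)  # weights toward precision (lower here, since precision < recall)
fbeta_score(actual, predicted, beta = 2)    # weights toward recall (higher here, since recall > precision)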

#' Binary confusion matrix
#'
#' \code{confusion_matrix} calculates a binary classification confusion matrix,
#' comparing the predicted with the actual values for the classes.
#' Assumes that 1 is used for the positive class and 0 for the
#' negative class.
#'
#' Returns a \code{data.frame} with columns corresponding to the
#' number of True Positives (\code{tp}), False Positives (\code{fp}),
#' True Negatives (\code{tn}), and False Negatives (\code{fn})
#'
#' @inheritParams params_binary
#' @seealso \code{\link{sensitivity}} \code{\link{specificity}}
#' @examples
#' actual <- c(1, 1, 1, 0, 0, 0)
#' predicted <- c(1, 0, 1, 1, 1, 1)
#' confusion_matrix(actual, predicted)
confusion_matrix <- function(actual, predicted) {
binvals <- c(0, 1)
b_actual <- unique(actual)
b_predicted <- unique(predicted)

# ideally "actual" should be a combination of 0s and 1s,
# but could be all 0s or all 1s as degenerate cases
if (!(
setequal(binvals, b_actual) |
setequal(c(0), b_actual) |
setequal(c(1), b_actual)
You don't need b_actual or the three calls to setequal(); !all(actual %in% 0:1) should do instead.

Owner
Yes. I agree that !all(actual %in% 0:1) is a good implementation.

This is a breaking change to the existing functionality of the package, as the current implementation does not break if the user provides a vector that doesn't contain 0 and 1.

When I took over maintenance of this package, I thought a lot about whether I should validate the inputs. At the time, it wasn't worth the effort, but I understand why we would want to implement it now.

If we do add input validation, I'd want this logic to be pulled into a separate function and used consistently across all binary classification metrics.

Author
Wanted to write so you know I am still interested in helping here. Sorry for not being responsive for quite some time; I have had some health issues that are resolving, and have not had time to look at the code and suggestions. Hopefully by the end of this week.

At a quick glance, @mllg's suggestions seem on point. As for input validation, it would be a "good thing" (TM), but I think it needs some thought, in particular if it involves breaking backward compatibility.

Perhaps putting this into a side branch would be better, so things can be merged into the main branch once they are in their final form.

)) {
stop(paste("Expecting a vector of 0s and 1s for 'actual'. Got:",
paste(actual, collapse = ", ")))
Owner
If actual has many values, this error message could get very large. You'd probably want to show only the first 5 or so unique values.
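For example, a hypothetical tweak (not part of this diff):

shown <- head(unique(actual), 5)
stop(paste("Expecting a vector of 0s and 1s for 'actual'. First unique values seen:",
           paste(shown, collapse = ", ")))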

}

# "predicted" could be all 0s, all 1s, or a combination
if (!(
setequal(binvals, b_predicted) |
setequal(c(0), b_predicted) |
setequal(c(1), b_predicted)
See above, same for b_predicted.

)) {
stop(paste("Expecting a vector of 0s and 1s for 'predicted'. Got:",
paste(predicted, collapse = ", ")))
}

if (length(actual) != length(predicted)) {
stop(
paste(
"Size of 'actual' and 'predicted' are not the same: length(actual):",
length(actual), "!= length(predicted):", length(predicted)
)
)
}

# explicit comparisons
data.frame(
"tp" = sum(actual == 1 & predicted == 1),
"fn" = sum(actual == 1 & predicted == 0),
"fp" = sum(actual == 0 & predicted == 1),
"tn" = sum(actual == 0 & predicted == 0)
)
A data.frame() seems like an odd choice for returning an integer(4).

Owner
I agree. A named vector would be simpler.
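A sketch of the named-vector alternative (confusion_counts is a hypothetical name; callers would then index with cm["tp"] instead of cm$tp):

confusion_counts <- function(actual, predicted) {
    # same four counts, returned as a named integer vector instead of a data.frame
    c(tp = sum(actual == 1 & predicted == 1),
      fn = sum(actual == 1 & predicted == 0),
      fp = sum(actual == 0 & predicted == 1),
      tn = sum(actual == 0 & predicted == 0))
}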

}
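
A minimal sketch of the shared input-validation helper discussed in the thread above (the name check_binary_input is a placeholder, not part of the PR):

check_binary_input <- function(x, name) {
    if (!all(x %in% 0:1)) {
        # report only the first few offending unique values
        stop(paste0("Expecting a vector of 0s and 1s for '", name, "'. Got: ",
                    paste(head(unique(x), 5), collapse = ", ")))
    }
    invisible(x)
}

# each binary classification metric would then start with:
# check_binary_input(actual, "actual")
# check_binary_input(predicted, "predicted")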

#' Sensitivity
#'
#' \code{sensitivity} calculates the proportion of actual positives (\code{actual} equals 1)
#' that are correctly identified as such. It is also known as
#' \code{true positive rate} or \code{recall}.
#'
#' @inheritParams params_binary
#' @seealso \code{\link{confusion_matrix}} \code{\link{specificity}}
#' @export
#' @examples
#' actual <- c(1, 1, 1, 0, 0, 0)
#' predicted <- c(1, 0, 1, 1, 1, 1)
#' sensitivity(actual, predicted)
sensitivity <- function(actual, predicted) {
recall(actual, predicted)
Owner
Thanks for implementing sensitivity as a function of recall.

I'm thinking of a better way to implement this so that sensitivity and recall actually are defined as the same function, not just one function that calls another. I want the functions to share documentation too. I'll look into the best ways for doing this.

I'm thinking about

foo <- bar <- baz <- function(actual, predicted) {
    ...
}

But I need to do research into how that impacts R packages and documentation.

}
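
One way to get a shared function object and shared documentation with roxygen2 is to define the metric once and alias it, pointing the alias at the same Rd page with @rdname; a hedged sketch:

#' @rdname recall
#' @export
sensitivity <- recall

# both names now refer to the same function object, and
# ?sensitivity and ?recall open the same help page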

#' Specificity
#'
#' \code{specificity} calculates the proportion of actual negatives (\code{actual} equals 0)
#' that are correctly identified as such. It is also known as
#' \code{true negative rate}.
#'
#' @inheritParams params_binary
#' @seealso \code{\link{confusion_matrix}} \code{\link{sensitivity}}
#' @export
#' @examples
#' actual <- c(1, 1, 1, 0, 0, 0)
#' predicted <- c(1, 0, 1, 1, 1, 1)
#' specificity(actual, predicted)
specificity <- function(actual, predicted) {
cm <- confusion_matrix(actual, predicted)
cm$tn / (cm$tn + cm$fp)
}

#' False Negative Rate
#'
#' \code{fnr} calculates the proportion of actual positives (\code{actual} equals 1)
#' that are not identified as such.
#'
#' It is defined as \code{1 - sensitivity}
#'
#' @inheritParams params_binary
#' @export
#' @seealso \code{\link{confusion_matrix}} \code{\link{sensitivity}}
#' @examples
#' actual <- c(1, 1, 1, 0, 0, 0)
#' predicted <- c(1, 0, 1, 1, 1, 1)
#' fnr(actual, predicted)
fnr <- function(actual, predicted) {
1 - sensitivity(actual, predicted)
Owner
This is good. I need to decide whether this should be 0 or 1 in the case where sum(actual) == 0.

}

#' False Positive Rate
#'
#' \code{fpr} calculates the proportion of actual negative values (\code{actual} equals 0)
#' that are not identified as such.
#'
#' It is defined as \code{1 - specificity}
#'
#' @inheritParams params_binary
#' @seealso \code{\link{confusion_matrix}} \code{\link{specificity}}
#' @export
#' @examples
#' actual <- c(1, 1, 1, 0, 0, 0)
#' predicted <- c(1, 0, 1, 1, 1, 1)
#' fpr(actual, predicted)
fpr <- function(actual, predicted) {
1 - specificity(actual, predicted)
}

#' Positive Predictive Value
#'
#' \code{ppv} calculates the proportion of all predicted positive values (\code{predicted} equals 1) that
#' are true positive results. It is also known as \code{precision}.
#'
#' @inheritParams params_binary
#' @seealso \code{\link{confusion_matrix}} \code{\link{npv}} \code{\link{precision}}
#' @export
#' @examples
#' actual <- c(1, 1, 1, 0, 0, 0)
#' predicted <- c(1, 0, 1, 1, 1, 1)
#' ppv(actual, predicted)
ppv <- function(actual, predicted) {
precision(actual, predicted)
}

#' Negative Predictive Value
#'
#' \code{npv} calculates the proportion of all predicted negative values (\code{predicted} equals 0) that
#' are true negative results.
#'
#' @inheritParams params_binary
#' @seealso \code{\link{confusion_matrix}} \code{\link{ppv}}
#' @export
#' @examples
#' actual <- c(1, 1, 1, 0, 0, 0)
#' predicted <- c(1, 0, 1, 1, 1, 1)
#' npv(actual, predicted)
npv <- function(actual, predicted) {
cm <- confusion_matrix(actual, predicted)
cm$tn / (cm$tn + cm$fn)
}

#' False Discovery Rate
#'
#' \code{fdr} computes proportion of observations predicted to be in
#' the positive class (i.e. the element in \code{predicted} equals 1) that
#' actually belong to the negative class (i.e. the element in \code{actual} equals 0).
#'
#' It is implemented as \code{1 - ppv}
#'
#' @inheritParams params_binary
#' @seealso \code{\link{confusion_matrix}} \code{\link{ppv}}
#' @export
#' @examples
#' actual <- c(1, 1, 1, 0, 0, 0)
#' predicted <- c(1, 0, 1, 1, 1, 1)
#' fdr(actual, predicted)
fdr <- function(actual, predicted) {
1 - ppv(actual, predicted)
}

#' False Omission Rate
#'
#' \code{fomr} is the complement of the Negative Predictive
#' Value (\code{npv}), and is the proportion of negative
#' results that are false negatives.
#'
#' @inheritParams params_binary
#' @seealso \code{\link{confusion_matrix}} \code{\link{npv}}
#' @export
#' @examples
#' actual <- c(1, 1, 1, 0, 0, 0)
#' predicted <- c(1, 0, 1, 1, 1, 1)
#' fomr(actual, predicted)
fomr <- function(actual, predicted) {
1 - npv(actual, predicted)
}

#' Positive Likelihood Ratio
#'
#' \code{lrp} is used to assess the value of performing a
#' diagnostic test, and estimates the ratio of the probability
#' of a true positive result over the probability of a false positive
#' result.
#'
#' It is implemented as the ratio: \code{sensitivity / (1 - specificity)}
#'
#' @inheritParams params_binary
#' @seealso \code{\link{sensitivity}} \code{\link{fpr}}
#' @export
#' @examples
#' actual <- c(1, 1, 1, 0, 0, 0)
#' predicted <- c(1, 0, 1, 1, 1, 1)
#' lrp(actual, predicted)
lrp <- function(actual, predicted) {
sensitivity(actual, predicted) / (1 - specificity(actual, predicted))
}

#' Negative Likelihood Ratio
#'
#' \code{lrn} is used to assess the value of performing a
#' diagnostic test, and estimates the ratio of the probability
#' of a true negative result over the probability of a false negative
#' result.
#'
#' It is implemented as the ratio: \code{(1 - sensitivity) / specificity}
#'
#' @inheritParams params_binary
#' @seealso \code{\link{specificity}} \code{\link{sensitivity}}
#' @export
#' @examples
#' actual <- c(1, 1, 1, 0, 0, 0)
#' predicted <- c(1, 0, 1, 1, 1, 1)
#' lrn(actual, predicted)
lrn <- function(actual, predicted) {
(1 - sensitivity(actual, predicted)) / specificity(actual, predicted)
}