From 2cfa4961b8aeaaf5b163621ea68888e32b93fe7b Mon Sep 17 00:00:00 2001
From: "Jesus M. Castagnetto"
Date: Fri, 12 Jul 2019 09:38:51 -0500
Subject: [PATCH 01/12] added sensitivity, specificity and related metrics

---
 R/binary_classification.R                  | 253 ++++++++++++++++++--
 tests/testthat/test-binary_classification.R | 181 +++++++++++++-
 2 files changed, 419 insertions(+), 15 deletions(-)

diff --git a/R/binary_classification.R b/R/binary_classification.R
index b8a2aa7..b67d991 100644
--- a/R/binary_classification.R
+++ b/R/binary_classification.R
@@ -12,7 +12,7 @@ NULL
 #' Area under the ROC curve (AUC)
 #'
 #' \code{auc} computes the area under the receiver-operator characteristic curve (AUC).
-#' 
+#'
 #' \code{auc} uses the fact that the area under the ROC curve is equal to the probability
 #' that a randomly chosen positive observation has a higher predicted value than a
 #' randomly chosen negative value. In order to compute this probability, we can
@@ -80,16 +80,16 @@ logLoss <- function(actual, predicted) {
 #' Precision
-#' 
+#'
 #' \code{precision} computes the proportion of observations predicted to be in the
 #' positive class (i.e. the element in \code{predicted} equals 1)
-#' that actually belong to the positive class (i.e. the element 
+#' that actually belong to the positive class (i.e. the element
 #' in \code{actual} equals 1)
-#' 
+#'
 #' @inheritParams params_binary
 #' @export
 #' @seealso \code{\link{recall}} \code{\link{fbeta_score}}
-#' @examples 
+#' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 1, 1, 1, 1, 1)
 #' precision(actual, predicted)
@@ -98,16 +98,16 @@ precision <- function(actual, predicted) {
 }
 
 #' Recall
-#' 
+#'
 #' \code{recall} computes the proportion of observations in the positive class
 #' (i.e. the element in \code{actual} equals 1) that are predicted
 #' to be in the positive class (i.e. the element in \code{predicted}
 #' equals 1)
-#' 
+#'
 #' @inheritParams params_binary
 #' @export
 #' @seealso \code{\link{precision}} \code{\link{fbeta_score}}
-#' @examples 
+#' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' recall(actual, predicted)
@@ -116,20 +116,20 @@ recall <- function(actual, predicted) {
 }
 
 #' F-beta Score
-#' 
+#'
 #' \code{fbeta_score} computes a weighted harmonic mean of Precision and Recall.
 #' The \code{beta} parameter controls the weighting.
-#' 
+#'
 #' @inheritParams params_binary
-#' @param beta A non-negative real number controlling how close the F-beta score is to 
-#' either Precision or Recall. When \code{beta} is at the default of 1, 
+#' @param beta A non-negative real number controlling how close the F-beta score is to
+#' either Precision or Recall. When \code{beta} is at the default of 1,
 #' the F-beta Score is exactly an equally weighted harmonic mean.
-#' The F-beta score will weight toward Precision when \code{beta} is less 
+#' The F-beta score will weight toward Precision when \code{beta} is less
 #' than one. The F-beta score will weight toward Recall when \code{beta} is
 #' greater than one.
 #' @export
 #' @seealso \code{\link{precision}} \code{\link{recall}}
-#' @examples 
+#' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' fbeta_score(actual, predicted)
@@ -138,3 +138,228 @@ fbeta_score <- function(actual, predicted, beta = 1) {
   rec <- recall(actual, predicted)
   return((1 + beta^2) * prec * rec / ((beta^2 * prec) + rec))
 }
+
+#' Recall
+#'
+#' \code{recall} computes the proportion of observations in the positive class
+#' (i.e. the element in \code{actual} equals 1) that are predicted
+#' to be in the positive class (i.e. the element in \code{predicted}
+#' equals 1)
+#'
+#' @inheritParams params_binary
+#' @export
+#' @seealso \code{\link{precision}} \code{\link{fbeta_score}}
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' recall(actual, predicted)
+recall <- function(actual, predicted) {
+  return(mean(predicted[actual == 1]))
+}
+
+#' Binary confusion matrix
+#'
+#' \code{cmat} calculates a binary classification confusion matrix,
+#' comparing the predicted with the actual values for the classes.
+#' Assumes that 1 is used for the positive class and 0 for the
+#' negative class.
+#' Returns a \code{data.frame} with columns corresponding to the
+#' number of True Positives (\code{tp}), False Positives (\code{fp}),
+#' True Negatives (\code{tn}), and False Negatives (\code{fn})
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{sensitivity}} \code{\link{specificity}}
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' cmat(actual, predicted)
+
+cmat <- function(actual, predicted) {
+  tbl <- as.data.frame(table(actual, predicted))
+  cm <- data.frame(tp = NA, fn = NA, fp = NA, tn = NA)
+  cm[, c("tn", "fn", "fp", "tp")] <- tbl$Freq
+  cm
+}
+
+#' Sensitivity
+#'
+#' \code{sensitivity} calculates the proportion of actual positives
+#' that are correctly identified as such. It is also known as
+#' \code{true positive rate}.
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{cmat}} \code{\link{specificity}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' sensitivity(actual, predicted)
+
+sensitivity <- function(actual, predicted) {
+  cm <- cmat(actual, predicted)
+  cm$tp / (cm$tp + cm$fn)
+}
+
+#' Specificity
+#'
+#' \code{specificity} calculates the proportion of actual negatives
+#' that are correctly identified as such. It is also known as
+#' \code{true negative rate}.
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{cmat}} \code{\link{sensitivity}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' specificity(actual, predicted)
+
+specificity <- function(actual, predicted) {
+  cm <- cmat(actual, predicted)
+  cm$tn / (cm$tn + cm$fp)
+}
+
+#' False Negative Rate
+#'
+#' \code{fnr} calculates the proportion of actual positives
+#' that are not identified as such.
+#' It is defined as \code{1 - sensitivity}
+#'
+#' @inheritParams params_binary
+#' @export
+#' @seealso \code{\link{cmat}} \code{\link{sensitivity}}
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' fnr(actual, predicted)
+
+fnr <- function(actual, predicted) {
+  1 - sensitivity(actual, predicted)
+}
+
+#' False Positive Rate
+#'
+#' \code{fpr} calculates the proportion of actual negatives
+#' that are not identified as such.
+#' It is defined as \code{1 - specificity}
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{cmat}} \code{\link{specificity}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' fpr(actual, predicted)
+
+fpr <- function(actual, predicted) {
+  1 - specificity(actual, predicted)
+}
+
+#' Positive Predictive Value
+#'
+#' \code{ppv} calculates the proportion of positive values that
+#' are true positive results.
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{cmat}} \code{\link{npv}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' ppv(actual, predicted)
+
+ppv <- function(actual, predicted) {
+  cm <- cmat(actual, predicted)
+  cm$tp / (cm$tp + cm$fp)
+}
+
+#' Negative Predictive Value
+#'
+#' \code{npv} calculates the proportion of negative values that
+#' are true negative results.
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{cmat}} \code{\link{ppv}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' npv(actual, predicted)
+npv <- function(actual, predicted) {
+  cm <- cmat(actual, predicted)
+  cm$tn / (cm$tn + cm$fn)
+}
+
+#' False Discovery Rate
+#'
+#' \code{fdr} is the complement of the Positive Predictive
+#' Value (\code{ppv}), and is the proportion of positive
+#' results that are false positives.
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{cmat}} \code{\link{ppv}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' fdr(actual, predicted)
+
+fdr <- function(actual, predicted) {
+  1 - ppv(actual, predicted)
+}
+
+#' False Omission Rate
+#'
+#' \code{fomr} is the complement of the Negative Predictive
+#' Value (\code{npv}), and is the proportion of negative
+#' results that are false negatives.
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{cmat}} \code{\link{npv}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' fomr(actual, predicted)
+
+fomr <- function(actual, predicted) {
+  1 - npv(actual, predicted)
+}
+
+#' Positive Likelihood Ratio
+#'
+#' \code{lrp} is used to assess the value of performing a
+#' diagnostic test, and estimates the ratio of the probability
+#' of a true positive result over the probability of a false positive
+#' result: \code{sensitivity / (1 - specificity)}
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{sensitivity}} \code{\link{fpr}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' lrp(actual, predicted)
+
+lrp <- function(actual, predicted) {
+  sensitivity(actual, predicted) / (1 - specificity(actual, predicted))
+}
+
+#' Negative Likelihood Ratio
+#'
+#' \code{lrn} is used to assess the value of performing a
+#' diagnostic test, and estimates the ratio of the probability
+#' of a false negative result over the probability of a true negative
+#' result: \code{specificity / (1 - sensitivity)}
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{specificity}} \code{\link{sensitivity}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' lrn(actual, predicted)
+
+lrn <- function(actual, predicted) {
+  (1 - sensitivity(actual, predicted)) / specificity(actual, predicted)
+}
diff --git a/tests/testthat/test-binary_classification.R b/tests/testthat/test-binary_classification.R
index b94d98f..c7567fb 100644
--- a/tests/testthat/test-binary_classification.R
+++ b/tests/testthat/test-binary_classification.R
@@ -9,7 +9,7 @@ test_that('area under ROC curve is calculated correctly', {
 })
 
 test_that('log loss is calculated correctly', {
-    expect_equal(ll(1,1), 0) 
+    expect_equal(ll(1,1), 0)
     expect_equal(ll(1,0), Inf)
     expect_equal(ll(0,1), Inf)
     expect_equal(ll(1,0.5), -log(0.5))
@@ -40,3 +40,182 @@ test_that('f-beta score is calculated correctly',{
     expect_equal(fbeta_score(c(1,1,0,0),c(1,1,1,1),beta=0), 1/2)
 })
 
+test_that(
+  "sensitivity is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(sensitivity(a, p), 0.1, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(sensitivity(a, p), 0.2, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(sensitivity(a, p), 0.38, tol = 0.01)
+  }
+)
+
+test_that(
+  "specificity is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(specificity(a, p), 0.995, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(specificity(a, p), 0.9, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(specificity(a, p), 0.787, tol = 0.01)
+  }
+)
+
+test_that(
+  "false negative rate (fnr) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(fnr(a, p), 0.9, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(fnr(a, p), 0.8, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(fnr(a, p), 0.623, tol = 0.01)
+  }
+)
+
+test_that(
+  "false positive rate (fpr) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(fpr(a, p), 0.0055, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(fpr(a, p), 0.1, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(fpr(a, p), 0.213, tol = 0.01)
+  }
+)
+
+test_that(
+  "positive predictive value (ppv) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(ppv(a, p), 2/3, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(ppv(a, p), 2/3, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(ppv(a, p), 2/3, tol = 0.01)
+  }
+)
+
+test_that(
+  "negative predictive value (npv) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(npv(a, p), 0.91, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(npv(a, p), 0.53, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(npv(a, p), 0.53, tol = 0.01)
+  }
+)
+
+test_that(
+  "false discovery rate (fdr) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(fdr(a, p), 1/3, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(fdr(a, p), 1/3, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(fdr(a, p), 1/3, tol = 0.01)
+  }
+)
+
+test_that(
+  "false omission rate (fomr) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(fomr(a, p), 0.09, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(fomr(a, p), 0.47, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(fomr(a, p), 0.47, tol = 0.01)
+  }
+)
+
+test_that(
+  "positive likelihood ratio (lrp) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(lrp(a, p), 18.3, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(lrp(a, p), 2, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(lrp(a, p), 1.77, tol = 0.01)
+  }
+)
+
+test_that(
+  "negative likelihood ratio (lrn) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(lrn(a, p), 0.905, tol = 0.001)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(lrn(a, p), 0.889, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(lrn(a, p), 0.791, tol = 0.001)
+  }
+)
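As a cross-check on the expected values used in these tests, the confusion-matrix counts of the first dataset can be tallied directly; a quick sanity-check sketch in plain base R (nothing from the package is assumed):

    actual    <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820))
    predicted <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820))
    tp <- sum(actual == 1 & predicted == 1)  # 20
    fn <- sum(actual == 1 & predicted == 0)  # 180
    fp <- sum(actual == 0 & predicted == 1)  # 10
    tn <- sum(actual == 0 & predicted == 0)  # 1820
    tp / (tp + fn)  # sensitivity: 20/200  = 0.1
    tn / (tn + fp)  # specificity: 1820/1830 ~ 0.995
    tp / (tp + fp)  # ppv: 20/30   = 2/3
    tn / (tn + fn)  # npv: 1820/2000 = 0.91

The remaining expectations (fnr, fpr, fdr, fomr, lrp, lrn) follow as complements and ratios of these four quantities.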
Castagnetto" Date: Fri, 12 Jul 2019 09:42:11 -0500 Subject: [PATCH 02/12] removed copied code --- R/binary_classification.R | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/R/binary_classification.R b/R/binary_classification.R index b67d991..9db8428 100644 --- a/R/binary_classification.R +++ b/R/binary_classification.R @@ -139,24 +139,6 @@ fbeta_score <- function(actual, predicted, beta = 1) { return((1 + beta^2) * prec * rec / ((beta^2 * prec) + rec)) } -#' Recall -#' -#' \code{recall} computes proportion of observations in the positive class -#' (i.e. the element in \code{actual} equals 1) that are predicted -#' to be in the positive class (i.e. the element in \code{predicted} -#' equals 1) -#' -#' @inheritParams params_binary -#' @export -#' @seealso \code{\link{precision}} \code{\link{fbeta_score}} -#' @examples -#' actual <- c(1, 1, 1, 0, 0, 0) -#' predicted <- c(1, 0, 1, 1, 1, 1) -#' recall(actual, predicted) -recall <- function(actual, predicted) { - return(mean(predicted[actual == 1])) -} - #' Binary confusion matrix #' #' \code{cmat} Calculates a binary classification confusion matrix, From c9451ad4de89ac1d3acd517451f637534f273758 Mon Sep 17 00:00:00 2001 From: "Jesus M. Castagnetto" Date: Fri, 12 Jul 2019 10:10:39 -0500 Subject: [PATCH 03/12] updating README (part 1) --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index fdc30a8..f1d56c4 100644 --- a/README.md +++ b/README.md @@ -46,3 +46,7 @@ All functions in the **Metrics** package take at least two arguments: `actual` a | binary classification | Precision | precision | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%5Cfrac%7B1%7D%7B%5Csum%20I%28y_i%20%3D%201%29%7D%5Csum_%7Bi%3D1%7D%20%5E%7Bn%7D%20I%28y_i%20%3D%201%29x_i) | | binary classification | Recall | recall | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%5Cfrac%7B1%7D%7B%5Csum%20I%28x_i%20%3D%201%29%7D%5Csum_%7Bi%3D1%7D%20%5E%7Bn%7D%20I%28x_i%20%3D%201%29y_i) | | binary classification | F-beta Score | fbeta_score | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%281%20+%20%5Cbeta%5E2%29%20%5Cfrac%7B%5Ctext%7Bprecision%7D%20*%20%5Ctext%7Brecall%7D%7D%7B%20%28%5Cbeta%5E2%20*%20%5Ctext%7Bprecision%7D%29%20+%20%5Ctext%7Brecall%7D%7D) | +| binary classification | Sensitivity | sensitivity | ![equation](http://latex.codecogs.com/gif.latex?\frac{TP}{TP%20+%20FN}) | +| binary classification | Specificity | specificity | ![equation](http://latex.codecogs.com/gif.latex?\frac{TN}{TN%20+%20FP}) | +| binary classification | False Negative Rate | fnr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FN}{TP%20+%20FN}%20=%20(1%20-%20{sensitivity})) | +| binary classification | False Positive Rate | fpr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FP}{TN%20+%20FP}%20=%20(1%20-%20{specificity})) | \ No newline at end of file From 398fc491ea094310019bd6b7333b8a3136eb6dd3 Mon Sep 17 00:00:00 2001 From: "Jesus M. 
Castagnetto" Date: Fri, 12 Jul 2019 10:21:55 -0500 Subject: [PATCH 04/12] finished with README, fixed doc for lrn --- R/binary_classification.R | 2 +- README.md | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/R/binary_classification.R b/R/binary_classification.R index 9db8428..cd5cc5f 100644 --- a/R/binary_classification.R +++ b/R/binary_classification.R @@ -332,7 +332,7 @@ lrp <- function(actual, predicted) { #' \code{lrn} is used to assessing the value of performing a #' diagnostic test, and estimates the ratio of the probability #' of a true negative result over the probability of a false negative -#' result: \code{specificity / (1 - sensitivity)} +#' result: \code{(1 - sensitivity) / specificity} #' #' @inheritParams params_binary #' @seealso \code{\link{specificity}} \code{\link{sensitivity}} diff --git a/README.md b/README.md index f1d56c4..f200f1a 100644 --- a/README.md +++ b/README.md @@ -49,4 +49,11 @@ All functions in the **Metrics** package take at least two arguments: `actual` a | binary classification | Sensitivity | sensitivity | ![equation](http://latex.codecogs.com/gif.latex?\frac{TP}{TP%20+%20FN}) | | binary classification | Specificity | specificity | ![equation](http://latex.codecogs.com/gif.latex?\frac{TN}{TN%20+%20FP}) | | binary classification | False Negative Rate | fnr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FN}{TP%20+%20FN}%20=%20(1%20-%20{sensitivity})) | -| binary classification | False Positive Rate | fpr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FP}{TN%20+%20FP}%20=%20(1%20-%20{specificity})) | \ No newline at end of file +| binary classification | False Positive Rate | fpr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FP}{TN%20+%20FP}%20=%20(1%20-%20{specificity})) | +| binary classification | Positive Predictive Value | ppv | ![equation](http://latex.codecogs.com/gif.latex?\frac{TP}{TP%20+%20FP}) | +| binary classification | Negative Predictive Value | npv | ![equation](http://latex.codecogs.com/gif.latex?\frac{TN}{TNP%20+%20FN}) | +| binary classification | False Discovery Rate | fdr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FP}{TP%20+%20FP}%20=%20(1%20-%20{ppv})) | +| binary classification | False Omission Rate | fomr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FN}{TN%20+%20FN}%20=%20(1%20-%20{npv})) | +| binary classification | Positive Likelihood Ratio | lrp | ![equation](http://latex.codecogs.com/gif.latex?\frac{sensitivity}{1%20-%20specificity}) | +| binary classification | Negative Likelihood Ratio | lrn | ![equation](http://latex.codecogs.com/gif.latex?\frac{1%20-%20sensitivity}{specificity}%20=%20\frac{fnr}{tnr}) | + From 38d5a85893d37d3c63d0f6da6dc72431cbee8a66 Mon Sep 17 00:00:00 2001 From: "Jesus M. 
Castagnetto" Date: Fri, 12 Jul 2019 10:23:01 -0500 Subject: [PATCH 05/12] minor edit on lnr equation --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f200f1a..41cdbd9 100644 --- a/README.md +++ b/README.md @@ -55,5 +55,5 @@ All functions in the **Metrics** package take at least two arguments: `actual` a | binary classification | False Discovery Rate | fdr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FP}{TP%20+%20FP}%20=%20(1%20-%20{ppv})) | | binary classification | False Omission Rate | fomr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FN}{TN%20+%20FN}%20=%20(1%20-%20{npv})) | | binary classification | Positive Likelihood Ratio | lrp | ![equation](http://latex.codecogs.com/gif.latex?\frac{sensitivity}{1%20-%20specificity}) | -| binary classification | Negative Likelihood Ratio | lrn | ![equation](http://latex.codecogs.com/gif.latex?\frac{1%20-%20sensitivity}{specificity}%20=%20\frac{fnr}{tnr}) | +| binary classification | Negative Likelihood Ratio | lrn | ![equation](http://latex.codecogs.com/gif.latex?\frac{1%20-%20 sensitivity}{specificity}) | From 1674a3f27a0f8bc686a105ff01e3b02225d51acb Mon Sep 17 00:00:00 2001 From: "Jesus M. Castagnetto" Date: Fri, 12 Jul 2019 10:24:13 -0500 Subject: [PATCH 06/12] fixed extra LF --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 41cdbd9..6cbd1b9 100644 --- a/README.md +++ b/README.md @@ -55,5 +55,5 @@ All functions in the **Metrics** package take at least two arguments: `actual` a | binary classification | False Discovery Rate | fdr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FP}{TP%20+%20FP}%20=%20(1%20-%20{ppv})) | | binary classification | False Omission Rate | fomr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FN}{TN%20+%20FN}%20=%20(1%20-%20{npv})) | | binary classification | Positive Likelihood Ratio | lrp | ![equation](http://latex.codecogs.com/gif.latex?\frac{sensitivity}{1%20-%20specificity}) | -| binary classification | Negative Likelihood Ratio | lrn | ![equation](http://latex.codecogs.com/gif.latex?\frac{1%20-%20 sensitivity}{specificity}) | +| binary classification | Negative Likelihood Ratio | lrn | ![equation](http://latex.codecogs.com/gif.latex?\frac{1%20-%20sensitivity}{specificity}) | From 4514d3901c51600264bc25a35cc0f0000a9b5659 Mon Sep 17 00:00:00 2001 From: "Jesus M. Castagnetto" Date: Mon, 29 Jul 2019 14:19:18 -0500 Subject: [PATCH 07/12] adding Rstudio project pattern --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index e5f7ab3..256128c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +*.Rproj + # R History *.Rhistory @@ -6,3 +8,4 @@ # Project files [Bb]uild/ +.Rproj.user From 1f6ee38ac5a6a87b953c9e8526093db29d5442e3 Mon Sep 17 00:00:00 2001 From: "Jesus M. 
Castagnetto" Date: Mon, 29 Jul 2019 14:22:44 -0500 Subject: [PATCH 08/12] making the confusion_matrix function a bit more robust and fixing some docs and references --- R/binary_classification.R | 125 ++++++++++++++++++++++++-------------- 1 file changed, 79 insertions(+), 46 deletions(-) diff --git a/R/binary_classification.R b/R/binary_classification.R index cd5cc5f..2aacd02 100644 --- a/R/binary_classification.R +++ b/R/binary_classification.R @@ -88,13 +88,14 @@ logLoss <- function(actual, predicted) { #' #' @inheritParams params_binary #' @export -#' @seealso \code{\link{recall}} \code{\link{fbeta_score}} +#' @seealso \code{\link{recall}} \code{\link{fbeta_score}} \code{\link{ppv}} #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 1, 1, 1, 1, 1) #' precision(actual, predicted) precision <- function(actual, predicted) { - return(mean(actual[predicted == 1])) + cm <- confusion_matrix(actual, predicted) + cm$tp / (cm$tp + cm$fp) } #' Recall @@ -112,7 +113,8 @@ precision <- function(actual, predicted) { #' predicted <- c(1, 0, 1, 1, 1, 1) #' recall(actual, predicted) recall <- function(actual, predicted) { - return(mean(predicted[actual == 1])) + cm <- confusion_matrix(actual, predicted) + cm$tp / (cm$tp + cm$fn) } #' F-beta Score @@ -141,10 +143,11 @@ fbeta_score <- function(actual, predicted, beta = 1) { #' Binary confusion matrix #' -#' \code{cmat} Calculates a binary classification confusion matrix, +#' \code{confusion_matrix} Calculates a binary classification confusion matrix, #' comparing the predicted with the actual values for the classes. #' Assumes that 1 is used for the positive class and 0 for the #' negative class. +#' #' Returns a \code{data.frame} with columns corresponding to the #' number of True Positives (\code{tp}), False Positives (\code{fp}), #' True Negatives (\code{tn}), and False Negatives (\code{fn}) @@ -154,138 +157,167 @@ fbeta_score <- function(actual, predicted, beta = 1) { #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 0, 1, 1, 1, 1) -#' cmat(actual, predicted) +#' confusion_matrix(actual, predicted) +confusion_matrix <- function(actual, predicted) { + binvals <- c(0, 1) + + # ideally "actual" should be a combination of 0s and 1s, + # but could be all 0s or all 1s as degenerate cases + if (!( + setequal(binvals, unique(actual)) | + setequal(c(0), unique(actual)) | + setequal(c(1), unique(actual)) + )) { + stop(paste("Expecting a vector of 0s and 1s for 'actual'. Got:", + paste(actual, collapse = ", "))) + } + + # "predicted" could be all 0s, all 1s, or a combination + if (!( + setequal(binvals, unique(predicted)) | + setequal(c(0), unique(predicted)) | + setequal(c(1), unique(predicted)) + )) { + stop(paste("Expecting a vector of 0s and 1s for 'predicted'. 
Got:", + paste(predicted, collapse = ", "))) + } -cmat <- function(actual, predicted) { - tbl <- as.data.frame(table(actual, predicted)) - cm <- data.frame(tp = NA, fn = NA, fp = NA, tn = NA) - cm[, c("tn", "fp", "fn", "tp")] <- tbl$Freq - cm + if (length(actual) != length(predicted)) { + stop( + paste( + "Size of 'actual' and 'predicted' are not the same:", + length(actual), "!=", length(predicted) + ) + ) + } + + # explicit comparison + tp <- sum(actual == 1 & predicted == 1) + tn <- sum(actual == 0 & predicted == 0) + fn <- sum(actual == 1 & predicted == 0) + fp <- sum(actual == 0 & predicted == 1) + data.frame("tp" = tp, "fn" = fn, "fp" = fp, "tn" = tn) } #' Sensitivity #' -#' \code{sensitivity} calculates the proportion of actual positives +#' \code{sensitivity} calculates the proportion of actual positives (\code{actual} equals 1) #' that are correctly identified as such. It is also known as -#' \code{true positive rate}. +#' \code{true positive rate} or \code{recall}. #' #' @inheritParams params_binary -#' @seealso \code{\link{cmat}} \code{\link{specificity}} +#' @seealso \code{\link{confusion_matrix}} \code{\link{specificity}} #' @export #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 0, 1, 1, 1, 1) #' sensitivity(actual, predicted) - sensitivity <- function(actual, predicted) { - cm <- cmat(actual, predicted) - cm$tp / (cm$tp + cm$fn) + recall(actual, predicted) } #' Specificity #' -#' \code{specificity} calculates the proportion of actual negatives +#' \code{specificity} calculates the proportion of actual negatives (\code{actual} equals 0) #' that are correctly identified as such. It is also known as #' \code{true negative rate}. #' #' @inheritParams params_binary -#' @seealso \code{\link{cmat}} \code{\link{sensitivity}} +#' @seealso \code{\link{confusion_matrix}} \code{\link{sensitivity}} #' @export #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 0, 1, 1, 1, 1) #' specificity(actual, predicted) - specificity <- function(actual, predicted) { - cm <- cmat(actual, predicted) + cm <- confusion_matrix(actual, predicted) cm$tn / (cm$tn + cm$fp) } #' False Negative Rate #' -#' \code{fnr} calculates the proportion of actual positives +#' \code{fnr} calculates the proportion of actual positives (\code{actual} equals 1) #' that are not identified as such. +#' #' It is defined as \code{1 - sensitivity} #' #' @inheritParams params_binary #' @export -#' @seealso \code{\link{cmat}} \code{\link{sensitivity}} +#' @seealso \code{\link{confusion_matrix}} \code{\link{sensitivity}} #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 0, 1, 1, 1, 1) #' fnr(actual, predicted) - fnr <- function(actual, predicted) { 1 - sensitivity(actual, predicted) } #' False Positive Rate #' -#' \code{fnr} calculates the proportion of actual negatives +#' \code{fnr} calculates the proportion of actual negative values (\code{actual} equals 0) #' that are not identified as such. +#' #' It is defined as \code{1 - specificity} #' #' @inheritParams params_binary -#' @seealso \code{\link{cmat}} \code{\link{specificity}} +#' @seealso \code{\link{confusion_matrix}} \code{\link{specificity}} #' @export #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 0, 1, 1, 1, 1) #' fpr(actual, predicted) - fpr <- function(actual, predicted) { 1 - specificity(actual, predicted) } #' Positive Predictive Value #' -#' \code{ppv} calculates the proportion positive values that -#' are true positive results. 
+#' \code{ppv} calculates the proportion of all predicted positive values (\code{predicted} equals 1) that
+#' are true positive results. It is also known as \code{precision}.
 #'
 #' @inheritParams params_binary
-#' @seealso \code{\link{cmat}} \code{\link{npv}}
+#' @seealso \code{\link{confusion_matrix}} \code{\link{npv}} \code{\link{precision}}
 #' @export
 #' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' ppv(actual, predicted)
-
 ppv <- function(actual, predicted) {
-  cm <- cmat(actual, predicted)
-  cm$tp / (cm$tp + cm$fp)
+  precision(actual, predicted)
 }
 
 #' Negative Predictive Value
 #'
-#' \code{npv} calculates the proportion of negative values that
+#' \code{npv} calculates the proportion of all predicted negative values (\code{predicted} equals 0) that
 #' are true negative results.
 #'
 #' @inheritParams params_binary
-#' @seealso \code{\link{cmat}} \code{\link{ppv}}
+#' @seealso \code{\link{confusion_matrix}} \code{\link{ppv}}
 #' @export
 #' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' npv(actual, predicted)
 npv <- function(actual, predicted) {
-  cm <- cmat(actual, predicted)
+  cm <- confusion_matrix(actual, predicted)
   cm$tn / (cm$tn + cm$fn)
 }
 
 #' False Discovery Rate
 #'
-#' \code{fdr} is the complement of the Positive Predictive
-#' Value (\code{ppv}), and is the proportion of positive
-#' results that are false positives.
+#' \code{fdr} computes the proportion of observations predicted to be in
+#' the positive class (i.e. the element in \code{predicted} equals 1) that
+#' actually belong to the negative class (i.e. the element in \code{actual} equals 0).
+#'
+#' It is implemented as \code{1 - ppv}
 #'
 #' @inheritParams params_binary
-#' @seealso \code{\link{cmat}} \code{\link{ppv}}
+#' @seealso \code{\link{confusion_matrix}} \code{\link{ppv}}
 #' @export
 #' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' fdr(actual, predicted)
-
 fdr <- function(actual, predicted) {
   1 - ppv(actual, predicted)
 }
 
 #' False Omission Rate
 #'
 #' \code{fomr} is the complement of the Negative Predictive
 #' Value (\code{npv}), and is the proportion of negative
 #' results that are false negatives.
 #'
 #' @inheritParams params_binary
-#' @seealso \code{\link{cmat}} \code{\link{npv}}
+#' @seealso \code{\link{confusion_matrix}} \code{\link{npv}}
 #' @export
 #' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' fomr(actual, predicted)
-
 fomr <- function(actual, predicted) {
   1 - npv(actual, predicted)
 }
 
 #' Positive Likelihood Ratio
 #'
 #' \code{lrp} is used to assess the value of performing a
 #' diagnostic test, and estimates the ratio of the probability
 #' of a true positive result over the probability of a false positive
-#' result: \code{sensitivity / (1 - specificity)}
+#' result.
+#'
+#' It is implemented as the ratio: \code{sensitivity / (1 - specificity)}
 #'
 #' @inheritParams params_binary
 #' @seealso \code{\link{sensitivity}} \code{\link{fpr}}
 #' @export
 #' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' lrp(actual, predicted)
-
 lrp <- function(actual, predicted) {
   sensitivity(actual, predicted) / (1 - specificity(actual, predicted))
 }
 
 #' Negative Likelihood Ratio
 #'
 #' \code{lrn} is used to assess the value of performing a
 #' diagnostic test, and estimates the ratio of the probability
 #' of a false negative result over the probability of a true negative
-#' result: \code{(1 - sensitivity) / specificity}
+#' result.
+#'
+#' It is implemented as the ratio: \code{(1 - sensitivity) / specificity}
 #'
 #' @inheritParams params_binary
 #' @seealso \code{\link{specificity}} \code{\link{sensitivity}}
 #' @export
 #' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' lrn(actual, predicted)
-
 lrn <- function(actual, predicted) {
   (1 - sensitivity(actual, predicted)) / specificity(actual, predicted)
 }
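With the checks added above, malformed inputs now fail fast instead of silently producing a misshapen table; a brief sketch of the intended behavior (return values shown as comments):

    confusion_matrix(c(1, 0, 1), c(1, 0, 0))
    #   tp fn fp tn
    # 1  1  1  0  1
    confusion_matrix(c(1, 1), c(0, 0))             # degenerate all-1s/all-0s inputs are allowed
    try(confusion_matrix(c(1, 2, 0), c(1, 0, 0)))  # error: not a vector of 0s and 1s
    try(confusion_matrix(c(1, 0), c(1, 0, 0)))     # error: lengths differ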
From 9805b641f7b59b6c1534c9d3b16242e9afc71f13 Mon Sep 17 00:00:00 2001
From: "Jesus M. Castagnetto"
Date: Mon, 29 Jul 2019 14:23:18 -0500
Subject: [PATCH 09/12] generated docs

---
 man/confusion_matrix.Rd | 35 +++++++++++++++++++++++++++++++++
 man/fbeta_score.Rd      |  6 +++---
 man/fdr.Rd              | 32 ++++++++++++++++++++++++++++++++
 man/fnr.Rd              | 31 +++++++++++++++++++++++++++++++
 man/fomr.Rd             | 29 +++++++++++++++++++++++++++++
 man/fpr.Rd              | 31 +++++++++++++++++++++++++++++++
 man/lrn.Rd              | 33 +++++++++++++++++++++++++++++++++
 man/lrp.Rd              | 33 +++++++++++++++++++++++++++++++++
 man/npv.Rd              | 28 ++++++++++++++++++++++++++++
 man/ppv.Rd              | 28 ++++++++++++++++++++++++++++
 man/precision.Rd        |  4 ++--
 man/sensitivity.Rd      | 29 +++++++++++++++++++++++++++++
 man/specificity.Rd      | 29 +++++++++++++++++++++++++++++
 13 files changed, 343 insertions(+), 5 deletions(-)
 create mode 100644 man/confusion_matrix.Rd
 create mode 100644 man/fdr.Rd
 create mode 100644 man/fnr.Rd
 create mode 100644 man/fomr.Rd
 create mode 100644 man/fpr.Rd
 create mode 100644 man/lrn.Rd
 create mode 100644 man/lrp.Rd
 create mode 100644 man/npv.Rd
 create mode 100644 man/ppv.Rd
 create mode 100644 man/sensitivity.Rd
 create mode 100644 man/specificity.Rd

diff --git a/man/confusion_matrix.Rd b/man/confusion_matrix.Rd
new file mode 100644
index 0000000..eac8951
--- /dev/null
+++ b/man/confusion_matrix.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{confusion_matrix}
+\alias{confusion_matrix}
+\title{Binary confusion matrix}
+\usage{
+confusion_matrix(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{confusion_matrix} calculates a binary classification confusion matrix,
+comparing the predicted with the actual values for the classes.
+Assumes that 1 is used for the positive class and 0 for the
+negative class.
+}
+\details{
+Returns a \code{data.frame} with columns corresponding to the
+number of True Positives (\code{tp}), False Positives (\code{fp}),
+True Negatives (\code{tn}), and False Negatives (\code{fn})
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+confusion_matrix(actual, predicted)
+}
+\seealso{
+\code{\link{sensitivity}} \code{\link{specificity}}
+}
diff --git a/man/fbeta_score.Rd b/man/fbeta_score.Rd
index aaceb11..eb8139c 100644
--- a/man/fbeta_score.Rd
+++ b/man/fbeta_score.Rd
@@ -14,10 +14,10 @@ class and 0 for the negative class.}
 class and 0 for the negative class. Each element represents the
 prediction for the corresponding element in \code{actual}.}
 
-\item{beta}{A non-negative real number controlling how close the F-beta score is to 
-either Precision or Recall. When \code{beta} is at the default of 1, 
+\item{beta}{A non-negative real number controlling how close the F-beta score is to
+either Precision or Recall. When \code{beta} is at the default of 1,
 the F-beta Score is exactly an equally weighted harmonic mean.
-The F-beta score will weight toward Precision when \code{beta} is less 
+The F-beta score will weight toward Precision when \code{beta} is less
 than one. The F-beta score will weight toward Recall when \code{beta} is
 greater than one.}
diff --git a/man/fdr.Rd b/man/fdr.Rd
new file mode 100644
index 0000000..8fd8ed5
--- /dev/null
+++ b/man/fdr.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{fdr}
+\alias{fdr}
+\title{False Discovery Rate}
+\usage{
+fdr(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{fdr} computes the proportion of observations predicted to be in
+the positive class (i.e. the element in \code{predicted} equals 1) that
+actually belong to the negative class (i.e. the element in \code{actual} equals 0).
+}
+\details{
+It is implemented as \code{1 - ppv}
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+fdr(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{ppv}}
+}
diff --git a/man/fnr.Rd b/man/fnr.Rd
new file mode 100644
index 0000000..7edb8c2
--- /dev/null
+++ b/man/fnr.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{fnr}
+\alias{fnr}
+\title{False Negative Rate}
+\usage{
+fnr(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{fnr} calculates the proportion of actual positives (\code{actual} equals 1)
+that are not identified as such.
+}
+\details{
+It is defined as \code{1 - sensitivity}
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+fnr(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{sensitivity}}
+}
diff --git a/man/fomr.Rd b/man/fomr.Rd
new file mode 100644
index 0000000..a708cbf
--- /dev/null
+++ b/man/fomr.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{fomr}
+\alias{fomr}
+\title{False Omission Rate}
+\usage{
+fomr(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{fomr} is the complement of the Negative Predictive
+Value (\code{npv}), and is the proportion of negative
+results that are false negatives.
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+fomr(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{npv}}
+}
diff --git a/man/fpr.Rd b/man/fpr.Rd
new file mode 100644
index 0000000..400b7b3
--- /dev/null
+++ b/man/fpr.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{fpr}
+\alias{fpr}
+\title{False Positive Rate}
+\usage{
+fpr(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{fpr} calculates the proportion of actual negative values (\code{actual} equals 0)
+that are not identified as such.
+}
+\details{
+It is defined as \code{1 - specificity}
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+fpr(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{specificity}}
+}
diff --git a/man/lrn.Rd b/man/lrn.Rd
new file mode 100644
index 0000000..d8cc794
--- /dev/null
+++ b/man/lrn.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{lrn}
+\alias{lrn}
+\title{Negative Likelihood Ratio}
+\usage{
+lrn(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{lrn} is used to assess the value of performing a
+diagnostic test, and estimates the ratio of the probability
+of a false negative result over the probability of a true negative
+result.
+}
+\details{
+It is implemented as the ratio: \code{(1 - sensitivity) / specificity}
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+lrn(actual, predicted)
+}
+\seealso{
+\code{\link{specificity}} \code{\link{sensitivity}}
+}
diff --git a/man/lrp.Rd b/man/lrp.Rd
new file mode 100644
index 0000000..fbc4b35
--- /dev/null
+++ b/man/lrp.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{lrp}
+\alias{lrp}
+\title{Positive Likelihood Ratio}
+\usage{
+lrp(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{lrp} is used to assess the value of performing a
+diagnostic test, and estimates the ratio of the probability
+of a true positive result over the probability of a false positive
+result.
+}
+\details{
+It is implemented as the ratio: \code{sensitivity / (1 - specificity)}
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+lrp(actual, predicted)
+}
+\seealso{
+\code{\link{sensitivity}} \code{\link{fpr}}
+}
diff --git a/man/npv.Rd b/man/npv.Rd
new file mode 100644
index 0000000..d31a84e
--- /dev/null
+++ b/man/npv.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{npv}
+\alias{npv}
+\title{Negative Predictive Value}
+\usage{
+npv(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{npv} calculates the proportion of all predicted negative values (\code{predicted} equals 0) that
+are true negative results.
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+npv(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{ppv}}
+}
diff --git a/man/ppv.Rd b/man/ppv.Rd
new file mode 100644
index 0000000..d3e36f8
--- /dev/null
+++ b/man/ppv.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{ppv}
+\alias{ppv}
+\title{Positive Predictive Value}
+\usage{
+ppv(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{ppv} calculates the proportion of all predicted positive values (\code{predicted} equals 1) that
+are true positive results. It is also known as \code{precision}.
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+ppv(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{npv}} \code{\link{precision}}
+}
diff --git a/man/precision.Rd b/man/precision.Rd
index 5f27436..efe338f 100644
--- a/man/precision.Rd
+++ b/man/precision.Rd
@@ -17,7 +17,7 @@ prediction for the corresponding element in \code{actual}.}
 \description{
 \code{precision} computes the proportion of observations predicted to be in
  the positive class (i.e. the element in \code{predicted} equals 1)
- that actually belong to the positive class (i.e. the element 
+ that actually belong to the positive class (i.e. the element
  in \code{actual} equals 1)
 }
 \examples{
 actual <- c(1, 1, 1, 0, 0, 0)
 predicted <- c(1, 1, 1, 1, 1, 1)
 precision(actual, predicted)
 }
 \seealso{
-\code{\link{recall}} \code{\link{fbeta_score}}
+\code{\link{recall}} \code{\link{fbeta_score}} \code{\link{ppv}}
 }
diff --git a/man/sensitivity.Rd b/man/sensitivity.Rd
new file mode 100644
index 0000000..75a2055
--- /dev/null
+++ b/man/sensitivity.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{sensitivity}
+\alias{sensitivity}
+\title{Sensitivity}
+\usage{
+sensitivity(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{sensitivity} calculates the proportion of actual positives (\code{actual} equals 1)
+that are correctly identified as such. It is also known as
+\code{true positive rate} or \code{recall}.
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+sensitivity(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{specificity}}
+}
diff --git a/man/specificity.Rd b/man/specificity.Rd
new file mode 100644
index 0000000..a41523d
--- /dev/null
+++ b/man/specificity.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{specificity}
+\alias{specificity}
+\title{Specificity}
+\usage{
+specificity(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{specificity} calculates the proportion of actual negatives (\code{actual} equals 0)
+that are correctly identified as such. It is also known as
+\code{true negative rate}.
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+specificity(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{sensitivity}}
+}
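Because sensitivity now delegates to recall and ppv to precision, the newly generated pages document the same computations under both names; a quick check that the aliases agree (assuming the updated package is loaded):

    actual    <- c(1, 1, 1, 0, 0, 0)
    predicted <- c(1, 0, 1, 1, 1, 1)
    identical(sensitivity(actual, predicted), recall(actual, predicted))  # TRUE
    identical(ppv(actual, predicted), precision(actual, predicted))       # TRUE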
Castagnetto" Date: Mon, 29 Jul 2019 14:23:49 -0500 Subject: [PATCH 10/12] updating package description and namespace --- .Rbuildignore | 3 ++- DESCRIPTION | 5 +++-- NAMESPACE | 10 ++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 29dcfad..09aac15 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,4 +4,5 @@ proj$ ^NEWS.*$ .travis.yml ^CODE_OF_CONDUCT\.md$ -^CONTRIBUTING\.md$ \ No newline at end of file +^CONTRIBUTING\.md$ +^.*\.Rproj$ diff --git a/DESCRIPTION b/DESCRIPTION index 0309d2d..e352ad1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,11 +9,12 @@ Description: An implementation of evaluation metrics in R that are commonly Authors@R: c( person("Ben", "Hamner", role = c("aut", "cph"), email = "ben@benhamner.com"), person("Michael", "Frasco", role = c("aut", "cre"), email = "mfrasco6@gmail.com"), - person("Erin", "LeDell", role = c("ctb"), email = "oss@ledell.org")) + person("Erin", "LeDell", role = c("ctb"), email = "oss@ledell.org"), + person("Jesus", "Castagnetto", role = c("ctb"), email = "jesus@castagnetto.com")) Maintainer: Michael Frasco Suggests: testthat URL: https://github.com/mfrasco/Metrics BugReports: https://github.com/mfrasco/Metrics/issues License: BSD_3_clause + file LICENSE -RoxygenNote: 6.1.0 +RoxygenNote: 6.1.1 diff --git a/NAMESPACE b/NAMESPACE index fcfec79..422d2c1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,8 +12,14 @@ export(ce) export(explained_variation) export(f1) export(fbeta_score) +export(fdr) +export(fnr) +export(fomr) +export(fpr) export(ll) export(logLoss) +export(lrn) +export(lrp) export(mae) export(mape) export(mapk) @@ -21,7 +27,9 @@ export(mase) export(mdae) export(mse) export(msle) +export(npv) export(percent_bias) +export(ppv) export(precision) export(rae) export(recall) @@ -30,7 +38,9 @@ export(rmsle) export(rrse) export(rse) export(se) +export(sensitivity) export(sle) export(smape) +export(specificity) export(sse) importFrom(stats,median) From 2307368657102ecb244c8351859cd8a66c09b5ce Mon Sep 17 00:00:00 2001 From: "Jesus M. Castagnetto" Date: Tue, 30 Jul 2019 10:27:14 -0500 Subject: [PATCH 11/12] added .RData --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 256128c..6b35920 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.RData *.Rproj # R History From a189e12c089c12dc39bc58568018e3d569cdc125 Mon Sep 17 00:00:00 2001 From: "Jesus M. Castagnetto" Date: Tue, 30 Jul 2019 10:27:31 -0500 Subject: [PATCH 12/12] simplifying code --- R/binary_classification.R | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/R/binary_classification.R b/R/binary_classification.R index 2aacd02..fe2abd8 100644 --- a/R/binary_classification.R +++ b/R/binary_classification.R @@ -160,13 +160,15 @@ fbeta_score <- function(actual, predicted, beta = 1) { #' confusion_matrix(actual, predicted) confusion_matrix <- function(actual, predicted) { binvals <- c(0, 1) + b_actual <- unique(actual) + b_predicted <- unique(predicted) # ideally "actual" should be a combination of 0s and 1s, # but could be all 0s or all 1s as degenerate cases if (!( - setequal(binvals, unique(actual)) | - setequal(c(0), unique(actual)) | - setequal(c(1), unique(actual)) + setequal(binvals, b_actual) | + setequal(c(0), b_actual) | + setequal(c(1), b_actual) )) { stop(paste("Expecting a vector of 0s and 1s for 'actual'. 
Got:", paste(actual, collapse = ", "))) @@ -174,9 +176,9 @@ confusion_matrix <- function(actual, predicted) { # "predicted" could be all 0s, all 1s, or a combination if (!( - setequal(binvals, unique(predicted)) | - setequal(c(0), unique(predicted)) | - setequal(c(1), unique(predicted)) + setequal(binvals, b_predicted) | + setequal(c(0), b_predicted) | + setequal(c(1), b_predicted) )) { stop(paste("Expecting a vector of 0s and 1s for 'predicted'. Got:", paste(predicted, collapse = ", "))) @@ -185,18 +187,19 @@ confusion_matrix <- function(actual, predicted) { if (length(actual) != length(predicted)) { stop( paste( - "Size of 'actual' and 'predicted' are not the same:", - length(actual), "!=", length(predicted) + "Size of 'actual' and 'predicted' are not the same: length(actual):", + length(actual), "!= length(predicted):", length(predicted) ) ) } - # explicit comparison - tp <- sum(actual == 1 & predicted == 1) - tn <- sum(actual == 0 & predicted == 0) - fn <- sum(actual == 1 & predicted == 0) - fp <- sum(actual == 0 & predicted == 1) - data.frame("tp" = tp, "fn" = fn, "fp" = fp, "tn" = tn) + # explicit comparisons + data.frame( + "tp" = sum(actual == 1 & predicted == 1), + "fn" = sum(actual == 1 & predicted == 0), + "fp" = sum(actual == 0 & predicted == 1), + "tn" = sum(actual == 0 & predicted == 0) + ) } #' Sensitivity