From 2cfa4961b8aeaaf5b163621ea68888e32b93fe7b Mon Sep 17 00:00:00 2001
From: "Jesus M. Castagnetto"
Date: Fri, 12 Jul 2019 09:38:51 -0500
Subject: [PATCH 01/12] added sensitivity, specificity and related metrics

---
 R/binary_classification.R                  | 253 ++++++++++++++++++--
 tests/testthat/test-binary_classification.R | 181 +++++++++++++-
 2 files changed, 419 insertions(+), 15 deletions(-)

diff --git a/R/binary_classification.R b/R/binary_classification.R
index b8a2aa7..b67d991 100644
--- a/R/binary_classification.R
+++ b/R/binary_classification.R
@@ -12,7 +12,7 @@ NULL
 #' Area under the ROC curve (AUC)
 #'
 #' \code{auc} computes the area under the receiver-operator characteristic curve (AUC).
-#' 
+#'
 #' \code{auc} uses the fact that the area under the ROC curve is equal to the probability
 #' that a randomly chosen positive observation has a higher predicted value than a
 #' randomly chosen negative value. In order to compute this probability, we can
@@ -80,16 +80,16 @@ logLoss <- function(actual, predicted) {
 #' Precision
-#' 
+#'
 #' \code{precision} computes the proportion of observations predicted to be in the
 #' positive class (i.e. the element in \code{predicted} equals 1)
-#' that actually belong to the positive class (i.e. the element 
+#' that actually belong to the positive class (i.e. the element
 #' in \code{actual} equals 1)
-#' 
+#'
 #' @inheritParams params_binary
 #' @export
 #' @seealso \code{\link{recall}} \code{\link{fbeta_score}}
-#' @examples 
+#' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 1, 1, 1, 1, 1)
 #' precision(actual, predicted)
@@ -98,16 +98,16 @@ precision <- function(actual, predicted) {
 }
 
 #' Recall
-#' 
+#'
 #' \code{recall} computes the proportion of observations in the positive class
 #' (i.e. the element in \code{actual} equals 1) that are predicted
 #' to be in the positive class (i.e. the element in \code{predicted}
 #' equals 1)
-#' 
+#'
 #' @inheritParams params_binary
 #' @export
 #' @seealso \code{\link{precision}} \code{\link{fbeta_score}}
-#' @examples 
+#' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' recall(actual, predicted)
@@ -116,20 +116,20 @@ recall <- function(actual, predicted) {
 }
 
 #' F-beta Score
-#' 
+#'
 #' \code{fbeta_score} computes a weighted harmonic mean of Precision and Recall.
 #' The \code{beta} parameter controls the weighting.
-#' 
+#'
 #' @inheritParams params_binary
-#' @param beta A non-negative real number controlling how close the F-beta score is to 
-#' either Precision or Recall. When \code{beta} is at the default of 1, 
+#' @param beta A non-negative real number controlling how close the F-beta score is to
+#' either Precision or Recall. When \code{beta} is at the default of 1,
 #' the F-beta Score is exactly an equally weighted harmonic mean.
-#' The F-beta score will weight toward Precision when \code{beta} is less 
+#' The F-beta score will weight toward Precision when \code{beta} is less
 #' than one. The F-beta score will weight toward Recall when \code{beta} is
 #' greater than one.
 #' @export
 #' @seealso \code{\link{precision}} \code{\link{recall}}
-#' @examples 
+#' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' fbeta_score(actual, predicted)
@@ -138,3 +138,228 @@ fbeta_score <- function(actual, predicted, beta = 1) {
   rec <- recall(actual, predicted)
   return((1 + beta^2) * prec * rec / ((beta^2 * prec) + rec))
 }
+
+#' Recall
+#'
+#' \code{recall} computes the proportion of observations in the positive class
+#' (i.e. the element in \code{actual} equals 1) that are predicted
+#' to be in the positive class (i.e. the element in \code{predicted}
+#' equals 1)
+#'
+#' @inheritParams params_binary
+#' @export
+#' @seealso \code{\link{precision}} \code{\link{fbeta_score}}
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' recall(actual, predicted)
+recall <- function(actual, predicted) {
+  return(mean(predicted[actual == 1]))
+}
+
+#' Binary confusion matrix
+#'
+#' \code{cmat} calculates a binary classification confusion matrix,
+#' comparing the predicted with the actual values for the classes.
+#' Assumes that 1 is used for the positive class and 0 for the
+#' negative class.
+#' Returns a \code{data.frame} with columns corresponding to the
+#' number of True Positives (\code{tp}), False Positives (\code{fp}),
+#' True Negatives (\code{tn}), and False Negatives (\code{fn})
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{sensitivity}} \code{\link{specificity}}
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' cmat(actual, predicted)
+
+cmat <- function(actual, predicted) {
+  tbl <- as.data.frame(table(actual, predicted))
+  cm <- data.frame(tp = NA, fn = NA, fp = NA, tn = NA)
+  cm[, c("tn", "fn", "fp", "tp")] <- tbl$Freq
+  cm
+}
+
+#' Sensitivity
+#'
+#' \code{sensitivity} calculates the proportion of actual positives
+#' that are correctly identified as such. It is also known as
+#' \code{true positive rate}.
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{cmat}} \code{\link{specificity}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' sensitivity(actual, predicted)
+
+sensitivity <- function(actual, predicted) {
+  cm <- cmat(actual, predicted)
+  cm$tp / (cm$tp + cm$fn)
+}
+
+#' Specificity
+#'
+#' \code{specificity} calculates the proportion of actual negatives
+#' that are correctly identified as such. It is also known as
+#' \code{true negative rate}.
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{cmat}} \code{\link{sensitivity}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' specificity(actual, predicted)
+
+specificity <- function(actual, predicted) {
+  cm <- cmat(actual, predicted)
+  cm$tn / (cm$tn + cm$fp)
+}
+
+#' False Negative Rate
+#'
+#' \code{fnr} calculates the proportion of actual positives
+#' that are not identified as such.
+#' It is defined as \code{1 - sensitivity}
+#'
+#' @inheritParams params_binary
+#' @export
+#' @seealso \code{\link{cmat}} \code{\link{sensitivity}}
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' fnr(actual, predicted)
+
+fnr <- function(actual, predicted) {
+  1 - sensitivity(actual, predicted)
+}
+
+#' False Positive Rate
+#'
+#' \code{fpr} calculates the proportion of actual negatives
+#' that are not identified as such.
+#' It is defined as \code{1 - specificity}
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{cmat}} \code{\link{specificity}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' fpr(actual, predicted)
+
+fpr <- function(actual, predicted) {
+  1 - specificity(actual, predicted)
+}
+
+#' Positive Predictive Value
+#'
+#' \code{ppv} calculates the proportion of positive values that
+#' are true positive results.
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{cmat}} \code{\link{npv}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' ppv(actual, predicted)
+
+ppv <- function(actual, predicted) {
+  cm <- cmat(actual, predicted)
+  cm$tp / (cm$tp + cm$fp)
+}
+
+#' Negative Predictive Value
+#'
+#' \code{npv} calculates the proportion of negative values that
+#' are true negative results.
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{cmat}} \code{\link{ppv}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' npv(actual, predicted)
+npv <- function(actual, predicted) {
+  cm <- cmat(actual, predicted)
+  cm$tn / (cm$tn + cm$fn)
+}
+
+#' False Discovery Rate
+#'
+#' \code{fdr} is the complement of the Positive Predictive
+#' Value (\code{ppv}), and is the proportion of positive
+#' results that are false positives.
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{cmat}} \code{\link{ppv}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' fdr(actual, predicted)
+
+fdr <- function(actual, predicted) {
+  1 - ppv(actual, predicted)
+}
+
+#' False Omission Rate
+#'
+#' \code{fomr} is the complement of the Negative Predictive
+#' Value (\code{npv}), and is the proportion of negative
+#' results that are false negatives.
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{cmat}} \code{\link{npv}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' fomr(actual, predicted)
+
+fomr <- function(actual, predicted) {
+  1 - npv(actual, predicted)
+}
+
+#' Positive Likelihood Ratio
+#'
+#' \code{lrp} is used to assess the value of performing a
+#' diagnostic test, and estimates the ratio of the probability
+#' of a true positive result over the probability of a false positive
+#' result: \code{sensitivity / (1 - specificity)}
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{sensitivity}} \code{\link{fpr}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' lrp(actual, predicted)
+
+lrp <- function(actual, predicted) {
+  sensitivity(actual, predicted) / (1 - specificity(actual, predicted))
+}
+
+#' Negative Likelihood Ratio
+#'
+#' \code{lrn} is used to assess the value of performing a
+#' diagnostic test, and estimates the ratio of the probability
+#' of a false negative result over the probability of a true negative
+#' result: \code{specificity / (1 - sensitivity)}
+#'
+#' @inheritParams params_binary
+#' @seealso \code{\link{specificity}} \code{\link{sensitivity}}
+#' @export
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' lrn(actual, predicted)
+
+lrn <- function(actual, predicted) {
+  (1 - sensitivity(actual, predicted)) / specificity(actual, predicted)
+}
diff --git a/tests/testthat/test-binary_classification.R b/tests/testthat/test-binary_classification.R
index b94d98f..c7567fb 100644
--- a/tests/testthat/test-binary_classification.R
+++ b/tests/testthat/test-binary_classification.R
@@ -9,7 +9,7 @@ test_that('area under ROC curve is calculated correctly', {
 })
 
 test_that('log loss is calculated correctly', {
-    expect_equal(ll(1,1), 0) 
+    expect_equal(ll(1,1), 0)
     expect_equal(ll(1,0), Inf)
     expect_equal(ll(0,1), Inf)
     expect_equal(ll(1,0.5), -log(0.5))
@@ -40,3 +40,182 @@ test_that('f-beta score is calculated correctly',{
     expect_equal(fbeta_score(c(1,1,0,0),c(1,1,1,1),beta=0), 1/2)
 })
 
+test_that(
+  "sensitivity is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(sensitivity(a, p), 0.1, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(sensitivity(a, p), 0.2, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(sensitivity(a, p), 0.38, tol = 0.01)
+  }
+)
+
+test_that(
+  "specificity is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(specificity(a, p), 0.995, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(specificity(a, p), 0.9, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(specificity(a, p), 0.787, tol = 0.01)
+  }
+)
+
+test_that(
+  "false negative rate (fnr) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(fnr(a, p), 0.9, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(fnr(a, p), 0.8, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(fnr(a, p), 0.623, tol = 0.01)
+  }
+)
+
+test_that(
+  "false positive rate (fpr) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(fpr(a, p), 0.0055, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(fpr(a, p), 0.1, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(fpr(a, p), 0.213, tol = 0.01)
+  }
+)
+
+test_that(
+  "positive predictive value (ppv) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(ppv(a, p), 2/3, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(ppv(a, p), 2/3, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(ppv(a, p), 2/3, tol = 0.01)
+  }
+)
+
+test_that(
+  "negative predictive value (npv) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(npv(a, p), 0.91, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(npv(a, p), 0.53, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(npv(a, p), 0.53, tol = 0.01)
+  }
+)
+
+test_that(
+  "false discovery rate (fdr) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(fdr(a, p), 1/3, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(fdr(a, p), 1/3, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(fdr(a, p), 1/3, tol = 0.01)
+  }
+)
+
+test_that(
+  "false omission rate (fomr) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(fomr(a, p), 0.09, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(fomr(a, p), 0.47, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(fomr(a, p), 0.47, tol = 0.01)
+  }
+)
+
+test_that(
+  "positive likelihood ratio (lrp) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(lrp(a, p), 18.3, tol = 0.01)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(lrp(a, p), 2, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(lrp(a, p), 1.77, tol = 0.01)
+  }
+)
+
+test_that(
+  "negative likelihood ratio (lrn) is calculated correctly",
+  {
+    # first dataset
+    a <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820)) # actual
+    p <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820)) # predicted
+    expect_equal(lrn(a, p), 0.905, tol = 0.001)
+    # second dataset
+    a <- c(rep(1, 10), rep(1, 40), rep(0, 5), rep(0, 45)) # actual
+    p <- c(rep(1, 10), rep(0, 40), rep(1, 5), rep(0, 45)) # predicted
+    expect_equal(lrn(a, p), 0.889, tol = 0.01)
+    # third dataset
+    a <- c(rep(1, 20), rep(1, 33), rep(0, 10), rep(0, 37)) # actual
+    p <- c(rep(1, 20), rep(0, 33), rep(1, 10), rep(0, 37)) # predicted
+    expect_equal(lrn(a, p), 0.791, tol = 0.001)
+  }
+)
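As a cross-check on the expected values used in these tests, the confusion-matrix counts of the first dataset can be tallied directly; a quick sanity-check sketch in plain base R (nothing from the package is assumed):

    actual    <- c(rep(1, 20), rep(1, 180), rep(0, 10), rep(0, 1820))
    predicted <- c(rep(1, 20), rep(0, 180), rep(1, 10), rep(0, 1820))
    tp <- sum(actual == 1 & predicted == 1)  # 20
    fn <- sum(actual == 1 & predicted == 0)  # 180
    fp <- sum(actual == 0 & predicted == 1)  # 10
    tn <- sum(actual == 0 & predicted == 0)  # 1820
    tp / (tp + fn)  # sensitivity: 20/200  = 0.1
    tn / (tn + fp)  # specificity: 1820/1830 ~ 0.995
    tp / (tp + fp)  # ppv: 20/30   = 2/3
    tn / (tn + fn)  # npv: 1820/2000 = 0.91

The remaining expectations (fnr, fpr, fdr, fomr, lrp, lrn) follow as complements and ratios of these four quantities.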
Castagnetto" Date: Fri, 12 Jul 2019 09:42:11 -0500 Subject: [PATCH 02/12] removed copied code --- R/binary_classification.R | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/R/binary_classification.R b/R/binary_classification.R index b67d991..9db8428 100644 --- a/R/binary_classification.R +++ b/R/binary_classification.R @@ -139,24 +139,6 @@ fbeta_score <- function(actual, predicted, beta = 1) { return((1 + beta^2) * prec * rec / ((beta^2 * prec) + rec)) } -#' Recall -#' -#' \code{recall} computes proportion of observations in the positive class -#' (i.e. the element in \code{actual} equals 1) that are predicted -#' to be in the positive class (i.e. the element in \code{predicted} -#' equals 1) -#' -#' @inheritParams params_binary -#' @export -#' @seealso \code{\link{precision}} \code{\link{fbeta_score}} -#' @examples -#' actual <- c(1, 1, 1, 0, 0, 0) -#' predicted <- c(1, 0, 1, 1, 1, 1) -#' recall(actual, predicted) -recall <- function(actual, predicted) { - return(mean(predicted[actual == 1])) -} - #' Binary confusion matrix #' #' \code{cmat} Calculates a binary classification confusion matrix, From c9451ad4de89ac1d3acd517451f637534f273758 Mon Sep 17 00:00:00 2001 From: "Jesus M. Castagnetto" Date: Fri, 12 Jul 2019 10:10:39 -0500 Subject: [PATCH 03/12] updating README (part 1) --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index fdc30a8..f1d56c4 100644 --- a/README.md +++ b/README.md @@ -46,3 +46,7 @@ All functions in the **Metrics** package take at least two arguments: `actual` a | binary classification | Precision | precision | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%5Cfrac%7B1%7D%7B%5Csum%20I%28y_i%20%3D%201%29%7D%5Csum_%7Bi%3D1%7D%20%5E%7Bn%7D%20I%28y_i%20%3D%201%29x_i) | | binary classification | Recall | recall | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%5Cfrac%7B1%7D%7B%5Csum%20I%28x_i%20%3D%201%29%7D%5Csum_%7Bi%3D1%7D%20%5E%7Bn%7D%20I%28x_i%20%3D%201%29y_i) | | binary classification | F-beta Score | fbeta_score | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%281%20+%20%5Cbeta%5E2%29%20%5Cfrac%7B%5Ctext%7Bprecision%7D%20*%20%5Ctext%7Brecall%7D%7D%7B%20%28%5Cbeta%5E2%20*%20%5Ctext%7Bprecision%7D%29%20+%20%5Ctext%7Brecall%7D%7D) | +| binary classification | Sensitivity | sensitivity | ![equation](http://latex.codecogs.com/gif.latex?\frac{TP}{TP%20+%20FN}) | +| binary classification | Specificity | specificity | ![equation](http://latex.codecogs.com/gif.latex?\frac{TN}{TN%20+%20FP}) | +| binary classification | False Negative Rate | fnr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FN}{TP%20+%20FN}%20=%20(1%20-%20{sensitivity})) | +| binary classification | False Positive Rate | fpr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FP}{TN%20+%20FP}%20=%20(1%20-%20{specificity})) | \ No newline at end of file From 398fc491ea094310019bd6b7333b8a3136eb6dd3 Mon Sep 17 00:00:00 2001 From: "Jesus M. 
Castagnetto" Date: Fri, 12 Jul 2019 10:21:55 -0500 Subject: [PATCH 04/12] finished with README, fixed doc for lrn --- R/binary_classification.R | 2 +- README.md | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/R/binary_classification.R b/R/binary_classification.R index 9db8428..cd5cc5f 100644 --- a/R/binary_classification.R +++ b/R/binary_classification.R @@ -332,7 +332,7 @@ lrp <- function(actual, predicted) { #' \code{lrn} is used to assessing the value of performing a #' diagnostic test, and estimates the ratio of the probability #' of a true negative result over the probability of a false negative -#' result: \code{specificity / (1 - sensitivity)} +#' result: \code{(1 - sensitivity) / specificity} #' #' @inheritParams params_binary #' @seealso \code{\link{specificity}} \code{\link{sensitivity}} diff --git a/README.md b/README.md index f1d56c4..f200f1a 100644 --- a/README.md +++ b/README.md @@ -49,4 +49,11 @@ All functions in the **Metrics** package take at least two arguments: `actual` a | binary classification | Sensitivity | sensitivity | ![equation](http://latex.codecogs.com/gif.latex?\frac{TP}{TP%20+%20FN}) | | binary classification | Specificity | specificity | ![equation](http://latex.codecogs.com/gif.latex?\frac{TN}{TN%20+%20FP}) | | binary classification | False Negative Rate | fnr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FN}{TP%20+%20FN}%20=%20(1%20-%20{sensitivity})) | -| binary classification | False Positive Rate | fpr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FP}{TN%20+%20FP}%20=%20(1%20-%20{specificity})) | \ No newline at end of file +| binary classification | False Positive Rate | fpr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FP}{TN%20+%20FP}%20=%20(1%20-%20{specificity})) | +| binary classification | Positive Predictive Value | ppv | ![equation](http://latex.codecogs.com/gif.latex?\frac{TP}{TP%20+%20FP}) | +| binary classification | Negative Predictive Value | npv | ![equation](http://latex.codecogs.com/gif.latex?\frac{TN}{TNP%20+%20FN}) | +| binary classification | False Discovery Rate | fdr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FP}{TP%20+%20FP}%20=%20(1%20-%20{ppv})) | +| binary classification | False Omission Rate | fomr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FN}{TN%20+%20FN}%20=%20(1%20-%20{npv})) | +| binary classification | Positive Likelihood Ratio | lrp | ![equation](http://latex.codecogs.com/gif.latex?\frac{sensitivity}{1%20-%20specificity}) | +| binary classification | Negative Likelihood Ratio | lrn | ![equation](http://latex.codecogs.com/gif.latex?\frac{1%20-%20sensitivity}{specificity}%20=%20\frac{fnr}{tnr}) | + From 38d5a85893d37d3c63d0f6da6dc72431cbee8a66 Mon Sep 17 00:00:00 2001 From: "Jesus M. 
Castagnetto" Date: Fri, 12 Jul 2019 10:23:01 -0500 Subject: [PATCH 05/12] minor edit on lnr equation --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f200f1a..41cdbd9 100644 --- a/README.md +++ b/README.md @@ -55,5 +55,5 @@ All functions in the **Metrics** package take at least two arguments: `actual` a | binary classification | False Discovery Rate | fdr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FP}{TP%20+%20FP}%20=%20(1%20-%20{ppv})) | | binary classification | False Omission Rate | fomr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FN}{TN%20+%20FN}%20=%20(1%20-%20{npv})) | | binary classification | Positive Likelihood Ratio | lrp | ![equation](http://latex.codecogs.com/gif.latex?\frac{sensitivity}{1%20-%20specificity}) | -| binary classification | Negative Likelihood Ratio | lrn | ![equation](http://latex.codecogs.com/gif.latex?\frac{1%20-%20sensitivity}{specificity}%20=%20\frac{fnr}{tnr}) | +| binary classification | Negative Likelihood Ratio | lrn | ![equation](http://latex.codecogs.com/gif.latex?\frac{1%20-%20 sensitivity}{specificity}) | From 1674a3f27a0f8bc686a105ff01e3b02225d51acb Mon Sep 17 00:00:00 2001 From: "Jesus M. Castagnetto" Date: Fri, 12 Jul 2019 10:24:13 -0500 Subject: [PATCH 06/12] fixed extra LF --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 41cdbd9..6cbd1b9 100644 --- a/README.md +++ b/README.md @@ -55,5 +55,5 @@ All functions in the **Metrics** package take at least two arguments: `actual` a | binary classification | False Discovery Rate | fdr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FP}{TP%20+%20FP}%20=%20(1%20-%20{ppv})) | | binary classification | False Omission Rate | fomr | ![equation](http://latex.codecogs.com/gif.latex?\frac{FN}{TN%20+%20FN}%20=%20(1%20-%20{npv})) | | binary classification | Positive Likelihood Ratio | lrp | ![equation](http://latex.codecogs.com/gif.latex?\frac{sensitivity}{1%20-%20specificity}) | -| binary classification | Negative Likelihood Ratio | lrn | ![equation](http://latex.codecogs.com/gif.latex?\frac{1%20-%20 sensitivity}{specificity}) | +| binary classification | Negative Likelihood Ratio | lrn | ![equation](http://latex.codecogs.com/gif.latex?\frac{1%20-%20sensitivity}{specificity}) | From 4514d3901c51600264bc25a35cc0f0000a9b5659 Mon Sep 17 00:00:00 2001 From: "Jesus M. Castagnetto" Date: Mon, 29 Jul 2019 14:19:18 -0500 Subject: [PATCH 07/12] adding Rstudio project pattern --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index e5f7ab3..256128c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +*.Rproj + # R History *.Rhistory @@ -6,3 +8,4 @@ # Project files [Bb]uild/ +.Rproj.user From 1f6ee38ac5a6a87b953c9e8526093db29d5442e3 Mon Sep 17 00:00:00 2001 From: "Jesus M. 
Castagnetto" Date: Mon, 29 Jul 2019 14:22:44 -0500 Subject: [PATCH 08/12] making the confusion_matrix function a bit more robust and fixing some docs and references --- R/binary_classification.R | 125 ++++++++++++++++++++++++-------------- 1 file changed, 79 insertions(+), 46 deletions(-) diff --git a/R/binary_classification.R b/R/binary_classification.R index cd5cc5f..2aacd02 100644 --- a/R/binary_classification.R +++ b/R/binary_classification.R @@ -88,13 +88,14 @@ logLoss <- function(actual, predicted) { #' #' @inheritParams params_binary #' @export -#' @seealso \code{\link{recall}} \code{\link{fbeta_score}} +#' @seealso \code{\link{recall}} \code{\link{fbeta_score}} \code{\link{ppv}} #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 1, 1, 1, 1, 1) #' precision(actual, predicted) precision <- function(actual, predicted) { - return(mean(actual[predicted == 1])) + cm <- confusion_matrix(actual, predicted) + cm$tp / (cm$tp + cm$fp) } #' Recall @@ -112,7 +113,8 @@ precision <- function(actual, predicted) { #' predicted <- c(1, 0, 1, 1, 1, 1) #' recall(actual, predicted) recall <- function(actual, predicted) { - return(mean(predicted[actual == 1])) + cm <- confusion_matrix(actual, predicted) + cm$tp / (cm$tp + cm$fn) } #' F-beta Score @@ -141,10 +143,11 @@ fbeta_score <- function(actual, predicted, beta = 1) { #' Binary confusion matrix #' -#' \code{cmat} Calculates a binary classification confusion matrix, +#' \code{confusion_matrix} Calculates a binary classification confusion matrix, #' comparing the predicted with the actual values for the classes. #' Assumes that 1 is used for the positive class and 0 for the #' negative class. +#' #' Returns a \code{data.frame} with columns corresponding to the #' number of True Positives (\code{tp}), False Positives (\code{fp}), #' True Negatives (\code{tn}), and False Negatives (\code{fn}) @@ -154,138 +157,167 @@ fbeta_score <- function(actual, predicted, beta = 1) { #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 0, 1, 1, 1, 1) -#' cmat(actual, predicted) +#' confusion_matrix(actual, predicted) +confusion_matrix <- function(actual, predicted) { + binvals <- c(0, 1) + + # ideally "actual" should be a combination of 0s and 1s, + # but could be all 0s or all 1s as degenerate cases + if (!( + setequal(binvals, unique(actual)) | + setequal(c(0), unique(actual)) | + setequal(c(1), unique(actual)) + )) { + stop(paste("Expecting a vector of 0s and 1s for 'actual'. Got:", + paste(actual, collapse = ", "))) + } + + # "predicted" could be all 0s, all 1s, or a combination + if (!( + setequal(binvals, unique(predicted)) | + setequal(c(0), unique(predicted)) | + setequal(c(1), unique(predicted)) + )) { + stop(paste("Expecting a vector of 0s and 1s for 'predicted'. 
Got:", + paste(predicted, collapse = ", "))) + } -cmat <- function(actual, predicted) { - tbl <- as.data.frame(table(actual, predicted)) - cm <- data.frame(tp = NA, fn = NA, fp = NA, tn = NA) - cm[, c("tn", "fp", "fn", "tp")] <- tbl$Freq - cm + if (length(actual) != length(predicted)) { + stop( + paste( + "Size of 'actual' and 'predicted' are not the same:", + length(actual), "!=", length(predicted) + ) + ) + } + + # explicit comparison + tp <- sum(actual == 1 & predicted == 1) + tn <- sum(actual == 0 & predicted == 0) + fn <- sum(actual == 1 & predicted == 0) + fp <- sum(actual == 0 & predicted == 1) + data.frame("tp" = tp, "fn" = fn, "fp" = fp, "tn" = tn) } #' Sensitivity #' -#' \code{sensitivity} calculates the proportion of actual positives +#' \code{sensitivity} calculates the proportion of actual positives (\code{actual} equals 1) #' that are correctly identified as such. It is also known as -#' \code{true positive rate}. +#' \code{true positive rate} or \code{recall}. #' #' @inheritParams params_binary -#' @seealso \code{\link{cmat}} \code{\link{specificity}} +#' @seealso \code{\link{confusion_matrix}} \code{\link{specificity}} #' @export #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 0, 1, 1, 1, 1) #' sensitivity(actual, predicted) - sensitivity <- function(actual, predicted) { - cm <- cmat(actual, predicted) - cm$tp / (cm$tp + cm$fn) + recall(actual, predicted) } #' Specificity #' -#' \code{specificity} calculates the proportion of actual negatives +#' \code{specificity} calculates the proportion of actual negatives (\code{actual} equals 0) #' that are correctly identified as such. It is also known as #' \code{true negative rate}. #' #' @inheritParams params_binary -#' @seealso \code{\link{cmat}} \code{\link{sensitivity}} +#' @seealso \code{\link{confusion_matrix}} \code{\link{sensitivity}} #' @export #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 0, 1, 1, 1, 1) #' specificity(actual, predicted) - specificity <- function(actual, predicted) { - cm <- cmat(actual, predicted) + cm <- confusion_matrix(actual, predicted) cm$tn / (cm$tn + cm$fp) } #' False Negative Rate #' -#' \code{fnr} calculates the proportion of actual positives +#' \code{fnr} calculates the proportion of actual positives (\code{actual} equals 1) #' that are not identified as such. +#' #' It is defined as \code{1 - sensitivity} #' #' @inheritParams params_binary #' @export -#' @seealso \code{\link{cmat}} \code{\link{sensitivity}} +#' @seealso \code{\link{confusion_matrix}} \code{\link{sensitivity}} #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 0, 1, 1, 1, 1) #' fnr(actual, predicted) - fnr <- function(actual, predicted) { 1 - sensitivity(actual, predicted) } #' False Positive Rate #' -#' \code{fnr} calculates the proportion of actual negatives +#' \code{fnr} calculates the proportion of actual negative values (\code{actual} equals 0) #' that are not identified as such. +#' #' It is defined as \code{1 - specificity} #' #' @inheritParams params_binary -#' @seealso \code{\link{cmat}} \code{\link{specificity}} +#' @seealso \code{\link{confusion_matrix}} \code{\link{specificity}} #' @export #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 0, 1, 1, 1, 1) #' fpr(actual, predicted) - fpr <- function(actual, predicted) { 1 - specificity(actual, predicted) } #' Positive Predictive Value #' -#' \code{ppv} calculates the proportion positive values that -#' are true positive results. 
+#' \code{ppv} calculates the proportion of all predicted positive values (\code{predicted} equals 1) that
+#' are true positive results. It is also known as \code{precision}.
 #'
 #' @inheritParams params_binary
-#' @seealso \code{\link{cmat}} \code{\link{npv}}
+#' @seealso \code{\link{confusion_matrix}} \code{\link{npv}} \code{\link{precision}}
 #' @export
 #' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' ppv(actual, predicted)
-
 ppv <- function(actual, predicted) {
-  cm <- cmat(actual, predicted)
-  cm$tp / (cm$tp + cm$fp)
+  precision(actual, predicted)
 }
 
 #' Negative Predictive Value
 #'
-#' \code{npv} calculates the proportion of negative values that
+#' \code{npv} calculates the proportion of all predicted negative values (\code{predicted} equals 0) that
 #' are true negative results.
 #'
 #' @inheritParams params_binary
-#' @seealso \code{\link{cmat}} \code{\link{ppv}}
+#' @seealso \code{\link{confusion_matrix}} \code{\link{ppv}}
 #' @export
 #' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' npv(actual, predicted)
 npv <- function(actual, predicted) {
-  cm <- cmat(actual, predicted)
+  cm <- confusion_matrix(actual, predicted)
   cm$tn / (cm$tn + cm$fn)
 }
 
 #' False Discovery Rate
 #'
-#' \code{fdr} is the complement of the Positive Predictive
-#' Value (\code{ppv}), and is the proportion of positive
-#' results that are false positives.
+#' \code{fdr} computes the proportion of observations predicted to be in
+#' the positive class (i.e. the element in \code{predicted} equals 1) that
+#' actually belong to the negative class (i.e. the element in \code{actual} equals 0).
+#'
+#' It is implemented as \code{1 - ppv}
 #'
 #' @inheritParams params_binary
-#' @seealso \code{\link{cmat}} \code{\link{ppv}}
+#' @seealso \code{\link{confusion_matrix}} \code{\link{ppv}}
 #' @export
 #' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' fdr(actual, predicted)
-
 fdr <- function(actual, predicted) {
   1 - ppv(actual, predicted)
 }
 
 #' False Omission Rate
 #'
 #' \code{fomr} is the complement of the Negative Predictive
 #' Value (\code{npv}), and is the proportion of negative
 #' results that are false negatives.
 #'
 #' @inheritParams params_binary
-#' @seealso \code{\link{cmat}} \code{\link{npv}}
+#' @seealso \code{\link{confusion_matrix}} \code{\link{npv}}
 #' @export
 #' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' fomr(actual, predicted)
-
 fomr <- function(actual, predicted) {
   1 - npv(actual, predicted)
 }
 
 #' Positive Likelihood Ratio
 #'
 #' \code{lrp} is used to assess the value of performing a
 #' diagnostic test, and estimates the ratio of the probability
 #' of a true positive result over the probability of a false positive
-#' result: \code{sensitivity / (1 - specificity)}
+#' result.
+#'
+#' It is implemented as the ratio: \code{sensitivity / (1 - specificity)}
 #'
 #' @inheritParams params_binary
 #' @seealso \code{\link{sensitivity}} \code{\link{fpr}}
 #' @export
 #' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' lrp(actual, predicted)
-
 lrp <- function(actual, predicted) {
   sensitivity(actual, predicted) / (1 - specificity(actual, predicted))
 }
 
 #' Negative Likelihood Ratio
 #'
 #' \code{lrn} is used to assess the value of performing a
 #' diagnostic test, and estimates the ratio of the probability
 #' of a false negative result over the probability of a true negative
-#' result: \code{(1 - sensitivity) / specificity}
+#' result.
+#'
+#' It is implemented as the ratio: \code{(1 - sensitivity) / specificity}
 #'
 #' @inheritParams params_binary
 #' @seealso \code{\link{specificity}} \code{\link{sensitivity}}
 #' @export
 #' @examples
 #' actual <- c(1, 1, 1, 0, 0, 0)
 #' predicted <- c(1, 0, 1, 1, 1, 1)
 #' lrn(actual, predicted)
-
 lrn <- function(actual, predicted) {
   (1 - sensitivity(actual, predicted)) / specificity(actual, predicted)
 }
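With the checks added above, malformed inputs now fail fast instead of silently producing a misshapen table; a brief sketch of the intended behavior (return values shown as comments):

    confusion_matrix(c(1, 0, 1), c(1, 0, 0))
    #   tp fn fp tn
    # 1  1  1  0  1
    confusion_matrix(c(1, 1), c(0, 0))             # degenerate all-1s/all-0s inputs are allowed
    try(confusion_matrix(c(1, 2, 0), c(1, 0, 0)))  # error: not a vector of 0s and 1s
    try(confusion_matrix(c(1, 0), c(1, 0, 0)))     # error: lengths differ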
From 9805b641f7b59b6c1534c9d3b16242e9afc71f13 Mon Sep 17 00:00:00 2001
From: "Jesus M. Castagnetto"
Date: Mon, 29 Jul 2019 14:23:18 -0500
Subject: [PATCH 09/12] generated docs

---
 man/confusion_matrix.Rd | 35 +++++++++++++++++++++++++++++++++
 man/fbeta_score.Rd      |  6 +++---
 man/fdr.Rd              | 32 ++++++++++++++++++++++++++++++++
 man/fnr.Rd              | 31 +++++++++++++++++++++++++++++++
 man/fomr.Rd             | 29 +++++++++++++++++++++++++++++
 man/fpr.Rd              | 31 +++++++++++++++++++++++++++++++
 man/lrn.Rd              | 33 +++++++++++++++++++++++++++++++++
 man/lrp.Rd              | 33 +++++++++++++++++++++++++++++++++
 man/npv.Rd              | 28 ++++++++++++++++++++++++++++
 man/ppv.Rd              | 28 ++++++++++++++++++++++++++++
 man/precision.Rd        |  4 ++--
 man/sensitivity.Rd      | 29 +++++++++++++++++++++++++++++
 man/specificity.Rd      | 29 +++++++++++++++++++++++++++++
 13 files changed, 343 insertions(+), 5 deletions(-)
 create mode 100644 man/confusion_matrix.Rd
 create mode 100644 man/fdr.Rd
 create mode 100644 man/fnr.Rd
 create mode 100644 man/fomr.Rd
 create mode 100644 man/fpr.Rd
 create mode 100644 man/lrn.Rd
 create mode 100644 man/lrp.Rd
 create mode 100644 man/npv.Rd
 create mode 100644 man/ppv.Rd
 create mode 100644 man/sensitivity.Rd
 create mode 100644 man/specificity.Rd

diff --git a/man/confusion_matrix.Rd b/man/confusion_matrix.Rd
new file mode 100644
index 0000000..eac8951
--- /dev/null
+++ b/man/confusion_matrix.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{confusion_matrix}
+\alias{confusion_matrix}
+\title{Binary confusion matrix}
+\usage{
+confusion_matrix(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{confusion_matrix} calculates a binary classification confusion matrix,
+comparing the predicted with the actual values for the classes.
+Assumes that 1 is used for the positive class and 0 for the
+negative class.
+}
+\details{
+Returns a \code{data.frame} with columns corresponding to the
+number of True Positives (\code{tp}), False Positives (\code{fp}),
+True Negatives (\code{tn}), and False Negatives (\code{fn})
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+confusion_matrix(actual, predicted)
+}
+\seealso{
+\code{\link{sensitivity}} \code{\link{specificity}}
+}
diff --git a/man/fbeta_score.Rd b/man/fbeta_score.Rd
index aaceb11..eb8139c 100644
--- a/man/fbeta_score.Rd
+++ b/man/fbeta_score.Rd
@@ -14,10 +14,10 @@ class and 0 for the negative class.}
 class and 0 for the negative class. Each element represents the
 prediction for the corresponding element in \code{actual}.}
 
-\item{beta}{A non-negative real number controlling how close the F-beta score is to 
-either Precision or Recall. When \code{beta} is at the default of 1, 
+\item{beta}{A non-negative real number controlling how close the F-beta score is to
+either Precision or Recall. When \code{beta} is at the default of 1,
 the F-beta Score is exactly an equally weighted harmonic mean.
-The F-beta score will weight toward Precision when \code{beta} is less 
+The F-beta score will weight toward Precision when \code{beta} is less
 than one. The F-beta score will weight toward Recall when \code{beta} is
 greater than one.}
diff --git a/man/fdr.Rd b/man/fdr.Rd
new file mode 100644
index 0000000..8fd8ed5
--- /dev/null
+++ b/man/fdr.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{fdr}
+\alias{fdr}
+\title{False Discovery Rate}
+\usage{
+fdr(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{fdr} computes the proportion of observations predicted to be in
+the positive class (i.e. the element in \code{predicted} equals 1) that
+actually belong to the negative class (i.e. the element in \code{actual} equals 0).
+}
+\details{
+It is implemented as \code{1 - ppv}
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+fdr(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{ppv}}
+}
diff --git a/man/fnr.Rd b/man/fnr.Rd
new file mode 100644
index 0000000..7edb8c2
--- /dev/null
+++ b/man/fnr.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{fnr}
+\alias{fnr}
+\title{False Negative Rate}
+\usage{
+fnr(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{fnr} calculates the proportion of actual positives (\code{actual} equals 1)
+that are not identified as such.
+}
+\details{
+It is defined as \code{1 - sensitivity}
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+fnr(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{sensitivity}}
+}
diff --git a/man/fomr.Rd b/man/fomr.Rd
new file mode 100644
index 0000000..a708cbf
--- /dev/null
+++ b/man/fomr.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{fomr}
+\alias{fomr}
+\title{False Omission Rate}
+\usage{
+fomr(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{fomr} is the complement of the Negative Predictive
+Value (\code{npv}), and is the proportion of negative
+results that are false negatives.
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+fomr(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{npv}}
+}
diff --git a/man/fpr.Rd b/man/fpr.Rd
new file mode 100644
index 0000000..400b7b3
--- /dev/null
+++ b/man/fpr.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{fpr}
+\alias{fpr}
+\title{False Positive Rate}
+\usage{
+fpr(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{fpr} calculates the proportion of actual negative values (\code{actual} equals 0)
+that are not identified as such.
+}
+\details{
+It is defined as \code{1 - specificity}
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+fpr(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{specificity}}
+}
diff --git a/man/lrn.Rd b/man/lrn.Rd
new file mode 100644
index 0000000..d8cc794
--- /dev/null
+++ b/man/lrn.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{lrn}
+\alias{lrn}
+\title{Negative Likelihood Ratio}
+\usage{
+lrn(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{lrn} is used to assess the value of performing a
+diagnostic test, and estimates the ratio of the probability
+of a false negative result over the probability of a true negative
+result.
+}
+\details{
+It is implemented as the ratio: \code{(1 - sensitivity) / specificity}
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+lrn(actual, predicted)
+}
+\seealso{
+\code{\link{specificity}} \code{\link{sensitivity}}
+}
diff --git a/man/lrp.Rd b/man/lrp.Rd
new file mode 100644
index 0000000..fbc4b35
--- /dev/null
+++ b/man/lrp.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{lrp}
+\alias{lrp}
+\title{Positive Likelihood Ratio}
+\usage{
+lrp(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{lrp} is used to assess the value of performing a
+diagnostic test, and estimates the ratio of the probability
+of a true positive result over the probability of a false positive
+result.
+}
+\details{
+It is implemented as the ratio: \code{sensitivity / (1 - specificity)}
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+lrp(actual, predicted)
+}
+\seealso{
+\code{\link{sensitivity}} \code{\link{fpr}}
+}
diff --git a/man/npv.Rd b/man/npv.Rd
new file mode 100644
index 0000000..d31a84e
--- /dev/null
+++ b/man/npv.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{npv}
+\alias{npv}
+\title{Negative Predictive Value}
+\usage{
+npv(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{npv} calculates the proportion of all predicted negative values (\code{predicted} equals 0) that
+are true negative results.
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+npv(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{ppv}}
+}
diff --git a/man/ppv.Rd b/man/ppv.Rd
new file mode 100644
index 0000000..d3e36f8
--- /dev/null
+++ b/man/ppv.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{ppv}
+\alias{ppv}
+\title{Positive Predictive Value}
+\usage{
+ppv(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{ppv} calculates the proportion of all predicted positive values (\code{predicted} equals 1) that
+are true positive results. It is also known as \code{precision}.
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+ppv(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{npv}} \code{\link{precision}}
+}
diff --git a/man/precision.Rd b/man/precision.Rd
index 5f27436..efe338f 100644
--- a/man/precision.Rd
+++ b/man/precision.Rd
@@ -17,7 +17,7 @@ prediction for the corresponding element in \code{actual}.}
 \description{
 \code{precision} computes the proportion of observations predicted to be in
  the positive class (i.e. the element in \code{predicted} equals 1)
- that actually belong to the positive class (i.e. the element 
+ that actually belong to the positive class (i.e. the element
  in \code{actual} equals 1)
 }
 \examples{
 actual <- c(1, 1, 1, 0, 0, 0)
 predicted <- c(1, 1, 1, 1, 1, 1)
 precision(actual, predicted)
 }
 \seealso{
-\code{\link{recall}} \code{\link{fbeta_score}}
+\code{\link{recall}} \code{\link{fbeta_score}} \code{\link{ppv}}
 }
diff --git a/man/sensitivity.Rd b/man/sensitivity.Rd
new file mode 100644
index 0000000..75a2055
--- /dev/null
+++ b/man/sensitivity.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{sensitivity}
+\alias{sensitivity}
+\title{Sensitivity}
+\usage{
+sensitivity(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{sensitivity} calculates the proportion of actual positives (\code{actual} equals 1)
+that are correctly identified as such. It is also known as
+\code{true positive rate} or \code{recall}.
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+sensitivity(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{specificity}}
+}
diff --git a/man/specificity.Rd b/man/specificity.Rd
new file mode 100644
index 0000000..a41523d
--- /dev/null
+++ b/man/specificity.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{specificity}
+\alias{specificity}
+\title{Specificity}
+\usage{
+specificity(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{specificity} calculates the proportion of actual negatives (\code{actual} equals 0)
+that are correctly identified as such. It is also known as
+\code{true negative rate}.
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+specificity(actual, predicted)
+}
+\seealso{
+\code{\link{confusion_matrix}} \code{\link{sensitivity}}
+}
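Because sensitivity now delegates to recall and ppv to precision, the newly generated pages document the same computations under both names; a quick check that the aliases agree (assuming the updated package is loaded):

    actual    <- c(1, 1, 1, 0, 0, 0)
    predicted <- c(1, 0, 1, 1, 1, 1)
    identical(sensitivity(actual, predicted), recall(actual, predicted))  # TRUE
    identical(ppv(actual, predicted), precision(actual, predicted))       # TRUE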
Castagnetto" Date: Mon, 29 Jul 2019 14:23:49 -0500 Subject: [PATCH 10/12] updating package description and namespace --- .Rbuildignore | 3 ++- DESCRIPTION | 5 +++-- NAMESPACE | 10 ++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 29dcfad..09aac15 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,4 +4,5 @@ proj$ ^NEWS.*$ .travis.yml ^CODE_OF_CONDUCT\.md$ -^CONTRIBUTING\.md$ \ No newline at end of file +^CONTRIBUTING\.md$ +^.*\.Rproj$ diff --git a/DESCRIPTION b/DESCRIPTION index 0309d2d..e352ad1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,11 +9,12 @@ Description: An implementation of evaluation metrics in R that are commonly Authors@R: c( person("Ben", "Hamner", role = c("aut", "cph"), email = "ben@benhamner.com"), person("Michael", "Frasco", role = c("aut", "cre"), email = "mfrasco6@gmail.com"), - person("Erin", "LeDell", role = c("ctb"), email = "oss@ledell.org")) + person("Erin", "LeDell", role = c("ctb"), email = "oss@ledell.org"), + person("Jesus", "Castagnetto", role = c("ctb"), email = "jesus@castagnetto.com")) Maintainer: Michael Frasco Suggests: testthat URL: https://github.com/mfrasco/Metrics BugReports: https://github.com/mfrasco/Metrics/issues License: BSD_3_clause + file LICENSE -RoxygenNote: 6.1.0 +RoxygenNote: 6.1.1 diff --git a/NAMESPACE b/NAMESPACE index fcfec79..422d2c1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,8 +12,14 @@ export(ce) export(explained_variation) export(f1) export(fbeta_score) +export(fdr) +export(fnr) +export(fomr) +export(fpr) export(ll) export(logLoss) +export(lrn) +export(lrp) export(mae) export(mape) export(mapk) @@ -21,7 +27,9 @@ export(mase) export(mdae) export(mse) export(msle) +export(npv) export(percent_bias) +export(ppv) export(precision) export(rae) export(recall) @@ -30,7 +38,9 @@ export(rmsle) export(rrse) export(rse) export(se) +export(sensitivity) export(sle) export(smape) +export(specificity) export(sse) importFrom(stats,median) From 2307368657102ecb244c8351859cd8a66c09b5ce Mon Sep 17 00:00:00 2001 From: "Jesus M. Castagnetto" Date: Tue, 30 Jul 2019 10:27:14 -0500 Subject: [PATCH 11/12] added .RData --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 256128c..6b35920 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.RData *.Rproj # R History From a189e12c089c12dc39bc58568018e3d569cdc125 Mon Sep 17 00:00:00 2001 From: "Jesus M. Castagnetto" Date: Tue, 30 Jul 2019 10:27:31 -0500 Subject: [PATCH 12/12] simplifying code --- R/binary_classification.R | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/R/binary_classification.R b/R/binary_classification.R index 2aacd02..fe2abd8 100644 --- a/R/binary_classification.R +++ b/R/binary_classification.R @@ -160,13 +160,15 @@ fbeta_score <- function(actual, predicted, beta = 1) { #' confusion_matrix(actual, predicted) confusion_matrix <- function(actual, predicted) { binvals <- c(0, 1) + b_actual <- unique(actual) + b_predicted <- unique(predicted) # ideally "actual" should be a combination of 0s and 1s, # but could be all 0s or all 1s as degenerate cases if (!( - setequal(binvals, unique(actual)) | - setequal(c(0), unique(actual)) | - setequal(c(1), unique(actual)) + setequal(binvals, b_actual) | + setequal(c(0), b_actual) | + setequal(c(1), b_actual) )) { stop(paste("Expecting a vector of 0s and 1s for 'actual'. 
Got:", paste(actual, collapse = ", "))) @@ -174,9 +176,9 @@ confusion_matrix <- function(actual, predicted) { # "predicted" could be all 0s, all 1s, or a combination if (!( - setequal(binvals, unique(predicted)) | - setequal(c(0), unique(predicted)) | - setequal(c(1), unique(predicted)) + setequal(binvals, b_predicted) | + setequal(c(0), b_predicted) | + setequal(c(1), b_predicted) )) { stop(paste("Expecting a vector of 0s and 1s for 'predicted'. Got:", paste(predicted, collapse = ", "))) @@ -185,18 +187,19 @@ confusion_matrix <- function(actual, predicted) { if (length(actual) != length(predicted)) { stop( paste( - "Size of 'actual' and 'predicted' are not the same:", - length(actual), "!=", length(predicted) + "Size of 'actual' and 'predicted' are not the same: length(actual):", + length(actual), "!= length(predicted):", length(predicted) ) ) } - # explicit comparison - tp <- sum(actual == 1 & predicted == 1) - tn <- sum(actual == 0 & predicted == 0) - fn <- sum(actual == 1 & predicted == 0) - fp <- sum(actual == 0 & predicted == 1) - data.frame("tp" = tp, "fn" = fn, "fp" = fp, "tn" = tn) + # explicit comparisons + data.frame( + "tp" = sum(actual == 1 & predicted == 1), + "fn" = sum(actual == 1 & predicted == 0), + "fp" = sum(actual == 0 & predicted == 1), + "tn" = sum(actual == 0 & predicted == 0) + ) } #' Sensitivity