From a02e4b935985cc33b1f1ede5c2c596acce3c4ca5 Mon Sep 17 00:00:00 2001 From: Michael Horrell Date: Wed, 18 Apr 2018 19:46:01 -0500 Subject: [PATCH 1/7] added precision, recall, f1_score --- R/binary_classification.R | 41 +++++++++++++++++++++ tests/testthat/test-binary_classification.R | 18 +++++++++ 2 files changed, 59 insertions(+) diff --git a/R/binary_classification.R b/R/binary_classification.R index affaf16..8505788 100644 --- a/R/binary_classification.R +++ b/R/binary_classification.R @@ -76,3 +76,44 @@ ll <- function(actual, predicted) { logLoss <- function(actual, predicted) { return(mean(ll(actual, predicted))) } + + + +#' Precision +#' +#' \code{precision} computes proportion of predicted 1's that are actual 1's +#' @export +#' @examples +#' actual <- c(1, 1, 1, 0, 0, 0) +#' predicted <- c(1, 1, 1, 1, 1, 1) +#' precision(actual, predicted) +precision <- function(actual, predicted) { + return(mean(actual[predicted == 1])) +} + +#' Recall +#' +#' \code{recall} computes proportion of actual 1's that are predicted 1's +#' @export +#' @examples +#' actual <- c(1, 1, 1, 0, 0, 0) +#' predicted <- c(1, 0, 1, 1, 1, 1) +#' recall(actual, predicted) +recall <- function(actual, predicted) { + return(mean(predicted[actual == 1])) +} + +#' F1 score +#' +#' \code{f1_score} computes the f1 score +#' @export +#' @seealso \code{\link{precision}}, \code{\link{recall}} +#' @examples +#' actual <- c(1, 1, 1, 0, 0, 0) +#' predicted <- c(1, 0, 1, 1, 1, 1) +#' recall(actual, predicted) +f1_score <- function(actual, predicted) { + prec = precision(actual, predicted) + rec = recall(actual, predicted) + return(2 * prec * rec / (prec + rec)) +} diff --git a/tests/testthat/test-binary_classification.R b/tests/testthat/test-binary_classification.R index ff8c043..f5bf3fc 100644 --- a/tests/testthat/test-binary_classification.R +++ b/tests/testthat/test-binary_classification.R @@ -21,3 +21,21 @@ test_that('mean los loss is calculated correctly', { 
expect_equal(logLoss(c(1,1,1,0,0,0),c(.5,.1,.01,.9,.75,.001)), 1.881797068998267) }) +test_that('precision is calculated correctly', { + expect_equal(precision(c(1,1,0,0),c(1,1,0,0)), 1) + expect_equal(precision(c(0,0,1,1),c(1,1,0,0)), 0) + expect_equal(precision(c(1,1,0,0),c(1,1,1,1)), 1.2) +}) + +test_that('recall is calculated correctly', { + expect_equal(recall(c(1,1,0,0),c(1,1,0,0)), 1) + expect_equal(recall(c(0,0,1,1),c(1,1,0,0)), 0) + expect_equal(recall(c(1,1,1,1),c(1,0,0,1)), 1/2) +}) + +test_that('f1 score is calculated correctly',{ + expect_equal(f1_score(c(1,1,0,0),c(1,1,0,0)), 1) + expect_equal(f1_score(c(0,0,1,1),c(1,1,1,0)), 2/5) + expect_equal(f1_score(c(1,1,1,1),c(1,0,0,1)), 2/3) +}) + From ef2ef973d274f290066023921944cb4003358795 Mon Sep 17 00:00:00 2001 From: Michael Horrell Date: Wed, 18 Apr 2018 19:51:07 -0500 Subject: [PATCH 2/7] added beta to f1_score --- R/binary_classification.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/binary_classification.R b/R/binary_classification.R index 8505788..7dd2a4a 100644 --- a/R/binary_classification.R +++ b/R/binary_classification.R @@ -112,8 +112,8 @@ recall <- function(actual, predicted) { #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 0, 1, 1, 1, 1) #' recall(actual, predicted) -f1_score <- function(actual, predicted) { +f1_score <- function(actual, predicted, beta = 1) { prec = precision(actual, predicted) rec = recall(actual, predicted) - return(2 * prec * rec / (prec + rec)) + return((1 + beta^2) * prec * rec / ((beta^2 * prec) + rec)) } From f1168421883bb13e636b5ee71dc2c5c218869b66 Mon Sep 17 00:00:00 2001 From: Michael Horrell Date: Wed, 18 Apr 2018 19:54:33 -0500 Subject: [PATCH 3/7] typo in test --- tests/testthat/test-binary_classification.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-binary_classification.R b/tests/testthat/test-binary_classification.R index f5bf3fc..f013c72 100644 --- 
a/tests/testthat/test-binary_classification.R +++ b/tests/testthat/test-binary_classification.R @@ -24,7 +24,7 @@ test_that('mean los loss is calculated correctly', { test_that('precision is calculated correctly', { expect_equal(precision(c(1,1,0,0),c(1,1,0,0)), 1) expect_equal(precision(c(0,0,1,1),c(1,1,0,0)), 0) - expect_equal(precision(c(1,1,0,0),c(1,1,1,1)), 1.2) + expect_equal(precision(c(1,1,0,0),c(1,1,1,1)), 1/2) }) test_that('recall is calculated correctly', { From f57c61394504f3982f14b0fb566b8656748142ed Mon Sep 17 00:00:00 2001 From: Michael Horrell Date: Wed, 25 Apr 2018 00:29:58 -0500 Subject: [PATCH 4/7] updated documentation and formatting --- NAMESPACE | 3 +++ R/binary_classification.R | 43 +++++++++++++++++++++++++++++---------- man/fbeta_score.Rd | 35 +++++++++++++++++++++++++++++++ man/precision.Rd | 30 +++++++++++++++++++++++++++ man/recall.Rd | 30 +++++++++++++++++++++++++++ 5 files changed, 130 insertions(+), 11 deletions(-) create mode 100644 man/fbeta_score.Rd create mode 100644 man/precision.Rd create mode 100644 man/recall.Rd diff --git a/NAMESPACE b/NAMESPACE index 8d50f77..8766f86 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ export(auc) export(bias) export(ce) export(f1) +export(fbeta_score) export(ll) export(logLoss) export(mae) @@ -20,7 +21,9 @@ export(mdae) export(mse) export(msle) export(percent_bias) +export(precision) export(rae) +export(recall) export(rmse) export(rmsle) export(rrse) diff --git a/R/binary_classification.R b/R/binary_classification.R index 7dd2a4a..ec28cf5 100644 --- a/R/binary_classification.R +++ b/R/binary_classification.R @@ -81,39 +81,60 @@ logLoss <- function(actual, predicted) { #' Precision #' -#' \code{precision} computes proportion of predicted 1's that are actual 1's +#' \code{precision} computes proportion of observations predicted to be in the +#' positive class (i.e. the element in \code{predicted} equals 1) +#' that actually belong to the positive class (i.e. 
the element +#' in \code{actual} equals 1) +#' +#' @inheritParams params_binary #' @export +#' @seealso \code{\link{recall}} \code{\link{fbeta_score}} #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 1, 1, 1, 1, 1) #' precision(actual, predicted) precision <- function(actual, predicted) { - return(mean(actual[predicted == 1])) + return(mean(actual[predicted == 1])) } #' Recall #' -#' \code{recall} computes proportion of actual 1's that are predicted 1's +#' \code{recall} computes proportion of observations in the positive class +#' (i.e. the element in \code{actual} equals 1) that are predicted +#' to be in the positive class (i.e. the element in \code{predicted} +#' equals 1) +#' +#' @inheritParams params_binary #' @export +#' @seealso \code{\link{precision}} \code{\link{fbeta_score}} #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 0, 1, 1, 1, 1) #' recall(actual, predicted) recall <- function(actual, predicted) { - return(mean(predicted[actual == 1])) + return(mean(predicted[actual == 1])) } -#' F1 score +#' F-beta Score #' -#' \code{f1_score} computes the f1 score +#' \code{fbeta_score} computes a weighted harmonic mean of Precision and Recall. +#' The \code{beta} parameter controls the weighting. +#' +#' @inheritParams params_binary +#' @param beta A non-negative real number controlling how close the F-beta score is to +#' either Precision or Recall. When \code{beta} is at the default of 1, +#' the F-beta Score is exactly an equally weighted harmonic mean. +#' The F-beta score will weight toward Precision when \code{beta} is close +#' to zero. The F-beta score will weight toward Recall for large values of +#' \code{beta}. 
#' @export -#' @seealso \code{\link{precision}}, \code{\link{recall}} +#' @seealso \code{\link{precision}} \code{\link{recall}} #' @examples #' actual <- c(1, 1, 1, 0, 0, 0) #' predicted <- c(1, 0, 1, 1, 1, 1) -#' recall(actual, predicted) -f1_score <- function(actual, predicted, beta = 1) { - prec = precision(actual, predicted) - rec = recall(actual, predicted) - return((1 + beta^2) * prec * rec / ((beta^2 * prec) + rec)) +#' fbeta_score(actual, predicted) +fbeta_score <- function(actual, predicted, beta = 1) { + prec <- precision(actual, predicted) + rec <- recall(actual, predicted) + return((1 + beta^2) * prec * rec / ((beta^2 * prec) + rec)) } diff --git a/man/fbeta_score.Rd b/man/fbeta_score.Rd new file mode 100644 index 0000000..507d2e1 --- /dev/null +++ b/man/fbeta_score.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/binary_classification.R +\name{fbeta_score} +\alias{fbeta_score} +\title{F-beta Score} +\usage{ +fbeta_score(actual, predicted, beta = 1) +} +\arguments{ +\item{actual}{The ground truth binary numeric vector containing 1 for the positive +class and 0 for the negative class.} + +\item{predicted}{The predicted binary numeric vector containing 1 for the positive +class and 0 for the negative class. Each element represents the +prediction for the corresponding element in \code{actual}.} + +\item{beta}{A non-negative real number controlling how close the F-beta score is to +either Precision or Recall. When \code{beta} is at the default of 1, +the F-beta Score is exactly an equally weighted harmonic mean. +The F-beta score will weight toward Precision when \code{beta} is close +to zero. The F-beta score will weight toward Recall for large values of +\code{beta}.} +} +\description{ +\code{fbeta_score} computes a weighted harmonic mean of Precision and Recall. + The \code{beta} parameter controls the weighting. 
+} +\examples{ +actual <- c(1, 1, 1, 0, 0, 0) +predicted <- c(1, 0, 1, 1, 1, 1) +fbeta_score(actual, predicted) +} +\seealso{ +\code{\link{precision}} \code{\link{recall}} +} diff --git a/man/precision.Rd b/man/precision.Rd new file mode 100644 index 0000000..5f27436 --- /dev/null +++ b/man/precision.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/binary_classification.R +\name{precision} +\alias{precision} +\title{Precision} +\usage{ +precision(actual, predicted) +} +\arguments{ +\item{actual}{The ground truth binary numeric vector containing 1 for the positive +class and 0 for the negative class.} + +\item{predicted}{The predicted binary numeric vector containing 1 for the positive +class and 0 for the negative class. Each element represents the +prediction for the corresponding element in \code{actual}.} +} +\description{ +\code{precision} computes proportion of observations predicted to be in the + positive class (i.e. the element in \code{predicted} equals 1) + that actually belong to the positive class (i.e. the element + in \code{actual} equals 1) +} +\examples{ +actual <- c(1, 1, 1, 0, 0, 0) +predicted <- c(1, 1, 1, 1, 1, 1) +precision(actual, predicted) +} +\seealso{ +\code{\link{recall}} \code{\link{fbeta_score}} +} diff --git a/man/recall.Rd b/man/recall.Rd new file mode 100644 index 0000000..d314035 --- /dev/null +++ b/man/recall.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/binary_classification.R +\name{recall} +\alias{recall} +\title{Recall} +\usage{ +recall(actual, predicted) +} +\arguments{ +\item{actual}{The ground truth binary numeric vector containing 1 for the positive +class and 0 for the negative class.} + +\item{predicted}{The predicted binary numeric vector containing 1 for the positive +class and 0 for the negative class. 
Each element represents the +prediction for the corresponding element in \code{actual}.} +} +\description{ +\code{recall} computes proportion of observations in the positive class + (i.e. the element in \code{actual} equals 1) that are predicted + to be in the positive class (i.e. the element in \code{predicted} + equals 1) +} +\examples{ +actual <- c(1, 1, 1, 0, 0, 0) +predicted <- c(1, 0, 1, 1, 1, 1) +recall(actual, predicted) +} +\seealso{ +\code{\link{precision}} \code{\link{fbeta_score}} +} From ea7f7402371e2f4fa861ab120b2dbf62ef450203 Mon Sep 17 00:00:00 2001 From: Michael Horrell Date: Wed, 25 Apr 2018 00:39:16 -0500 Subject: [PATCH 5/7] fixed test --- tests/testthat/test-binary_classification.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-binary_classification.R b/tests/testthat/test-binary_classification.R index f013c72..b94d98f 100644 --- a/tests/testthat/test-binary_classification.R +++ b/tests/testthat/test-binary_classification.R @@ -33,9 +33,10 @@ test_that('recall is calculated correctly', { expect_equal(recall(c(1,1,1,1),c(1,0,0,1)), 1/2) }) -test_that('f1 score is calculated correctly',{ - expect_equal(f1_score(c(1,1,0,0),c(1,1,0,0)), 1) - expect_equal(f1_score(c(0,0,1,1),c(1,1,1,0)), 2/5) - expect_equal(f1_score(c(1,1,1,1),c(1,0,0,1)), 2/3) +test_that('f-beta score is calculated correctly',{ + expect_equal(fbeta_score(c(1,1,0,0),c(1,1,0,0)), 1) + expect_equal(fbeta_score(c(0,0,1,1),c(1,1,1,0)), 2/5) + expect_equal(fbeta_score(c(1,1,1,1),c(1,0,0,1)), 2/3) + expect_equal(fbeta_score(c(1,1,0,0),c(1,1,1,1),beta=0), 1/2) }) From 1c8ebee6fa4467ff75824a5381a9393cf04fdf21 Mon Sep 17 00:00:00 2001 From: Michael Horrell Date: Wed, 25 Apr 2018 08:42:53 -0500 Subject: [PATCH 6/7] adjusted fbeta documentation --- R/binary_classification.R | 6 +++--- man/fbeta_score.Rd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/binary_classification.R b/R/binary_classification.R index 
ec28cf5..b8a2aa7 100644 --- a/R/binary_classification.R +++ b/R/binary_classification.R @@ -124,9 +124,9 @@ recall <- function(actual, predicted) { #' @param beta A non-negative real number controlling how close the F-beta score is to #' either Precision or Recall. When \code{beta} is at the default of 1, #' the F-beta Score is exactly an equally weighted harmonic mean. -#' The F-beta score will weight toward Precision when \code{beta} is close -#' to zero. The F-beta score will weight toward Recall for large values of -#' \code{beta}. +#' The F-beta score will weight toward Precision when \code{beta} is less +#' than one. The F-beta score will weight toward Recall when \code{beta} is +#' greater than one. #' @export #' @seealso \code{\link{precision}} \code{\link{recall}} #' @examples diff --git a/man/fbeta_score.Rd b/man/fbeta_score.Rd index 507d2e1..aaceb11 100644 --- a/man/fbeta_score.Rd +++ b/man/fbeta_score.Rd @@ -17,9 +17,9 @@ prediction for the corresponding element in \code{actual}.} \item{beta}{A non-negative real number controlling how close the F-beta score is to either Precision or Recall. When \code{beta} is at the default of 1, the F-beta Score is exactly an equally weighted harmonic mean. -The F-beta score will weight toward Precision when \code{beta} is close -to zero. The F-beta score will weight toward Recall for large values of -\code{beta}.} +The F-beta score will weight toward Precision when \code{beta} is less +than one. The F-beta score will weight toward Recall when \code{beta} is +greater than one.} } \description{ \code{fbeta_score} computes a weighted harmonic mean of Precision and Recall. 
From ac43891a6214d60e73ca31d9d3f79c56ec14fd9c Mon Sep 17 00:00:00 2001 From: mthorrell Date: Sun, 13 May 2018 12:49:11 -0400 Subject: [PATCH 7/7] brought branch up to date and updated docs --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5619a61..96e486e 100644 --- a/README.md +++ b/README.md @@ -43,3 +43,6 @@ All functions in the **Metrics** package take at least two arguments: `actual` a | binary classification | Area Under ROC Curve | auc | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%5Cint_0%5E1%20%5B1%20-%20G_1%28G%5E%7B-1%7D_0%281%20-%20v%29%29%5D%20dv). `help(auc)` for details. | | binary classification | Log Loss | ll | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20x_i%20*%20%5Cln%28y_i%29%20+%20%281%20-%20x_i%29%20*%20%5Cln%281%20-%20y_i%29) | -| binary classification | Mean Log Loss | logloss | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%5Cfrac%7B1%7D%7Bn%7D%20%5Csum_%7Bi%3D1%7D%5En%20x_i%20*%20%5Cln%28y_i%29%20+%20%281%20-%20x_i%29%20*%20%5Cln%281%20-%20y_i%29) | +| binary classification | Mean Log Loss | logLoss | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%5Cfrac%7B1%7D%7Bn%7D%20%5Csum_%7Bi%3D1%7D%5En%20x_i%20*%20%5Cln%28y_i%29%20+%20%281%20-%20x_i%29%20*%20%5Cln%281%20-%20y_i%29) | +| binary classification | Precision | precision | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%5Cfrac%7B1%7D%7B%5Csum%20I%28y_i%20%3D%201%29%7D%5Csum_%7Bi%3D1%7D%20%5E%7Bn%7D%20I%28y_i%20%3D%201%29x_i) | +| binary classification | Recall | recall | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%5Cfrac%7B1%7D%7B%5Csum%20I%28x_i%20%3D%201%29%7D%5Csum_%7Bi%3D1%7D%20%5E%7Bn%7D%20I%28x_i%20%3D%201%29y_i) | +| binary classification | F-beta Score | fbeta_score | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%281%20+%20%5Cbeta%5E2%29%20%5Cfrac%7B%5Ctext%7Bprecision%7D%20*%20%5Ctext%7Brecall%7D%7D%7B%20%28%5Cbeta%5E2%20*%20%5Ctext%7Bprecision%7D%29%20+%20%5Ctext%7Brecall%7D%7D) |