diff --git a/NAMESPACE b/NAMESPACE
index 8d50f77..8766f86 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -10,6 +10,7 @@ export(auc)
 export(bias)
 export(ce)
 export(f1)
+export(fbeta_score)
 export(ll)
 export(logLoss)
 export(mae)
@@ -20,7 +21,9 @@ export(mdae)
 export(mse)
 export(msle)
 export(percent_bias)
+export(precision)
 export(rae)
+export(recall)
 export(rmse)
 export(rmsle)
 export(rrse)
diff --git a/R/binary_classification.R b/R/binary_classification.R
index affaf16..b8a2aa7 100644
--- a/R/binary_classification.R
+++ b/R/binary_classification.R
@@ -76,3 +76,65 @@ ll <- function(actual, predicted) {
 logLoss <- function(actual, predicted) {
     return(mean(ll(actual, predicted)))
 }
+
+
+
+#' Precision
+#'
+#' \code{precision} computes the proportion of observations predicted to be in the
+#' positive class (i.e. the element in \code{predicted} equals 1)
+#' that actually belong to the positive class (i.e. the element
+#' in \code{actual} equals 1)
+#'
+#' @inheritParams params_binary
+#' @export
+#' @seealso \code{\link{recall}} \code{\link{fbeta_score}}
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 1, 1, 1, 1, 1)
+#' precision(actual, predicted)
+precision <- function(actual, predicted) {
+    return(mean(actual[predicted == 1]))
+}
+
+#' Recall
+#'
+#' \code{recall} computes the proportion of observations in the positive class
+#' (i.e. the element in \code{actual} equals 1) that are predicted
+#' to be in the positive class (i.e. the element in \code{predicted}
+#' equals 1)
+#'
+#' @inheritParams params_binary
+#' @export
+#' @seealso \code{\link{precision}} \code{\link{fbeta_score}}
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' recall(actual, predicted)
+recall <- function(actual, predicted) {
+    return(mean(predicted[actual == 1]))
+}
+
+#' F-beta Score
+#'
+#' \code{fbeta_score} computes a weighted harmonic mean of Precision and Recall.
+#' The \code{beta} parameter controls the weighting.
+#'
+#' @inheritParams params_binary
+#' @param beta A non-negative real number controlling how close the F-beta score is to
+#'             either Precision or Recall. When \code{beta} is at the default of 1,
+#'             the F-beta Score is exactly an equally weighted harmonic mean.
+#'             The F-beta score will weight toward Precision when \code{beta} is less
+#'             than one. The F-beta score will weight toward Recall when \code{beta} is
+#'             greater than one.
+#' @export
+#' @seealso \code{\link{precision}} \code{\link{recall}}
+#' @examples
+#' actual <- c(1, 1, 1, 0, 0, 0)
+#' predicted <- c(1, 0, 1, 1, 1, 1)
+#' fbeta_score(actual, predicted)
+fbeta_score <- function(actual, predicted, beta = 1) {
+    prec <- precision(actual, predicted)
+    rec <- recall(actual, predicted)
+    return((1 + beta^2) * prec * rec / ((beta^2 * prec) + rec))
+}
diff --git a/README.md b/README.md
index 5619a61..96e486e 100644
--- a/README.md
+++ b/README.md
@@ -43,3 +43,6 @@ All functions in the **Metrics** package take at least two arguments: `actual` a
 | binary classification | Area Under ROC Curve | auc | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%5Cint_0%5E1%20%5B1%20-%20G_1%28G%5E%7B-1%7D_0%281%20-%20v%29%29%5D%20dv). `help(auc)` for details. |
 | binary classification | Log Loss | ll | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20x_i%20*%20%5Cln%28y_i%29%20+%20%281%20-%20x_i%29%20*%20%5Cln%281%20-%20y_i%29) |
 | binary classification | Mean Log Loss | logloss | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%5Cfrac%7B1%7D%7Bn%7D%20%5Csum_%7Bi%3D1%7D%5En%20x_i%20*%20%5Cln%28y_i%29%20+%20%281%20-%20x_i%29%20*%20%5Cln%281%20-%20y_i%29) |
+| binary classification | Precision | precision | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%5Cfrac%7B1%7D%7B%5Csum%20I%28y_i%20%3D%201%29%7D%5Csum_%7Bi%3D1%7D%20%5E%7Bn%7D%20I%28y_i%20%3D%201%29x_i) |
+| binary classification | Recall | recall | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%5Cfrac%7B1%7D%7B%5Csum%20I%28x_i%20%3D%201%29%7D%5Csum_%7Bi%3D1%7D%20%5E%7Bn%7D%20I%28x_i%20%3D%201%29y_i) |
+| binary classification | F-beta Score | fbeta_score | ![equation](https://latex.codecogs.com/gif.latex?%5Cdpi%7B150%7D%20%281%20+%20%5Cbeta%5E2%29%20%5Cfrac%7B%5Ctext%7Bprecision%7D%20*%20%5Ctext%7Brecall%7D%7D%7B%20%28%5Cbeta%5E2%20*%20%5Ctext%7Bprecision%7D%29%20+%20%5Ctext%7Brecall%7D%7D) |
diff --git a/man/fbeta_score.Rd b/man/fbeta_score.Rd
new file mode 100644
index 0000000..aaceb11
--- /dev/null
+++ b/man/fbeta_score.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{fbeta_score}
+\alias{fbeta_score}
+\title{F-beta Score}
+\usage{
+fbeta_score(actual, predicted, beta = 1)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+
+\item{beta}{A non-negative real number controlling how close the F-beta score is to
+either Precision or Recall. When \code{beta} is at the default of 1,
+the F-beta Score is exactly an equally weighted harmonic mean.
+The F-beta score will weight toward Precision when \code{beta} is less
+than one. The F-beta score will weight toward Recall when \code{beta} is
+greater than one.}
+}
+\description{
+\code{fbeta_score} computes a weighted harmonic mean of Precision and Recall.
+ The \code{beta} parameter controls the weighting.
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+fbeta_score(actual, predicted)
+}
+\seealso{
+\code{\link{precision}} \code{\link{recall}}
+}
diff --git a/man/precision.Rd b/man/precision.Rd
new file mode 100644
index 0000000..5f27436
--- /dev/null
+++ b/man/precision.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{precision}
+\alias{precision}
+\title{Precision}
+\usage{
+precision(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{precision} computes the proportion of observations predicted to be in the
+ positive class (i.e. the element in \code{predicted} equals 1)
+ that actually belong to the positive class (i.e. the element
+ in \code{actual} equals 1)
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 1, 1, 1, 1, 1)
+precision(actual, predicted)
+}
+\seealso{
+\code{\link{recall}} \code{\link{fbeta_score}}
+}
diff --git a/man/recall.Rd b/man/recall.Rd
new file mode 100644
index 0000000..d314035
--- /dev/null
+++ b/man/recall.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_classification.R
+\name{recall}
+\alias{recall}
+\title{Recall}
+\usage{
+recall(actual, predicted)
+}
+\arguments{
+\item{actual}{The ground truth binary numeric vector containing 1 for the positive
+class and 0 for the negative class.}
+
+\item{predicted}{The predicted binary numeric vector containing 1 for the positive
+class and 0 for the negative class. Each element represents the
+prediction for the corresponding element in \code{actual}.}
+}
+\description{
+\code{recall} computes the proportion of observations in the positive class
+ (i.e. the element in \code{actual} equals 1) that are predicted
+ to be in the positive class (i.e. the element in \code{predicted}
+ equals 1)
+}
+\examples{
+actual <- c(1, 1, 1, 0, 0, 0)
+predicted <- c(1, 0, 1, 1, 1, 1)
+recall(actual, predicted)
+}
+\seealso{
+\code{\link{precision}} \code{\link{fbeta_score}}
+}
diff --git a/tests/testthat/test-binary_classification.R b/tests/testthat/test-binary_classification.R
index ff8c043..b94d98f 100644
--- a/tests/testthat/test-binary_classification.R
+++ b/tests/testthat/test-binary_classification.R
@@ -21,3 +21,22 @@ test_that('mean los loss is calculated correctly', {
     expect_equal(logLoss(c(1,1,1,0,0,0),c(.5,.1,.01,.9,.75,.001)), 1.881797068998267)
 })
 
+test_that('precision is calculated correctly', {
+    expect_equal(precision(c(1,1,0,0),c(1,1,0,0)), 1)
+    expect_equal(precision(c(0,0,1,1),c(1,1,0,0)), 0)
+    expect_equal(precision(c(1,1,0,0),c(1,1,1,1)), 1/2)
+})
+
+test_that('recall is calculated correctly', {
+    expect_equal(recall(c(1,1,0,0),c(1,1,0,0)), 1)
+    expect_equal(recall(c(0,0,1,1),c(1,1,0,0)), 0)
+    expect_equal(recall(c(1,1,1,1),c(1,0,0,1)), 1/2)
+})
+
+test_that('f-beta score is calculated correctly', {
+    expect_equal(fbeta_score(c(1,1,0,0),c(1,1,0,0)), 1)
+    expect_equal(fbeta_score(c(0,0,1,1),c(1,1,1,0)), 2/5)
+    expect_equal(fbeta_score(c(1,1,1,1),c(1,0,0,1)), 2/3)
+    expect_equal(fbeta_score(c(1,1,0,0),c(1,1,1,1),beta=0), 1/2)
+})
+
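For anyone who wants to try the new functions before merging, the snippet below is a minimal usage sketch, not part of the patch itself; it assumes the branch has been loaded into the session (for example with `devtools::load_all()` or an installed build of `Metrics`). It checks that `fbeta_score()` at the default `beta = 1` matches the harmonic mean of `precision()` and `recall()` computed by hand, shows how `beta` shifts the score toward one metric or the other, and illustrates the edge case where there are no predicted positives.

actual    <- c(1, 1, 1, 0, 0, 0)
predicted <- c(1, 0, 1, 1, 1, 1)

p  <- precision(actual, predicted)          # 2/5: two of the five predicted positives are true positives
r  <- recall(actual, predicted)             # 2/3: two of the three actual positives were predicted
f1 <- fbeta_score(actual, predicted)        # default beta = 1
all.equal(f1, 2 * p * r / (p + r))          # TRUE: the plain harmonic mean

fbeta_score(actual, predicted, beta = 2)    # ~0.59, pulled toward recall
fbeta_score(actual, predicted, beta = 0.5)  # ~0.43, pulled toward precision

precision(actual, rep(0, 6))                # NaN: mean() of an empty subset when nothing is predicted positive

Because precision() and recall() are implemented as subsetting plus mean(), they return NaN rather than 0 when the relevant subset is empty, and fbeta_score() inherits that behaviour through the two helpers.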