From bdfaabaf66927a9f32cb3ee5ea08326c7295f7e7 Mon Sep 17 00:00:00 2001 From: Mauricio 'Pacha' Vargas Sepulveda Date: Sat, 6 Jul 2024 17:05:27 -0400 Subject: [PATCH] streamline kendall cor functions --- R/kendall_correlation.R | 43 ++++++++++++++-------------------- man/kendall_cor_test.Rd | 2 +- src/05_kendall_correlation.cpp | 15 +++++++----- tests/testthat/test-kendall.R | 13 ++++++++-- 4 files changed, 39 insertions(+), 34 deletions(-) diff --git a/R/kendall_correlation.R b/R/kendall_correlation.R index ff048b7..0e719ee 100644 --- a/R/kendall_correlation.R +++ b/R/kendall_correlation.R @@ -31,14 +31,14 @@ #' @export kendall_cor <- function(x, y) { arr <- cbind(x, y) - storage.mode(arr) <- "double" + if (storage.mode(arr) != "double") { storage.mode(arr) <- "double" } arr <- arr[complete.cases(arr), ] - kw <- kendall_warnings(arr) + n <- ncol(arr) - if (isFALSE(kw)) { - return(NA_real_) - } + kw <- kendall_warnings(arr, n) + + if (isFALSE(kw)) { return(NA) } kendall_cor_(arr) } @@ -70,29 +70,22 @@ kendall_cor <- function(x, y) { #' kendall_cor_test(x, y) #' #' @export -kendall_cor_test <- function(x,y, +kendall_cor_test <- function(x, y, alternative = c("two.sided", "greater", "less")) { alternative <- match.arg(alternative) arr <- cbind(x, y) - storage.mode(arr) <- "double" + if (storage.mode(arr) != "double") { storage.mode(arr) <- "double" } arr <- arr[complete.cases(arr), ] - kw <- kendall_warnings(arr) + n <- nrow(arr) + m <- ncol(arr) - if (isFALSE(kw)) { - return(NA) - } + kw <- kendall_warnings(arr, m) - r <- kendall_cor_(arr) - n <- nrow(arr) + if (isFALSE(kw)) { return(NA) } - if (n < 2) { - stop("not enough finite observations") - } - - # r = correlation - # n = number of observations + r <- kendall_cor_(arr) if (n < 50) { q <- round((r + 1) * n * (n - 1) / 4) @@ -145,21 +138,21 @@ kendall_cor_test <- function(x,y, ) } -kendall_warnings <- function(arr) { - if (ncol(arr) != 2) { +kendall_warnings <- function(arr, n) { + if (n != 2) { stop("x and y must be uni-dimensional vectors") } - if (nrow(arr) < 2) { - stop("x and y must have at least 2 observations") + if (n < 2) { + stop("x and y must have at least 2 non-null observations") } - if (sd(arr[, 1]) == 0) { + if (var(arr[, 1]) == 0) { warning("x has zero variance") return(FALSE) } - if (sd(arr[, 2]) == 0) { + if (var(arr[, 2]) == 0) { warning("y has zero variance") return(FALSE) } diff --git a/man/kendall_cor_test.Rd b/man/kendall_cor_test.Rd index b817470..0bec3a2 100644 --- a/man/kendall_cor_test.Rd +++ b/man/kendall_cor_test.Rd @@ -4,7 +4,7 @@ \alias{kendall_cor_test} \title{Kendall Correlation Test} \usage{ -kendall_cor_test(x, y, alternative = c("two.sided", "greater", "less")) +kendall_cor_test(x, y, alternative = "two.sided") } \arguments{ \item{x}{a numeric vector.} diff --git a/src/05_kendall_correlation.cpp b/src/05_kendall_correlation.cpp index f902cfd..b22b7fc 100644 --- a/src/05_kendall_correlation.cpp +++ b/src/05_kendall_correlation.cpp @@ -3,6 +3,9 @@ // Data) // Filzmoser, Fritz, and Kalcher 2023 (pcaPP package) +// note: the len < 2 conditions are commented out because the R function checks +// for this condition before calling the C++ functions + #include #include #include @@ -13,9 +16,9 @@ using namespace cpp11; uint64_t insertion_sort_(double *arr, size_t len) { - if (len < 2) { - return 0; - } + // if (len < 2) { + // return 0; + // } size_t maxJ = len - 1, i; uint64_t swapCount = 0; @@ -70,9 +73,9 @@ static uint64_t merge_(double *from, double *to, size_t middle, size_t len) { } uint64_t merge_sort_(double *x, double *buf, size_t len) { - if (len < 2) { - return 0; - } + // if (len < 2) { + // return 0; + // } if (len < 10) { return insertion_sort_(x, len); diff --git a/tests/testthat/test-kendall.R b/tests/testthat/test-kendall.R index 0564938..baf6cf7 100644 --- a/tests/testthat/test-kendall.R +++ b/tests/testthat/test-kendall.R @@ -6,7 +6,7 @@ test_that("kendall", { expect_equal(kendall_cor(x, x), 1) x <- rep(1, 3) - expect_equal(kendall_cor(x, x), cor(x, x, method = "kendall")) + expect_warning(kendall_cor(x, x), "zero variance") x <- c(1, 0, 2) y <- c(5, 3, 4) @@ -34,6 +34,16 @@ test_that("kendall", { k2 <- cor.test(x, y, method = "kendall", alternative = "two.sided") expect_equal(k1$statistic, unname(k2$estimate)) expect_equal(k1$p_value, k2$p.value) + + k1 <- kendall_cor_test(x, y, alternative = "greater") + k2 <- cor.test(x, y, method = "kendall", alternative = "greater") + expect_equal(k1$statistic, unname(k2$estimate)) + expect_equal(k1$p_value, k2$p.value) + + k1 <- kendall_cor_test(x, y, alternative = "less") + k2 <- cor.test(x, y, method = "kendall", alternative = "less") + expect_equal(k1$statistic, unname(k2$estimate)) + expect_equal(k1$p_value, k2$p.value) x <- rnorm(1e3) y <- rpois(1e3, 2) @@ -48,7 +58,6 @@ test_that("kendall", { } t_kendall <- median(t_kendall) - t_cor <- c() for (i in 1:100) { t1 <- Sys.time()