From bdfaabaf66927a9f32cb3ee5ea08326c7295f7e7 Mon Sep 17 00:00:00 2001
From: Mauricio 'Pacha' Vargas Sepulveda <m.sepulveda@mail.utoronto.ca>
Date: Sat, 6 Jul 2024 17:05:27 -0400
Subject: [PATCH] streamline kendall cor functions

---
 R/kendall_correlation.R        | 43 ++++++++++++++--------------------
 man/kendall_cor_test.Rd        |  2 +-
 src/05_kendall_correlation.cpp | 15 +++++++-----
 tests/testthat/test-kendall.R  | 13 ++++++++--
 4 files changed, 39 insertions(+), 34 deletions(-)
diff --git a/R/kendall_correlation.R b/R/kendall_correlation.R
index ff048b7..0e719ee 100644
--- a/R/kendall_correlation.R
+++ b/R/kendall_correlation.R
@@ -31,14 +31,14 @@
 #' @export
 kendall_cor <- function(x, y) {
   arr <- cbind(x, y)
-  storage.mode(arr) <- "double"
+  if (storage.mode(arr) != "double") { storage.mode(arr) <- "double" }
   arr <- arr[complete.cases(arr), ]
 
-  kw <- kendall_warnings(arr)
+  n <- ncol(arr)
 
-  if (isFALSE(kw)) {
-    return(NA_real_)
-  }
+  kw <- kendall_warnings(arr, n)
+
+  if (isFALSE(kw)) { return(NA) }
 
   kendall_cor_(arr)
 }
@@ -70,29 +70,22 @@ kendall_cor <- function(x, y) {
 #' kendall_cor_test(x, y)
 #' 
 #' @export
-kendall_cor_test <- function(x,y,
+kendall_cor_test <- function(x, y,
   alternative = c("two.sided", "greater", "less")) {
   alternative <- match.arg(alternative)
 
   arr <- cbind(x, y)
-  storage.mode(arr) <- "double"
+  if (storage.mode(arr) != "double") { storage.mode(arr) <- "double" }
   arr <- arr[complete.cases(arr), ]
 
-  kw <- kendall_warnings(arr)
+  n <- nrow(arr)
+  m <- ncol(arr)
 
-  if (isFALSE(kw)) {
-    return(NA)
-  }
+  kw <- kendall_warnings(arr, m)
 
-  r <- kendall_cor_(arr)
-  n <- nrow(arr)
+  if (isFALSE(kw)) { return(NA) }
 
-  if (n < 2) {
-    stop("not enough finite observations")
-  }
-  
-  # r = correlation
-  # n = number of observations
+  r <- kendall_cor_(arr)
 
   if (n < 50) {
     q <- round((r + 1) * n * (n - 1) / 4)
@@ -145,21 +138,21 @@ kendall_cor_test <- function(x,y,
   )
 }
 
-kendall_warnings <- function(arr) {
-  if (ncol(arr) != 2) {
+kendall_warnings <- function(arr, n) {
+  if (n != 2) {
     stop("x and y must be uni-dimensional vectors")
   }
 
-  if (nrow(arr) < 2) {
-    stop("x and y must have at least 2 observations")
+  if (n < 2) {
+    stop("x and y must have at least 2 non-null observations")
   }
 
-  if (sd(arr[, 1]) == 0) {
+  if (var(arr[, 1]) == 0) {
     warning("x has zero variance")
     return(FALSE)
   }
 
-  if (sd(arr[, 2]) == 0) {
+  if (var(arr[, 2]) == 0) {
     warning("y has zero variance")
     return(FALSE)
   }
diff --git a/man/kendall_cor_test.Rd b/man/kendall_cor_test.Rd
index b817470..0bec3a2 100644
--- a/man/kendall_cor_test.Rd
+++ b/man/kendall_cor_test.Rd
@@ -4,7 +4,7 @@
 \alias{kendall_cor_test}
 \title{Kendall Correlation Test}
 \usage{
-kendall_cor_test(x, y, alternative = c("two.sided", "greater", "less"))
+kendall_cor_test(x, y, alternative = "two.sided")
 }
 \arguments{
 \item{x}{a numeric vector.}
diff --git a/src/05_kendall_correlation.cpp b/src/05_kendall_correlation.cpp
index f902cfd..b22b7fc 100644
--- a/src/05_kendall_correlation.cpp
+++ b/src/05_kendall_correlation.cpp
@@ -3,6 +3,9 @@
 // Data)
 // Filzmoser, Fritz, and Kalcher 2023 (pcaPP package)
 
+// note: the len < 2 conditions are commented out because the R function checks
+// for this condition before calling the C++ functions
+
 #include <algorithm>
 #include <cmath>
 #include <cpp11.hpp>
@@ -13,9 +16,9 @@
 using namespace cpp11;
 
 uint64_t insertion_sort_(double *arr, size_t len) {
-  if (len < 2) {
-    return 0;
-  }
+  // if (len < 2) {
+  //   return 0;
+  // }
 
   size_t maxJ = len - 1, i;
   uint64_t swapCount = 0;
@@ -70,9 +73,9 @@ static uint64_t merge_(double *from, double *to, size_t middle, size_t len) {
 }
 
 uint64_t merge_sort_(double *x, double *buf, size_t len) {
-  if (len < 2) {
-    return 0;
-  }
+  // if (len < 2) {
+  //   return 0;
+  // }
 
   if (len < 10) {
     return insertion_sort_(x, len);
diff --git a/tests/testthat/test-kendall.R b/tests/testthat/test-kendall.R
index 0564938..baf6cf7 100644
--- a/tests/testthat/test-kendall.R
+++ b/tests/testthat/test-kendall.R
@@ -6,7 +6,7 @@ test_that("kendall", {
   expect_equal(kendall_cor(x, x), 1)
 
   x <- rep(1, 3)
-  expect_equal(kendall_cor(x, x), cor(x, x, method = "kendall"))
+  expect_warning(kendall_cor(x, x), "zero variance")
 
   x <- c(1, 0, 2)
   y <- c(5, 3, 4)
@@ -34,6 +34,16 @@ test_that("kendall", {
   k2 <- cor.test(x, y, method = "kendall", alternative = "two.sided")
   expect_equal(k1$statistic, unname(k2$estimate))
   expect_equal(k1$p_value, k2$p.value)
+
+  k1 <- kendall_cor_test(x, y, alternative = "greater")
+  k2 <- cor.test(x, y, method = "kendall", alternative = "greater")
+  expect_equal(k1$statistic, unname(k2$estimate))
+  expect_equal(k1$p_value, k2$p.value)
+
+  k1 <- kendall_cor_test(x, y, alternative = "less")
+  k2 <- cor.test(x, y, method = "kendall", alternative = "less")
+  expect_equal(k1$statistic, unname(k2$estimate))
+  expect_equal(k1$p_value, k2$p.value)
   
   x <- rnorm(1e3)
   y <- rpois(1e3, 2)
@@ -48,7 +58,6 @@ test_that("kendall", {
   }
   t_kendall <- median(t_kendall)
 
-
   t_cor <- c()
   for (i in 1:100) {
     t1 <- Sys.time()