diff --git a/r/NEWS.md b/r/NEWS.md index 83d09157b9038..a01a9217a4329 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -19,6 +19,10 @@ # arrow 18.1.0.9000 +## Minor improvements and fixes + +- Added bindings for atan, sinh, cosh, tanh, asinh, acosh, atanh, and expm1 (#44953) + # arrow 18.1.0 ## Minor improvements and fixes diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index ba513ef470cfb..8f532fb522c20 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -128,6 +128,7 @@ Math.ArrowDatum <- function(x, ..., base = exp(1), digits = 0) { log10 = eval_array_expression("log10_checked", x), log2 = eval_array_expression("log2_checked", x), log1p = eval_array_expression("log1p_checked", x), + expm1 = eval_array_expression("expm1", x), round = eval_array_expression( "round", x, @@ -139,17 +140,16 @@ Math.ArrowDatum <- function(x, ..., base = exp(1), digits = 0) { cumprod = eval_array_expression("cumulative_prod_checked", x), cummax = eval_array_expression("cumulative_max", x), cummin = eval_array_expression("cumulative_min", x), + cosh = eval_array_expression("cosh", x), + sinh = eval_array_expression("sinh", x), + tanh = eval_array_expression("tanh", x), + acosh = eval_array_expression("acosh_checked", x), + asinh = eval_array_expression("asinh", x), + atanh = eval_array_expression("atanh_checked", x), signif = , - expm1 = , cospi = , sinpi = , tanpi = , - cosh = , - sinh = , - tanh = , - acosh = , - asinh = , - atanh = , lgamma = , gamma = , digamma = , diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R index 4f90dd16b266f..470c89ecc3dff 100644 --- a/r/R/dplyr-funcs-doc.R +++ b/r/R/dplyr-funcs-doc.R @@ -21,7 +21,7 @@ #' #' The `arrow` package contains methods for 37 `dplyr` table functions, many of #' which are "verbs" that do transformations to one or more tables. -#' The package also has mappings of 212 R functions to the corresponding +#' The package also has mappings of 221 R functions to the corresponding #' functions in the Arrow compute library. 
These allow you to write code inside #' of `dplyr` methods that call R functions, including many in packages like #' `stringr` and `lubridate`, and they will get translated to Arrow and run @@ -42,7 +42,7 @@ #' * [`collect()`][dplyr::collect()] #' * [`compute()`][dplyr::compute()] #' * [`count()`][dplyr::count()] -#' * [`distinct()`][dplyr::distinct()]: `.keep_all = TRUE` not supported +#' * [`distinct()`][dplyr::distinct()]: `.keep_all = TRUE` returns a non-missing value if present, only returning missing values if all are missing. #' * [`explain()`][dplyr::explain()] #' * [`filter()`][dplyr::filter()] #' * [`full_join()`][dplyr::full_join()]: the `copy` argument is ignored @@ -83,7 +83,7 @@ #' Functions can be called either as `pkg::fun()` or just `fun()`, i.e. both #' `str_sub()` and `stringr::str_sub()` work. #' -#' In addition to these functions, you can call any of Arrow's 262 compute +#' In addition to these functions, you can call any of Arrow's 271 compute #' functions directly. Arrow has many functions that don't map to an existing R #' function. In other cases where there is an R function mapping, you can still #' call the Arrow function directly if you don't want the adaptations that the R @@ -96,6 +96,7 @@ #' #' * [`add_filename()`][arrow::add_filename()] #' * [`cast()`][arrow::cast()] +#' * [`one()`][arrow::one()] #' #' ## base #' @@ -119,6 +120,7 @@ #' * [`^`][^()] #' * [`abs()`][base::abs()] #' * [`acos()`][base::acos()] +#' * [`acosh()`][base::acosh()] #' * [`all()`][base::all()] #' * [`any()`][base::any()] #' * [`as.Date()`][base::as.Date()]: Multiple `tryFormats` not supported in Arrow. 
@@ -130,14 +132,19 @@ #' * [`as.logical()`][base::as.logical()] #' * [`as.numeric()`][base::as.numeric()] #' * [`asin()`][base::asin()] +#' * [`asinh()`][base::asinh()] +#' * [`atan()`][base::atan()] +#' * [`atanh()`][base::atanh()] #' * [`ceiling()`][base::ceiling()] #' * [`cos()`][base::cos()] +#' * [`cosh()`][base::cosh()] #' * [`data.frame()`][base::data.frame()]: `row.names` and `check.rows` arguments not supported; #' `stringsAsFactors` must be `FALSE` #' * [`difftime()`][base::difftime()]: only supports `units = "secs"` (the default); #' `tz` argument not supported #' * [`endsWith()`][base::endsWith()] #' * [`exp()`][base::exp()] +#' * [`expm1()`][base::expm1()] #' * [`floor()`][base::floor()] #' * [`format()`][base::format()] #' * [`grepl()`][base::grepl()] @@ -171,6 +178,7 @@ #' * [`round()`][base::round()] #' * [`sign()`][base::sign()] #' * [`sin()`][base::sin()] +#' * [`sinh()`][base::sinh()] #' * [`sqrt()`][base::sqrt()] #' * [`startsWith()`][base::startsWith()] #' * [`strftime()`][base::strftime()] @@ -183,6 +191,7 @@ #' * [`substring()`][base::substring()] #' * [`sum()`][base::sum()] #' * [`tan()`][base::tan()] +#' * [`tanh()`][base::tanh()] #' * [`tolower()`][base::tolower()] #' * [`toupper()`][base::toupper()] #' * [`trunc()`][base::trunc()] diff --git a/r/R/dplyr-funcs-simple.R b/r/R/dplyr-funcs-simple.R index 4ccc2498435b3..05780721f2410 100644 --- a/r/R/dplyr-funcs-simple.R +++ b/r/R/dplyr-funcs-simple.R @@ -32,14 +32,22 @@ "base::log1p" = "log1p_checked", "base::log2" = "log2_checked", "base::sign" = "sign", + "base::expm1" = "expm1", # trunc is defined in dplyr-functions.R # trigonometric functions "base::acos" = "acos_checked", "base::asin" = "asin_checked", "base::cos" = "cos_checked", + "base::atan" = "atan", "base::sin" = "sin_checked", "base::tan" = "tan_checked", + "base::cosh" = "cosh", + "base::sinh" = "sinh", + "base::tanh" = "tanh", + "base::acosh" = "acosh_checked", + "base::asinh" = "asinh", + "base::atanh" = "atanh_checked", # 
logical functions "!" = "invert", diff --git a/r/extra-tests/helpers.R b/r/extra-tests/helpers.R deleted file mode 100644 index 3fb450ee33272..0000000000000 --- a/r/extra-tests/helpers.R +++ /dev/null @@ -1,36 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -if_version <- function(version, op = `==`) { - op(packageVersion("arrow"), version) -} - -if_version_less_than <- function(version) { - if_version(version, op = `<`) -} - -skip_if_version_less_than <- function(version, msg) { - if (if_version(version, `<`)) { - skip(msg) - } -} - -skip_if_version_equals <- function(version, msg) { - if (if_version(version, `==`)) { - skip(msg) - } -} diff --git a/r/extra-tests/test-read-files.R b/r/extra-tests/test-read-files.R index 4201e00d7c314..2f5db38e52044 100644 --- a/r/extra-tests/test-read-files.R +++ b/r/extra-tests/test-read-files.R @@ -18,6 +18,8 @@ library(arrow) library(testthat) +source("tests/testthat/helper-skip.R") + pq_file <- "files/ex_data.parquet" test_that("Can read the file (parquet)", { @@ -30,7 +32,7 @@ test_that("Can read the file (parquet)", { ### Parquet test_that("Can see the metadata (parquet)", { - skip_if_version_less_than("2.0.0", "Version 1.0.1 can't read new version metadata.") + skip_if_arrow_version_less_than("2.0.0", "Version 1.0.1 can't read new version metadata.") df <- read_parquet(pq_file) expect_s3_class(df, "tbl") @@ -74,7 +76,7 @@ for (comp in c("lz4", "uncompressed", "zstd")) { }) test_that(paste0("Can see the metadata (feather ", comp, ")"), { - skip_if_version_less_than("2.0.0", "Version 1.0.1 can't read new version metadata.") + skip_if_arrow_version_less_than("2.0.0", "Version 1.0.1 can't read new version metadata.") df <- read_feather(feather_file) expect_s3_class(df, "tbl") @@ -132,7 +134,7 @@ test_that("Can read the file (parquet)", { }) test_that("Can see the metadata (stream)", { - skip_if_version_less_than("2.0.0", "Version 1.0.1 can't read new version metadata.") + skip_if_arrow_version_less_than("2.0.0", "Version 1.0.1 can't read new version metadata.") df <- read_ipc_stream(stream_file) expect_s3_class(df, "tbl") diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R index bd29080848184..da48910133469 100644 --- a/r/tests/testthat/helper-skip.R +++ 
b/r/tests/testthat/helper-skip.R @@ -124,6 +124,26 @@ skip_on_python_older_than <- function(python_version) { } } +if_arrow_version <- function(version, op = `==`) { + op(packageVersion("arrow"), version) +} + +if_arrow_version_less_than <- function(version) { + if_arrow_version(version, op = `<`) +} + +skip_if_arrow_version_less_than <- function(version, msg) { + if (if_arrow_version(version, `<`)) { + skip(msg) + } +} + +skip_if_arrow_version_equals <- function(version, msg) { + if (if_arrow_version(version, `==`)) { + skip(msg) + } +} + process_is_running <- function(x) { if (force_tests()) { # Return TRUE as this is used as a condition in an if statement diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R index 22c4ee8002e7a..602e718b71c5e 100644 --- a/r/tests/testthat/test-compute-arith.R +++ b/r/tests/testthat/test-compute-arith.R @@ -223,22 +223,32 @@ test_that("Math group generics work on Array objects", { ) expect_error(signif(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") - expect_error(expm1(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") expect_error(cospi(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") expect_error(sinpi(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") expect_error(tanpi(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") - expect_error(cosh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") - expect_error(sinh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") - expect_error(tanh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") - - expect_error(acosh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") - expect_error(asinh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") - expect_error(atanh(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") - expect_error(lgamma(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") expect_error(gamma(Array$create(c(4L, 1L))), 
"Unsupported operation on `Array`") expect_error(digamma(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") expect_error(trigamma(Array$create(c(4L, 1L))), "Unsupported operation on `Array`") }) + +test_that("hyperbolic trig functions work on Array objects", { + skip_if_arrow_version_less_than("18.1.0.9000", "Hyperbolic trig functions not available until version 19.") + + expect_equal(sinh(Array$create(c(0.6, 0.9))), Array$create(sinh(c(0.6, 0.9)))) + expect_equal(cosh(Array$create(c(0.6, 0.9))), Array$create(cosh(c(0.6, 0.9)))) + expect_equal(tanh(Array$create(c(0.6, 0.9))), Array$create(tanh(c(0.6, 0.9)))) + expect_equal(asinh(Array$create(c(0.6, 0.9))), Array$create(asinh(c(0.6, 0.9)))) + expect_error(acosh(Array$create(c(0.6, 0.9))), "Invalid: domain error") + expect_equal(acosh(Array$create(c(1, 2))), Array$create(acosh(c(1, 2)))) + expect_error(atanh(Array$create(c(-1, 1))), "Invalid: domain error") + expect_equal(atanh(Array$create(c(0.6, 0.9))), Array$create(atanh(c(0.6, 0.9)))) +}) + +test_that("expm1 works on Array objects", { + skip_if_arrow_version_less_than("18.1.0.9000", "expm1 not available until version 19.") + + expect_equal(expm1(Array$create(c(0.00000001, 10))), Array$create(expm1(c(0.00000001, 10)))) +}) diff --git a/r/tests/testthat/test-dplyr-funcs-math.R b/r/tests/testthat/test-dplyr-funcs-math.R index 1057f7ae06eb0..bed2da110ba4b 100644 --- a/r/tests/testthat/test-dplyr-funcs-math.R +++ b/r/tests/testthat/test-dplyr-funcs-math.R @@ -315,6 +315,13 @@ test_that("trig functions", { df ) + compare_dplyr_binding( + .input %>% + mutate(y = atan(x)) %>% + collect(), + df + ) + # with namespacing compare_dplyr_binding( .input %>% @@ -323,7 +330,94 @@ test_that("trig functions", { b = base::cos(x), c = base::tan(x), d = base::asin(x), - e = base::acos(x) + e = base::acos(x), + f = base::atan(x) + ) %>% + collect(), + df + ) +}) + +test_that("hyperbolic trig functions", { + skip_if_arrow_version_less_than("18.1.0.9000", "Hyperbolic trig 
functions not available until version 19.") + + # Note: We change df mid-test because domains differ by function + df <- tibble(x = c(seq(from = 0, to = 1, by = 0.1), NA)) + + compare_dplyr_binding( + .input %>% + mutate(y = sinh(x)) %>% + collect(), + df + ) + + compare_dplyr_binding( + .input %>% + mutate(y = cosh(x)) %>% + collect(), + df + ) + + compare_dplyr_binding( + .input %>% + mutate(y = tanh(x)) %>% + collect(), + df + ) + + compare_dplyr_binding( + .input %>% + mutate(y = asinh(x)) %>% + collect(), + df + ) + + # with namespacing + compare_dplyr_binding( + .input %>% + mutate( + a = base::sinh(x), + b = base::cosh(x), + c = base::tanh(x), + d = base::asinh(x), + ) %>% + collect(), + df + ) + + df <- tibble(x = c(seq(from = 1, to = 2, by = 0.1))) + + compare_dplyr_binding( + .input %>% + mutate(y = acosh(x)) %>% + collect(), + df + ) + + # with namespacing + compare_dplyr_binding( + .input %>% + mutate( + a = base::acosh(x), + ) %>% + collect(), + df + ) + + df <- tibble(x = c(seq(from = -0.5, to = 0.5, by = 0.1))) + + compare_dplyr_binding( + .input %>% + mutate(y = atanh(x)) %>% + collect(), + df + ) + + # with namespacing + compare_dplyr_binding( + .input %>% + mutate( + a = base::atanh(x) ) %>% collect(), df @@ -399,3 +493,19 @@ test_that("sqrt()", { df ) }) + +test_that("expm1()", { + skip_if_arrow_version_less_than("18.1.0.9000", "expm1 not available until version 19.") + + df <- tibble(x = c(1:5)) + + compare_dplyr_binding( + .input %>% + mutate( + y = expm1(x), + y2 = base::expm1(x) + ) %>% + collect(), + df + ) +})