Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add support for Decimal32/64 to R package #717

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
5 changes: 4 additions & 1 deletion r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ S3method(as_nanoarrow_schema,nanoarrow_schema)
S3method(as_nanoarrow_schema,nanoarrow_vctr)
S3method(c,nanoarrow_vctr)
S3method(convert_array,default)
S3method(convert_array,double)
S3method(convert_array,factor)
S3method(convert_array,nanoarrow_vctr)
S3method(convert_array,vctrs_partial_frame)
Expand Down Expand Up @@ -155,6 +154,8 @@ export(na_date32)
export(na_date64)
export(na_decimal128)
export(na_decimal256)
export(na_decimal32)
export(na_decimal64)
export(na_dense_union)
export(na_dictionary)
export(na_double)
Expand All @@ -173,8 +174,10 @@ export(na_interval_month_day_nano)
export(na_interval_months)
export(na_large_binary)
export(na_large_list)
export(na_large_list_view)
export(na_large_string)
export(na_list)
export(na_list_view)
export(na_map)
export(na_na)
export(na_sparse_union)
Expand Down
26 changes: 26 additions & 0 deletions r/R/as-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -475,3 +475,29 @@ as_nanoarrow_array_from_c <- function(x, schema) {

result
}

# Helper to allow us to use nanoarrow's string parser, which parses integers
# to set decimal storage but not the slightly more useful case of parsing
# things with decimal points yet.
storage_integer_for_decimal <- function(numbers, scale) {
rounded_formatted <- storage_decimal_for_decimal(numbers, scale)
gsub(".", "", rounded_formatted, fixed = TRUE)
}

storage_decimal_for_decimal <- function(numbers, scale) {
if (scale > 0) {
rounded_formatted <- sprintf("%0.*f", scale, numbers)
rounded_formatted[is.na(numbers)] <- NA_character_
} else {
rounded <- round(numbers, scale)
is_zero <- !is.na(rounded) & rounded == 0
rounded_formatted <- as.character(rounded)
rounded_formatted[!is_zero] <- gsub(
paste0("0{", -scale, "}$"),
"",
rounded_formatted[!is_zero]
)
}

rounded_formatted
}
18 changes: 18 additions & 0 deletions r/R/buffer.R
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,24 @@ as_nanoarrow_array.nanoarrow_buffer <- function(x, ..., schema = NULL) {
buffers = list(NULL, offsets, x)
)
)
} else if (data_type %in% c("decimal32", "decimal64", "decimal128", "decimal256")) {
# Create an array with max precision and scale 0, which results in the
# decimal integer value getting displayed.
array <- nanoarrow_array_init(
na_type(
data_type,
precision = max_decimal_precision(data_type),
scale = 0
)
)
nanoarrow_array_modify(
array,
list(
length = logical_length,
null_count = 0,
buffers = list(NULL, x)
)
)
} else if (data_type %in% c("string_view", "binary_view")) {
stop("Can't convert buffer of type string_view or binary_view to array")
} else {
Expand Down
20 changes: 0 additions & 20 deletions r/R/convert-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,26 +155,6 @@ convert_array.nanoarrow_vctr <- function(array, to, ...) {
new_nanoarrow_vctr(list(array), schema, class(to))
}

#' @export
convert_array.double <- function(array, to, ...) {
# Handle conversion from decimal128 via arrow
schema <- infer_nanoarrow_schema(array)
parsed <- nanoarrow_schema_parse(schema)
if (parsed$type == "decimal128") {
assert_arrow_installed(
sprintf(
"convert %s array to object of type double",
nanoarrow_schema_formatted(schema)
)
)

arrow_array <- as_arrow_array.nanoarrow_array(array)
arrow_array$as_vector()
} else {
NextMethod()
}
}

#' @export
convert_array.vctrs_partial_frame <- function(array, to, ...) {
ptype <- infer_nanoarrow_ptype(array)
Expand Down
60 changes: 59 additions & 1 deletion r/R/type.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
#' na_struct(list(col1 = na_int32()))
#'
na_type <- function(type_name, byte_width = NULL, unit = NULL, timezone = NULL,
precision = NULL, scale = NULL,
column_types = NULL, item_type = NULL, key_type = NULL,
value_type = NULL, index_type = NULL, ordered = NULL,
list_size = NULL, keys_sorted = NULL, storage_type = NULL,
Expand All @@ -76,6 +77,8 @@ na_type <- function(type_name, byte_width = NULL, unit = NULL, timezone = NULL,
byte_width = byte_width,
unit = unit,
timezone = timezone,
precision = precision,
scale = scale,
column_types = column_types,
item_type = item_type,
key_type = key_type,
Expand Down Expand Up @@ -307,6 +310,30 @@ na_timestamp <- function(unit = c("us", "ns", "s", "ms"), timezone = "", nullabl
)
}

#' @rdname na_type
#' @export
na_decimal32 <- function(precision, scale, nullable = TRUE) {
.Call(
nanoarrow_c_schema_init_decimal,
NANOARROW_TYPE$DECIMAL32,
as.integer(precision)[1],
as.integer(scale)[1],
isTRUE(nullable)
)
}

#' @rdname na_type
#' @export
na_decimal64 <- function(precision, scale, nullable = TRUE) {
.Call(
nanoarrow_c_schema_init_decimal,
NANOARROW_TYPE$DECIMAL64,
as.integer(precision)[1],
as.integer(scale)[1],
isTRUE(nullable)
)
}

#' @rdname na_type
#' @export
na_decimal128 <- function(precision, scale, nullable = TRUE) {
Expand Down Expand Up @@ -371,6 +398,22 @@ na_large_list <- function(item_type, nullable = TRUE) {
schema
}

#' @rdname na_type
#' @export
na_list_view <- function(item_type, nullable = TRUE) {
schema <- .Call(nanoarrow_c_schema_init, NANOARROW_TYPE$LIST_VIEW, isTRUE(nullable))
schema$children[[1]] <- item_type
schema
}

#' @rdname na_type
#' @export
na_large_list_view <- function(item_type, nullable = TRUE) {
schema <- .Call(nanoarrow_c_schema_init, NANOARROW_TYPE$LARGE_LIST_VIEW, isTRUE(nullable))
schema$children[[1]] <- item_type
schema
}

#' @rdname na_type
#' @export
na_fixed_size_list <- function(item_type, list_size, nullable = TRUE) {
Expand Down Expand Up @@ -430,6 +473,17 @@ time_unit_id <- function(time_unit) {
match(time_unit, c("s", "ms", "us", "ns")) - 1L
}

max_decimal_precision <- function(type) {
switch(
type,
decimal32 = 9,
decimal64 = 18,
decimal128 = 38,
decimal256 = 76,
stop(sprintf("non-decimal type name: %s", type))
)
}

# These values aren't guaranteed to stay stable between nanoarrow versions,
# so we keep them internal but use them in these functions to simplify the
# number of C functions we need to build all the types.
Expand Down Expand Up @@ -475,7 +529,11 @@ NANOARROW_TYPE <- list(
INTERVAL_MONTH_DAY_NANO = 38L,
RUN_END_ENCODED = 39L,
BINARY_VIEW = 40L,
STRING_VIEW = 41L
STRING_VIEW = 41L,
DECIMAL32 = 42L,
DECIMAL64 = 43L,
LIST_VIEW = 44L,
LARGE_LIST_VIEW = 45L
)

ARROW_FLAG <- list(
Expand Down
22 changes: 18 additions & 4 deletions r/man/na_type.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions r/nanoarrow.Rproj
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Version: 1.0
ProjectId: 66584247-a2ea-4bae-bcbd-86d4e09bc627

RestoreWorkspace: No
SaveWorkspace: No
Expand Down
77 changes: 77 additions & 0 deletions r/src/as_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,29 @@ static void call_as_nanoarrow_array(SEXP x_sexp, struct ArrowArray* array,
UNPROTECT(3);
}

static SEXP call_storage_integer_for_decimal(SEXP x_sexp, int scale) {
SEXP scale_sexp = PROTECT(Rf_ScalarInteger(scale));
SEXP fun = PROTECT(Rf_install("storage_integer_for_decimal"));
SEXP call = PROTECT(Rf_lang3(fun, x_sexp, scale_sexp));
SEXP result = PROTECT(Rf_eval(call, nanoarrow_ns_pkg));
UNPROTECT(4);
return result;
}

static void as_decimal_array(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr,
struct ArrowSchemaView* schema_view,
struct ArrowError* error);

static void as_array_int(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr,
struct ArrowSchemaView* schema_view, struct ArrowError* error) {
// Consider integer -> numeric types that are easy to implement
switch (schema_view->type) {
case NANOARROW_TYPE_DECIMAL32:
case NANOARROW_TYPE_DECIMAL64:
case NANOARROW_TYPE_DECIMAL128:
case NANOARROW_TYPE_DECIMAL256:
as_decimal_array(x_sexp, array, schema_xptr, schema_view, error);
return;
case NANOARROW_TYPE_DOUBLE:
case NANOARROW_TYPE_FLOAT:
case NANOARROW_TYPE_HALF_FLOAT:
Expand Down Expand Up @@ -215,6 +234,12 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr
// Consider double -> na_double() and double -> na_int64()/na_int32()
// (mostly so that we can support date/time types with various units)
switch (schema_view->type) {
case NANOARROW_TYPE_DECIMAL32:
case NANOARROW_TYPE_DECIMAL64:
case NANOARROW_TYPE_DECIMAL128:
case NANOARROW_TYPE_DECIMAL256:
as_decimal_array(x_sexp, array, schema_xptr, schema_view, error);
return;
case NANOARROW_TYPE_DOUBLE:
case NANOARROW_TYPE_FLOAT:
case NANOARROW_TYPE_HALF_FLOAT:
Expand Down Expand Up @@ -346,6 +371,58 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr
}
}

static void as_decimal_array(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr,
struct ArrowSchemaView* schema_view,
struct ArrowError* error) {
// Use R to generate the input we need for ArrowDecimalSetDigits()
SEXP x_digits_sexp =
PROTECT(call_storage_integer_for_decimal(x_sexp, schema_view->decimal_scale));

struct ArrowDecimal item;
ArrowDecimalInit(&item, schema_view->decimal_bitwidth, schema_view->decimal_precision,
schema_view->decimal_scale);

int result = ArrowArrayInitFromType(array, schema_view->type);
if (result != NANOARROW_OK) {
Rf_error("ArrowArrayInitFromType() failed");
}

result = ArrowArrayStartAppending(array);
if (result != NANOARROW_OK) {
Rf_error("ArrowArrayStartAppending() failed");
}

int64_t len = Rf_xlength(x_sexp);
result = ArrowArrayReserve(array, len);
if (result != NANOARROW_OK) {
Rf_error("ArrowArrayReserve() failed");
}

struct ArrowStringView item_digits_view;
for (int64_t i = 0; i < len; i++) {
SEXP item_sexp = STRING_ELT(x_digits_sexp, i);
if (item_sexp == NA_STRING) {
result = ArrowArrayAppendNull(array, 1);
} else {
item_digits_view.data = CHAR(item_sexp);
item_digits_view.size_bytes = Rf_length(item_sexp);
ArrowDecimalSetDigits(&item, item_digits_view);
result = ArrowArrayAppendDecimal(array, &item);
}

if (result != NANOARROW_OK) {
Rf_error("ArrowArrayAppendDecimal() failed");
}
}

UNPROTECT(1);

result = ArrowArrayFinishBuildingDefault(array, error);
if (result != NANOARROW_OK) {
Rf_error("ArrowArrayFinishBuildingDefault(): %s", error->message);
}
}

static void as_array_chr(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr,
struct ArrowSchemaView* schema_view, struct ArrowError* error) {
switch (schema_view->type) {
Expand Down
1 change: 0 additions & 1 deletion r/src/convert_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

#include "altrep.h"
#include "array.h"
#include "array_view.h"
#include "convert.h"
#include "util.h"

Expand Down
Loading
Loading