Skip to content

Commit

Permalink
Merge pull request #236 from jmbarbone/234-write-arrow
Browse files Browse the repository at this point in the history
234 write arrow
  • Loading branch information
jmbarbone authored Jun 17, 2024
2 parents 7b9b4cc + 2fae5ba commit 3067f18
Show file tree
Hide file tree
Showing 16 changed files with 188 additions and 66 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: mark
Type: Package
Title: Miscellaneous, Analytic R Kernels
Version: 0.8.0.9001
Version: 0.8.0.9002
Authors@R:
person(given = "Jordan Mark",
family = "Barbone",
Expand Down Expand Up @@ -48,7 +48,8 @@ Suggests:
withr (>= 2.3.0),
xopen,
yaml,
jsonlite
jsonlite,
arrow (>= 16.1.0)
RoxygenNote: 7.3.1
Roxygen: list(markdown = TRUE)
Config/testthat/edition: 3
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# mark (development version)

* `write_file_md5()` now supports `"feather"` and `"parquet"` methods as wrappers for [`{arrow}`]() [#234](https://github.com/jmbarbone/mark/issues/234)
* `md5()` added to provide MD5 check sums for objects [#233](https://github.com/jmbarbone/jmbarbone/mark/issues/233)

# mark 0.8.0
Expand Down
118 changes: 94 additions & 24 deletions R/write.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#' function is applied to each element of the list. The default `"auto"`
#' uses `toJSON()` if the package `jsonlite` is available, otherwise
#'
#'
#' @param x An object to write to file
#' @param path The file or connection to write to (dependent on part by method)
#' @param method The method of saving the file. When `default`, the method is
Expand Down Expand Up @@ -135,20 +136,22 @@ write_file_md5 <- function(
#' @export
#' @rdname write_file_md5
mark_write_methods <- function() {
list(
list0(
"default",
"csv",
"csv2",
"csv3",
"dcf",
"feather",
"json",
lines = c("lines", "md", "txt", "qmd", "rmd"),
"parquet",
"rds",
table = c("table", "delim"),
"tsv",
"tsv2",
"write",
yaml = c("yaml", "yml")
yaml = c("yaml", "yml"),
)
}

Expand Down Expand Up @@ -250,28 +253,11 @@ mark_write_table <- function(
col.names <- TRUE
}

if (identical(list_hook, "auto") || isTRUE(list_hook)) {
if (package_available("jsonlite")) {
list_hook <- "mark_to_json"
} else {
list_hook <- function(x) collapse(shQuote(x, "sh"), sep = ",")
}
} else if (isFALSE(list_hook)) {
list_hook <- function(x) NA_character_
} else if (isNA(list_hook)) {
list_hook <- function(x) {
stop(new_condition(
"options(mark.list.hook) is NA but list columns detected",
class = "writeFileMd5ListHook"
))
}
}

if (!isFALSE(list_hook) && !is.null(list_hook)) {
list_hook <- match.fun(list_hook)
ok <- vap_lgl(x, is.list)
if (any(ok)) {
x[ok] <- lapply(x[ok], function(i) vap_chr(i, list_hook))
list_hook <- get_list_hook(list_hook)
if (!is.null(list_hook)) {
cols <- which(vap_lgl(x, is.list))
for (col in cols) {
x[[col]] <- vap_chr(x[[col]], list_hook)
}
}

Expand All @@ -290,6 +276,39 @@ mark_write_table <- function(
)
}

get_list_hook <- function(hook) {
if (is.function(hook)) {
return(hook)
}

if (isTRUE(hook)) {
hook <- "auto"
} else if (isFALSE(hook)) {
hook <- "false"
} else if (is.na(hook)) {
hook <- "na"
}

switch(
hook,
auto = if (package_available("jsonlite")) {
get_list_hook("json")
} else {
get_list_hook("default")
},
json = mark_to_json,
default = function(x) collapse(shQuote(x, "sh"), sep = ","),
false = function(x) NA_character_,
none = NULL,
# nolint next: brace_linter.
na = function(x) stop(new_condition(
"options(mark.list.hook) is NA but list columns detected",
class = "writeFileMd5ListHook"
)),
match.fun(hook)
)
}

mark_write_dcf <- function(
x,
con = "",
Expand Down Expand Up @@ -353,6 +372,57 @@ mark_write_json <- function(x, con) {
mark_write_lines(string, con)
}

mark_write_feather <- function(x, con, ...) {
mark_write_arrow(x, con, ..., .method = "feather")
}

mark_write_parquet <- function(x, con, ...) {
mark_write_arrow(x, con, ..., .method = "parquet")
}

mark_write_arrow <- function(
x,
con,
...,
.method = c("feather", "parquet")
) {
require_namespace("arrow")
.method <- mark::match_param(.method)

switch(
.method,
feather = {
read <- arrow::read_feather
write <- arrow::write_feather
clean <- function() NULL
},
parquet = {
read <- arrow::read_parquet
write <- arrow::write_parquet
clean <- base::gc
}
)

if (identical(con, stdout())) {
temp <- tempfile()
con <- file(temp, open = "wb", encoding = "UTF-8")
on.exit({
co <- utils::capture.output(read(temp, as_data_frame = FALSE))
# Something weird was happening after reading the parquet object on
# windows; fs::file_delete() was throwing an EPERM error but file.remove()
# wasn't. Adding an explicit gc() seems to do the trick...
clean()
co <- grep("See $metadata", co, value = TRUE, invert = TRUE, fixed = TRUE)
co <- co[nzchar(co)]
writeLines(co)
safe_close(con)
safe_fs_delete(temp)
})
}

write(x, sink = con, ...)
}

# helpers -----------------------------------------------------------------

mark_to_json <- function(x) {
Expand Down
2 changes: 1 addition & 1 deletion man/reexports.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions tests/testthat/_snaps/write.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# arrow prints something to stdout()

Code
write_file_md5(quick_dfl(a = 1), method = "feather")
Output
Table
1 rows x 1 columns
$a <double>

---

Code
write_file_md5(quick_dfl(a = 1), method = "parquet")
Output
Table
1 rows x 1 columns
$a <double>

2 changes: 1 addition & 1 deletion tests/testthat/test-blank.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ test_that("is_blank() works", {
exp <- c(TRUE, FALSE, FALSE, FALSE)
expect_identical(obj, exp)

df <- data.frame(
df <- quick_dfl(
x = x,
i = 1:4,
na = rep(NA, 4L),
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-depth.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
test_that("depth", {
expect_equal(depth(NA), 1L)
expect_equal(depth(data.frame(a = 1)), 1L)
expect_equal(depth(quick_dfl(a = 1)), 1L)
expect_equal(depth(list()), 0L)
expect_equal(depth(NULL), 0L)
expect_equal(depth(list(1)), 1L)
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-detail.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ test_that("details() works", {
y <- factor(letters[1:3])
z <- c("x", NA_character_, "z")
attr(z, "label") <- "information"
df <- data.frame(x = x, y = factor(letters[1:3]))
df <- quick_dfl(x = x, y = factor(letters[1:3]))
expect_error(detail(x), NA)
expect_error(detail(df), NA)

Expand Down Expand Up @@ -43,5 +43,5 @@ test_that("details() and tibbles", {
})

test_that("details.data.frame() passes with single column [48]", {
expect_error(data.frame(a = 1), NA)
expect_error(quick_dfl(a = 1), NA)
})
2 changes: 1 addition & 1 deletion tests/testthat/test-environments.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ test_that("environments() and friends works", {
local({
foo_obj <- structure(list(), class = "foo")
foo_fun <- function() NULL
foo_df <- data.frame(a = 1)
foo_df <- quick_dfl(a = 1)
}, envir = ne)

# these are failing...?
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-expand.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ test_that("reindex() work", {

expect_error(reindex(1), "data.frame", class = "simpleError")
expect_error(
reindex(data.frame(a = 1), index = integer()),
reindex(quick_dfl(a = 1), index = integer()),
"new_index",
class = "simpleError"
)
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-labels.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ test_that("data.frame assignment", {
class = "assignLabelsDataframeDotsError"
)

df <- data.frame(a = 1, b = 2, c = 3)
df <- quick_dfl(a = 1, b = 2, c = 3)
# nolint start: line_length_linter.
expect_error(assign_labels(df, c = "c", d = "d", .missing = "error"), class = "assignLabelsDataframeMissingError")
expect_warning(assign_labels(df, c = "c", d = "d", .missing = "warn"), class = "assignLabelsDataframeMissingWarning")
Expand All @@ -64,21 +64,21 @@ test_that("data.frame assign with data.frame", {

x <- assign_labels(iris, Sepal.Length = "a", Species = "b")

labels <- data.frame(
labels <- quick_dfl(
name = c("Sepal.Length", "Species"),
label = c("a", "b")
)

y <- assign_labels(iris, labels)

exp <- data.frame(
exp <- quick_dfl(
column = colnames(iris),
label = c("a", NA, NA, NA, "b")
)

expect_equal(get_labels(y), get_labels(y))

bad_labels <- data.frame(
bad_labels <- quick_dfl(
v1 = c("a", "b", 1),
v2 = c("x", "y", 2)
)
Expand Down
9 changes: 4 additions & 5 deletions tests/testthat/test-nas.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ test_that("remove_na()", {
x <- c(1, 2, NA, 3, NaN)
expect_equal(remove_na(x), c(1, 2, 3))
expect_equal(remove_na(as.list(x)), list(1, 2, numeric(), 3, numeric()))
expect_error(remove_na(data.frame(x = 1)), class = "checkIsVectorModeError")
expect_error(remove_na(quick_dfl(x = 1)), class = "checkIsVectorModeError")

res <- remove_na(fact(x))
exp <- struct(
Expand All @@ -25,17 +25,16 @@ test_that("remove_null()", {
expect_equal(remove_null(x), list(a = 1, c = 1))

expect_error(remove_null(c(1, 2)), class = "simpleError")
expect_error(remove_null(data.frame(x = NULL)), class = "simpleError")
expect_error(remove_null(quick_dfl(x = NULL)), class = "simpleError")
})

test_that("*_na_cols() works", {
x <- data.frame(
x <- quick_dfl(
first = c(NA, 2, 3, 4),
second = c(1, NA, 3, 4),
all = not_available(length = 4),
last = c(1, 2, 3, NA),
all2 = not_available(length = 4),
stringsAsFactors = FALSE
all2 = not_available(length = 4)
)

expect_equal(
Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-note.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ test_that("note() work", {
})

test_that("print.noted() passes to next methods [67] (data.frame)", {
x <- data.frame(a = 1:50)
x <- quick_dfl(a = 1:50)
original <- capture.output(print(x, max = 5))
note(x) <- "note"

Expand Down Expand Up @@ -68,11 +68,11 @@ test_that("print.noted() passes to next methods [67] (tibble)", {
test_that("print_note() works with data.frame", {
withr::local_options(list(mark.check_interactive = NA))

x <- data.frame(a = 1:2, b = 1:2)
x <- quick_dfl(a = 1:2, b = 1:2)
note(x) <- "This should work"
expect_identical(print_note(x), x)

x <- list(a = 1:3, b = 2, c = data.frame(a = 1))
x <- list(a = 1:3, b = 2, c = quick_dfl(a = 1))
note(x) <- "This is a list"
expect_identical(print_note(x), x)
})
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ test_that("with_par() works", {

par0 <- graphics::par("mfrow")
set.seed(42)
df <- data.frame(a = stats::rnorm(100), b = stats::rnorm(100))
df <- quick_dfl(a = stats::rnorm(100), b = stats::rnorm(100))
# not testing for plot
grDevices::dev.off()
wuffle(with_par(
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-time-report.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ test_that("simpleTimeReport() works", {
foo_warn()
foo_msg()
cat("this is a cat\n")
data.frame(a = 1, b = 2)
quick_dfl(a = 1, b = 2)
})
),
NA)
Expand Down
Loading

0 comments on commit 3067f18

Please sign in to comment.