From 578ee216331c08e1e331fa6d8426085a330f6b8d Mon Sep 17 00:00:00 2001
From: Carl Boettiger
Date: Sun, 18 Aug 2024 00:08:21 +0000
Subject: [PATCH 1/3] use random tbl name

---
 DESCRIPTION      | 2 +-
 NEWS.md          | 4 ++++
 R/open_dataset.R | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index c58de4c..2a21c7c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: duckdbfs
 Title: High Performance Remote File System, Database and 'Geospatial' Access
     Using 'duckdb'
-Version: 0.0.5
+Version: 0.0.6
 Authors@R: c(person("Carl", "Boettiger", , "cboettig@gmail.com", c("aut", "cre"),
              comment = c(ORCID = "0000-0002-1642-628X")),
diff --git a/NEWS.md b/NEWS.md
index 6a0b79a..004dcba 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+# duckdbfs 0.0.6
+
+* bugfix open_dataset() uses random table name by default, avoid naming collisions.
+
 # duckdbfs 0.0.5
 
 * bugfix `write_dataset()` no longer adds `**` into paths when writing some partitions.
diff --git a/R/open_dataset.R b/R/open_dataset.R
index d7b7204..e599395 100644
--- a/R/open_dataset.R
+++ b/R/open_dataset.R
@@ -63,7 +63,7 @@ open_dataset <- function(sources,
                          unify_schemas = FALSE,
                          format = c("parquet", "csv", "tsv", "sf"),
                          conn = cached_connection(),
-                         tblname = tbl_name(sources),
+                         tblname = tmp_tbl_name(),
                          mode = "VIEW",
                          filename = FALSE,
                          recursive = TRUE,

From 3b77b18127d9c60b178b289eb7ee84b4a4fa3366 Mon Sep 17 00:00:00 2001
From: Carl Boettiger
Date: Sun, 18 Aug 2024 00:14:38 +0000
Subject: [PATCH 2/3] redoc

---
 man/open_dataset.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/open_dataset.Rd b/man/open_dataset.Rd
index 832104d..50115e8 100644
--- a/man/open_dataset.Rd
+++ b/man/open_dataset.Rd
@@ -11,7 +11,7 @@ open_dataset(
   unify_schemas = FALSE,
   format = c("parquet", "csv", "tsv", "sf"),
   conn = cached_connection(),
-  tblname = tbl_name(sources),
+  tblname = tmp_tbl_name(),
   mode = "VIEW",
   filename = FALSE,
   recursive = TRUE,

From e941c8a7201c7c3f8cec74c4464fcdc033b9ca45 Mon Sep 17 00:00:00 2001
From: Carl Boettiger
Date: Sun, 18 Aug 2024 01:43:16 +0000
Subject: [PATCH 3/3] fix #25

---
 R/write_dataset.R                   | 31 +++++++++--------------------
 tests/testthat/test-write_dataset.R | 11 ++++++++--
 2 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/R/write_dataset.R b/R/write_dataset.R
index f4d55c1..3331e3b 100644
--- a/R/write_dataset.R
+++ b/R/write_dataset.R
@@ -35,44 +35,31 @@ write_dataset <- function(dataset,
     tblname <- as.character(remote_name(dataset, conn))
   }
 
-  path <- parse_uri(path, conn = conn, recursive = FALSE)
-
   ## local writes use different notation to allow overwrites:
   allow_overwrite <- character(0)
   if(overwrite){
-    allow_overwrite <- paste("OVERWRITE_OR_IGNORE")
+    allow_overwrite <- "OVERWRITE_OR_IGNORE"
   }
 
+  path <- parse_uri(path, conn = conn, recursive = FALSE)
  if(grepl("^s3://", path)) {
     duckdb_s3_config(conn = conn, ...)
-    if(overwrite){
-     # allow_overwrite <- paste("ALLOW_OVERWRITE", overwrite)
-    }
   }
 
-
-  format <- toupper(format)
   partition_by <- character(0)
   if(length(partitioning) > 0) {
     partition_by <- paste0("PARTITION_BY (",
                            paste(partitioning, collapse=", "),
-                           "), ")
-  }
-  comma <- character(0)
-  if (length(c(partition_by, allow_overwrite) > 0)){
-    comma <- ", "
+                           ") ")
   }
 
-  options <- paste0(
-    paste("FORMAT", "'parquet'"), comma,
-    partition_by,
-    allow_overwrite
-  )
-
-  query <- paste("COPY", tblname, "TO",
-                 paste0("'", path, "'"),
-                 paste0("(", options, ")"), ";")
+  format <- toupper(format)
+  format_by <- glue::glue("FORMAT {format}")
+  options_vec <- c(format_by, partition_by, allow_overwrite)
+  options <- glue::glue_collapse(options_vec, sep = ", ")
+  copy <- glue::glue("COPY {tblname} TO '{path}' ")
+  query <- glue::glue(copy, "({options})", ";")
 
   status <- DBI::dbSendQuery(conn, query)
   invisible(path)
 }
diff --git a/tests/testthat/test-write_dataset.R b/tests/testthat/test-write_dataset.R
index f7cbdd9..32b9d91 100644
--- a/tests/testthat/test-write_dataset.R
+++ b/tests/testthat/test-write_dataset.R
@@ -25,8 +25,9 @@ test_that("write_dataset", {
 
   ## Write from a query string
   path2 <- file.path(tempdir(), "spatial2.parquet")
-  tbl |>
-    dplyr::mutate(new = "test") |>
+  dataset <- tbl |>
+    dplyr::mutate(new = "test")
+  dataset |>
     write_dataset(path2)
 
 })
@@ -48,6 +49,12 @@ test_that("write_dataset partitions", {
 
   parts <- list.files(path)
   expect_true(any(grepl("cyl=4", parts)))
 
+  path <- file.path(tempdir(), "mtcars2")
+  mtcars |> write_dataset(path, partitioning = "cyl", overwrite=FALSE)
+  expect_true(file.exists(path))
+  df <- open_dataset(path)
+  expect_s3_class(df, "tbl")
+
 })
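
The snippet below is an illustrative sketch, not part of the patch series: it exercises the behaviour these commits target, namely the random default view name in open_dataset() and the rewritten COPY ... PARTITION_BY query in write_dataset(). It assumes a local R session with the patched duckdbfs plus dplyr; the "mtcars_demo" path, the ds1/ds2 object names, and the filter/collect step are made up for the example.

# Illustrative sketch only (not part of the patches); names and paths are examples.
library(duckdbfs)
library(dplyr)

# write_dataset() writes a local data.frame as a hive-partitioned parquet folder
# (the tests above check for the resulting "cyl=4" subdirectory).
path <- file.path(tempdir(), "mtcars_demo")
mtcars |> write_dataset(path, partitioning = "cyl")

# With tmp_tbl_name() as the default, each open_dataset() call registers its
# own randomly named view, so repeated calls no longer collide on one name.
ds1 <- open_dataset(path)
ds2 <- open_dataset(path)
ds1 |> filter(cyl == 4) |> collect()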