From 69839c33ce2db3ec06e8317ef5223973675670d9 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 20 Feb 2024 15:54:27 -0600 Subject: [PATCH 1/2] Export table_path infrastructure Fixes #1300 Includes some changes to make the interface a little nicer and to fix some issues I noticed while I was looking at this code with fresh eyes: * `table_name()` -> `table_path_name()` * `is_table_id()` requires characters and table_paths to be length 1 * Vectorise `table_path_components()` instead of `table_path_name()` * Make `table_path_components()` an S3 generic (as an escape hatch if needed) --- NAMESPACE | 6 +++ NEWS.md | 4 ++ R/backend-postgres-old.R | 2 +- R/db-sql.R | 4 +- R/lazy-join-query.R | 2 +- R/query-join.R | 2 +- R/remote.R | 2 +- R/table-name.R | 81 ++++++++++++++++++++++++-------- R/utils.R | 2 +- man/is_table_path.Rd | 45 ++++++++++++++++++ tests/testthat/test-table-name.R | 32 +++++++++++++ 11 files changed, 155 insertions(+), 27 deletions(-) create mode 100644 man/is_table_path.Rd diff --git a/NAMESPACE b/NAMESPACE index 79b18ef8a..5c43ae400 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -416,6 +416,7 @@ S3method(supports_window_clause,PqConnection) S3method(supports_window_clause,Redshift) S3method(supports_window_clause,RedshiftConnection) S3method(supports_window_clause,SQLiteConnection) +S3method(table_path_components,DBIConnection) S3method(tail,tbl_lazy) S3method(tally,tbl_lazy) S3method(tbl,src_dbi) @@ -438,6 +439,7 @@ S3method(values_prepare,DBIConnection) S3method(values_prepare,SQLiteConnection) export("%>%") export(as.sql) +export(as_table_path) export(base_agg) export(base_no_win) export(base_odbc_agg) @@ -446,6 +448,7 @@ export(base_odbc_win) export(base_scalar) export(base_win) export(build_sql) +export(check_table_path) export(copy_inline) export(copy_lahman) export(copy_nycflights13) @@ -472,6 +475,7 @@ export(in_catalog) export(in_schema) export(is.ident) export(is.sql) +export(is_table_path) export(join_query) export(lahman_mysql) export(lahman_postgres) @@ -579,6 +583,8 @@ export(src_memdb) export(src_sql) export(src_test) export(supports_window_clause) +export(table_path_components) +export(table_path_name) export(tbl_lazy) export(tbl_memdb) export(tbl_sql) diff --git a/NEWS.md b/NEWS.md index e73b501e4..68bd5912e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # dbplyr (development version) +* dbplyr now exports some tools to work with the internal `table_path` class + which is useful for certain backends that need to work with this + data structure (#1300). + * Oracle once again translates `head()` to `FETCH FIRST`. This does require Oracle 12c or newer, but it actually works, compared to the approach using `ROWNUM` from #1292 (#1436). diff --git a/R/backend-postgres-old.R b/R/backend-postgres-old.R index e79379a3e..102246f8a 100644 --- a/R/backend-postgres-old.R +++ b/R/backend-postgres-old.R @@ -26,7 +26,7 @@ db_write_table.PostgreSQLConnection <- function(con, # the bare table name dbWriteTable( con, - name = table_name(table, con), + name = table_path_name(table, con), value = values, field.types = types, ..., diff --git a/R/db-sql.R b/R/db-sql.R index 63235d7b0..0198b4f45 100644 --- a/R/db-sql.R +++ b/R/db-sql.R @@ -176,7 +176,7 @@ sql_table_index.DBIConnection <- function(con, table <- as_table_path(table, con) if (is.null(name)) { - table_name <- table_name(table, con) + table_name <- table_path_name(table, con) name <- name %||% paste0(c(table_name, columns), collapse = "_") } glue_sql2( @@ -259,7 +259,7 @@ sql_query_wrap.DBIConnection <- function(con, from, name = NULL, ..., lvl = 0) { glue_sql2(con, "{from}", as_sql, "{.tbl name}") } else { # must be a table_path if (!is.null(name)) { - table <- table_name(name, con) + table <- table_path_name(name, con) names(from) <- as_table_path(table, con) } from diff --git a/R/lazy-join-query.R b/R/lazy-join-query.R index 03209a7bd..50a2d63d8 100644 --- a/R/lazy-join-query.R +++ b/R/lazy-join-query.R @@ -188,7 +188,7 @@ sql_build.lazy_multi_join_query <- function(op, con, ..., sql_options = NULL) { } generate_join_table_names <- function(table_names, con) { - names <- table_name(table_names$name, con) + names <- table_path_name(table_names$name, con) table_name_length_max <- max(nchar(names)) if (length(table_names$name) != 2) { diff --git a/R/query-join.R b/R/query-join.R index 733781148..2f56e3088 100644 --- a/R/query-join.R +++ b/R/query-join.R @@ -347,7 +347,7 @@ sql_qualify_var <- function(con, table, var) { var <- sql_escape_ident(con, var) if (!is.null(table)) { - table <- table_name(table, con) + table <- table_path_name(table, con) table <- as_table_paths(table, con) sql(paste0(table, ".", var)) diff --git a/R/remote.R b/R/remote.R index 41a977c8f..b267e2966 100644 --- a/R/remote.R +++ b/R/remote.R @@ -40,7 +40,7 @@ remote_name <- function(x, null_if_local = TRUE) { if (is.null(con)) { table } else { - table_name(table, con) + table_path_name(table, con) } } } diff --git a/R/table-name.R b/R/table-name.R index d968257ea..d8f7c62bc 100644 --- a/R/table-name.R +++ b/R/table-name.R @@ -23,6 +23,34 @@ table_path <- function(x) { # So you can do SQL(table_path("foo")) setOldClass(c("dbplyr_table_path", "character")) + +#' Table paths +#' +#' @description +#' dbplyr standardises all the ways of referring to a table (i.e. a single +#' string, a string wrapped in `I()`, a [DBI::Id()] and the results of +#' [in_schema()] and [in_catalog()]) into a table "path" of the form +#' `table`, `schema.table`, or `catalog.schema.path`. A table path is +#' always suitable for inlining into a query, so user input is quoted unless +#' it is wrapped in `I()`. +#' +#' This is primarily for internal usage, but you may need to work with it if +#' you're implementing a backend, and you need to compute with the table path, +#' not just pass it on unchanged to some other dbplyr function. +#' +#' * `is_table_path()` returns `TRUE` if the object is a `table_path`. +#' * `as_table_path()` coerces known table identifiers to a `table_path`. +#' * `check_table_path()` throws an error if the object is not a `table_path`. +#' * `table_path_name()` returns the last component of the table path (i.e. +#' the name of the table). +#' * `table_path_components()` returns a list containing the components of each +#' table path. +#' +#' A `table_path` object can technically be a vector of table paths, but +#' you will never see this in table paths constructed from user inputs. +#' +#' @keywords internal +#' @export is_table_path <- function(x) { inherits(x, "dbplyr_table_path") } @@ -36,6 +64,7 @@ print.dbplyr_table_path <- function(x, ...) { `[.dbplyr_table_path` <- function(x, ...) { table_path(NextMethod()) } + #' @export `[[.dbplyr_table_path` <- function(x, ...) { table_path(NextMethod()) @@ -66,35 +95,44 @@ as_table_paths <- function(x, con) { make_table_path(x, con, collapse = FALSE) } -# Extract just the table name from a full identifier -table_name <- function(x, con) { - x <- as_table_path(x, con) - - vapply(x, FUN.VALUE = character(1), function(x) { - if (x == "") return("") +#' @export +#' @rdname is_table_path +table_path_name <- function(x, con) { + path <- as_table_path(x, con) + components <- table_path_components(path, con) - out <- table_path_components(x, con) - out[[length(out)]] + vapply(components, FUN.VALUE = character(1), function(x) { + if (length(x) == 0) "" else x[[length(x)]] }) } + +#' @export +#' @rdname is_table_path table_path_components <- function(x, con) { + UseMethod("table_path_components", con) +} + +#' @export +table_path_components.DBIConnection <- function(x, con) { quote_char <- substr(sql_escape_ident(con, ""), 1, 1) - scan( - text = x, - what = character(), - quote = quote_char, - quiet = TRUE, - na.strings = character(), - sep = "." - ) + lapply(x, function(x) { + scan( + text = x, + what = character(), + quote = quote_char, + quiet = TRUE, + na.strings = character(), + sep = "." + ) + }) } #' @export escape.dbplyr_table_path <- function(x, parens = FALSE, collapse = ", ", con = NULL) { # names are always already escaped alias <- names2(x) - table_path <- as_table_path(table_name(x, con), con) + table_path <- as_table_path(table_path_name(x, con), con) has_alias <- alias == "" | alias == table_path if (db_supports_table_alias_with_as(con)) { @@ -116,14 +154,15 @@ check_table_id <- function(x, arg = caller_arg(x), call = caller_env()) { } is_table_id <- function(x) { - is_table_path(x) || - is.ident(x) || + is.ident(x) || methods::is(x, "Id") || is_catalog(x) || is_schema(x) || - is.character(x) + ((is.character(x) || is_table_path(x)) && length(x) == 1) } +#' @export +#' @rdname is_table_path check_table_path <- function(x, error_arg = caller_arg(x), error_call = caller_env()) { @@ -136,6 +175,8 @@ check_table_path <- function(x, } } +#' @export +#' @rdname is_table_path as_table_path <- function(x, con, error_arg = caller_arg(x), diff --git a/R/utils.R b/R/utils.R index 21f50bea6..04fd20fb1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -89,7 +89,7 @@ add_temporary_prefix <- function(con, table, temporary = TRUE) { return(table) } - pieces <- table_path_components(table, con) + pieces <- table_path_components(table, con)[[1]] table_name <- pieces[length(pieces)] if (substr(table_name, 1, 1) != "#") { diff --git a/man/is_table_path.Rd b/man/is_table_path.Rd new file mode 100644 index 000000000..f79efc67b --- /dev/null +++ b/man/is_table_path.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/table-name.R +\name{is_table_path} +\alias{is_table_path} +\alias{table_path_name} +\alias{table_path_components} +\alias{check_table_path} +\alias{as_table_path} +\title{Table paths} +\usage{ +is_table_path(x) + +table_path_name(x, con) + +table_path_components(x, con) + +check_table_path(x, error_arg = caller_arg(x), error_call = caller_env()) + +as_table_path(x, con, error_arg = caller_arg(x), error_call = caller_env()) +} +\description{ +dbplyr standardises all the ways of referring to a table (i.e. a single +string, a string wrapped in \code{I()}, a \code{\link[DBI:Id]{DBI::Id()}} and the results of +\code{\link[=in_schema]{in_schema()}} and \code{\link[=in_catalog]{in_catalog()}}) into a table "path" of the form +\code{table}, \code{schema.table}, or \code{catalog.schema.path}. A table path is +always suitable for inlining into a query, so user input is quoted unless +it is wrapped in \code{I()}. + +This is primarily for internal usage, but you may need to work with it if +you're implementing a backend, and you need to compute with the table path, +not just pass it on unchanged to some other dbplyr function. +\itemize{ +\item \code{is_table_path()} returns \code{TRUE} if the object is a \code{table_path}. +\item \code{as_table_path()} coerces known table identifiers to a \code{table_path}. +\item \code{check_table_path()} throws an error if the object is not a \code{table_path}. +\item \code{table_path_name()} returns the last component of the table path (i.e. +the name of the table). +\item \code{table_path_components()} returns a list containing the components of each +table path. +} + +A \code{table_path} object can technically be a vector of table paths, but +you will never see this in table paths constructed from user inputs. +} +\keyword{internal} diff --git a/tests/testthat/test-table-name.R b/tests/testthat/test-table-name.R index ae1a73868..72c724cdb 100644 --- a/tests/testthat/test-table-name.R +++ b/tests/testthat/test-table-name.R @@ -63,6 +63,10 @@ test_that("can coerce all user facing inputs", { expect_equal(as_table_path(id, con), table_path("`foo`.bar.baz")) }) +test_that("vectorised table_path is not a table_id", { + expect_false(is_table_id(table_path(c("x", "y")))) +}) + test_that("strips names", { con <- simulate_dbi() expect_equal(as_table_path(c(x = "x"), con), table_path("`x`")) @@ -88,3 +92,31 @@ test_that("as_table_path warns when using sql", { con <- simulate_dbi() expect_snapshot(as_table_path(sql("x"), con)) }) + +# components -------------------------------------------------------------- + +test_that("can extract components from path", { + con <- simulate_dbi() + + expect_equal( + table_path_components(table_path("x.y"), con), + list(c("x", "y")) + ) + expect_equal( + table_path_components(table_path("`x.y`.z"), con), + list(c("x.y", "z")) + ) + expect_equal( + table_path_components(table_path("`x.y`.`y.z`"), con), + list(c("x.y", "y.z")) + ) +}) + +test_that("can extract names from path", { + con <- simulate_dbi() + + expect_equal( + table_path_name(table_path(c("x.y", "`x.y`.z", "x.`y.z`")), con), + c("y", "z", "y.z") + ) +}) From adec444700b409e0a90850bf6c40da6901eeeecf Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Wed, 21 Feb 2024 08:03:38 -0600 Subject: [PATCH 2/2] Vectorise test --- tests/testthat/test-table-name.R | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/tests/testthat/test-table-name.R b/tests/testthat/test-table-name.R index 72c724cdb..542b07bca 100644 --- a/tests/testthat/test-table-name.R +++ b/tests/testthat/test-table-name.R @@ -95,20 +95,12 @@ test_that("as_table_path warns when using sql", { # components -------------------------------------------------------------- -test_that("can extract components from path", { +test_that("can parse components from path", { con <- simulate_dbi() expect_equal( - table_path_components(table_path("x.y"), con), - list(c("x", "y")) - ) - expect_equal( - table_path_components(table_path("`x.y`.z"), con), - list(c("x.y", "z")) - ) - expect_equal( - table_path_components(table_path("`x.y`.`y.z`"), con), - list(c("x.y", "y.z")) + table_path_components(table_path(c("x.y", "`x.y`.z", "`x.y`.`y.z`")), con), + list(c("x", "y"), c("x.y", "z"), c("x.y", "y.z")) ) })