-
Notifications
You must be signed in to change notification settings - Fork 79
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: dbListTables()
lists materialized views
#414
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,11 +14,11 @@ dbListObjects_PqConnection_ANY <- function(conn, prefix = NULL, ...) { | |
} | ||
query <- paste0( | ||
"SELECT ", null_varchar, " AS schema, table_name AS table FROM ( \n", | ||
list_tables(conn = conn, order_by = "table_type, table_name"), | ||
list_tables(conn = conn, order_by = "table_type, table_name"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. styler won't like it. Maybe use braces or parens to enforce indent here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm fine with not having an extra indent here and prefer that to extra braces or parentheses. If I run |
||
") as table_query \n", | ||
"UNION ALL\n", | ||
"SELECT DISTINCT table_schema AS schema, ", null_varchar, " AS table FROM ( \n", | ||
list_tables(conn = conn, where_schema = "true"), | ||
list_tables(conn = conn, where_schema = "true"), | ||
") as schema_query;" | ||
) | ||
} else { | ||
|
@@ -38,11 +38,11 @@ dbListObjects_PqConnection_ANY <- function(conn, prefix = NULL, ...) { | |
) | ||
query <- paste0( | ||
"SELECT table_schema AS schema, table_name AS table FROM ( \n", | ||
list_tables( | ||
conn = conn, | ||
where_schema = where_schema, | ||
order_by = "table_type, table_name" | ||
), | ||
list_tables( | ||
conn = conn, | ||
where_schema = where_schema, | ||
order_by = "table_type, table_name" | ||
), | ||
") as table_query" | ||
) | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -122,12 +122,21 @@ db_append_table <- function(conn, name, value, copy, warn) { | |
|
||
list_tables <- function(conn, where_schema = NULL, where_table = NULL, order_by = NULL) { | ||
|
||
query <- paste0( | ||
# information_schema.table docs: https://www.postgresql.org/docs/current/infoschema-tables.html | ||
"SELECT table_schema, table_name \n", | ||
"FROM information_schema.tables \n", | ||
"WHERE TRUE \n" # dummy clause to be able to add additional ones with `AND` | ||
) | ||
if (conn@system_catalogs) { | ||
query <- paste0( | ||
"SELECT table_schema, table_name \n", | ||
"FROM ( ", list_tables_from_system_catalog(), ") AS schema_tables \n", | ||
"WHERE TRUE \n" | ||
) | ||
} else { | ||
query <- paste0( | ||
# information_schema.table docs: | ||
# https://www.postgresql.org/docs/current/infoschema-tables.html | ||
"SELECT table_schema, table_name \n", | ||
"FROM information_schema.tables \n", | ||
"WHERE TRUE \n" # dummy clause to be able to add additional ones with `AND` | ||
) | ||
} | ||
|
||
if (is.null(where_schema)) { | ||
# `true` in `current_schemas(true)` is necessary to get temporary tables | ||
|
@@ -147,6 +156,38 @@ list_tables <- function(conn, where_schema = NULL, where_table = NULL, order_by | |
query | ||
} | ||
|
||
list_tables_from_system_catalog <- function() { | ||
# This imitates (parts of) information_schema.tables, but includes materialized views | ||
paste0( | ||
# pg_class vs. information_schema: https://stackoverflow.com/a/24089729 | ||
# pg_class docs: https://www.postgresql.org/docs/current/catalog-pg-class.html | ||
"SELECT n.nspname AS table_schema, cl.relname AS table_name, \n", | ||
" CASE | ||
WHEN (n.oid = pg_my_temp_schema()) THEN 'LOCAL TEMPORARY' | ||
WHEN (cl.relkind IN ('r', 'p')) THEN 'BASE TABLE' | ||
WHEN (cl.relkind = 'v') THEN 'VIEW' | ||
WHEN (cl.relkind = 'f') THEN 'FOREIGN' | ||
WHEN (cl.relkind = 'm') THEN 'MATVIEW' | ||
ELSE NULL | ||
END AS table_type \n", | ||
"FROM pg_class AS cl \n", | ||
"JOIN pg_namespace AS n ON cl.relnamespace = n.oid \n", | ||
# include: r = ordinary table, v = view, m = materialized view, | ||
# f = foreign table, p = partitioned table | ||
"WHERE (cl.relkind IN ('r', 'v', 'm', 'f', 'p')) \n", | ||
# do not return individual table partitions | ||
" AND NOT cl.relispartition \n", | ||
# do not return other people's temp schemas | ||
" AND (NOT pg_is_other_temp_schema(n.oid)) \n", | ||
# Return only objects (relations) which the current user may access | ||
# https://www.postgresql.org/docs/current/functions-info.html | ||
" AND (pg_has_role(cl.relowner, 'USAGE') \n", | ||
" OR has_table_privilege(cl.oid, 'SELECT, INSERT, UPDATE, DELETE, TRUNCATE, REFERENCES, TRIGGER') \n", | ||
" OR has_any_column_privilege(cl.oid, 'SELECT, INSERT, UPDATE, REFERENCES') \n", | ||
" ) \n" | ||
) | ||
} | ||
|
||
exists_table <- function(conn, id) { | ||
name <- id@name | ||
stopifnot("table" %in% names(name)) | ||
|
@@ -168,6 +209,14 @@ exists_table <- function(conn, id) { | |
} | ||
|
||
list_fields <- function(conn, id) { | ||
if (conn@system_catalogs) { | ||
list_fields_from_system_catalog(conn, id) | ||
} else { | ||
list_fields_from_info_schema(conn, id) | ||
} | ||
} | ||
|
||
list_fields_from_info_schema <- function(conn, id) { | ||
name <- id@name | ||
|
||
is_redshift <- is(conn, "RedshiftConnection") | ||
|
@@ -185,7 +234,7 @@ list_fields <- function(conn, id) { | |
# as there cannot be multiple tables with the same name in a single schema | ||
only_first <- FALSE | ||
|
||
# or we have to look the table up in the schemas on the search path | ||
# or we have to look the table up in the schemas on the search path | ||
} else if (is_redshift) { | ||
# A variant of the Postgres version that uses CTEs and generate_series() | ||
# instead of generate_subscripts(), the latter is not supported on Redshift | ||
|
@@ -211,10 +260,10 @@ list_fields <- function(conn, id) { | |
# How to unnest `current_schemas(true)` array with element number (works since v9.4): | ||
# https://stackoverflow.com/a/8767450/2114932 | ||
query <- paste0( | ||
"(", | ||
"SELECT * FROM unnest(current_schemas(true)) WITH ORDINALITY AS tbl(table_schema, nr) \n", | ||
"WHERE table_schema != 'pg_catalog'", | ||
") schemas_on_path" | ||
"(", | ||
"SELECT * FROM unnest(current_schemas(true)) WITH ORDINALITY AS tbl(table_schema, nr) \n", | ||
"WHERE table_schema != 'pg_catalog'", | ||
") schemas_on_path" | ||
) | ||
only_first <- TRUE | ||
} | ||
|
@@ -252,6 +301,29 @@ list_fields <- function(conn, id) { | |
fields | ||
} | ||
|
||
list_fields_from_system_catalog <- function(conn, id) { | ||
if (exists_table(conn, id)) { | ||
# we know from exists_table() that id@name["table"] exists | ||
# and the user has access priviledges | ||
tname_str <- stats::na.omit(id@name[c("schema", "table")]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have more places where we rely on named Which
Relatedly: Is r-dbi/DBItest#340 relevant for RPostgres? |
||
tname_qstr <- dbQuoteString(conn, paste(tname_str, collapse = ".")) | ||
# cast to `regclass` resolves the table name according to the current | ||
# `search_path` https://dba.stackexchange.com/a/75124 | ||
query <- | ||
paste0( | ||
"SELECT attname \n", | ||
"FROM pg_attribute \n", | ||
"WHERE attrelid = ", tname_qstr, "::regclass \n", | ||
" AND attnum > 0 \n", | ||
" AND NOT attisdropped \n", | ||
"ORDER BY attnum;" | ||
) | ||
dbGetQuery(conn, query)[[1]] | ||
} else { | ||
stop("Table ", dbQuoteIdentifier(conn, id), " not found.", call. = FALSE) | ||
} | ||
} | ||
|
||
find_temp_schema <- function(conn, fail_if_missing = TRUE) { | ||
if (!is.na(connection_get_temp_schema(conn@ptr))) | ||
return(connection_get_temp_schema(conn@ptr)) | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#' Run an expression that creates a materialized view and clean up afterwards. | ||
#' | ||
#' @param con PqConnection. The database connection. | ||
#' @param matview character. The materialized view name. | ||
#' @param query character. A SELECT, TABLE, or VALUES command to populate the | ||
#' materialized view. | ||
#' @param expr expression. The R expression to execute. | ||
#' @return the return value of the \code{expr}. | ||
#' @seealso https://www.postgresql.org/docs/current/sql-creatematerializedview.html | ||
with_matview <- function(con, matview, query, expr) { | ||
on.exit( | ||
DBI::dbExecute(con, paste0("DROP MATERIALIZED VIEW IF EXISTS ", matview)), | ||
add = TRUE | ||
) | ||
dbExecute( | ||
con, | ||
paste0( | ||
"CREATE MATERIALIZED VIEW IF NOT EXISTS ", matview, | ||
" AS ", query | ||
) | ||
) | ||
|
||
force(expr) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#' Run an expression that creates a schema and clean up afterwards. | ||
#' | ||
#' @param con PqConnection. The database connection. | ||
#' @param schema character. The schema name. | ||
#' @param expr expression. The R expression to execute. | ||
#' @return the return value of the \code{expr}. | ||
with_schema <- function(con, schema, expr) { | ||
on.exit( | ||
DBI::dbExecute(con, paste0("DROP SCHEMA IF EXISTS ", schema)), | ||
add = TRUE | ||
) | ||
DBI::dbExecute(con, paste0("CREATE SCHEMA IF NOT EXISTS ", schema)) | ||
force(expr) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
if (postgresHasDefault()) { | ||
with_database_connection(con = postgresDefault(system_catalogs = TRUE), { | ||
|
||
skip_if( | ||
is(con, "RedshiftConnection"), | ||
"Redshift doesn't expose system catalogs" | ||
) | ||
skip_if_not( | ||
dbExistsTable(con, Id(schema = "pg_catalog", table = "pg_class")), | ||
"`pg_catalog`.`pg_class` not available" | ||
) | ||
|
||
test_that("Materialized View is listed", { | ||
with_matview(con, "matview1", "SELECT 1 AS col1", { | ||
|
||
expect_true(dbExistsTable(con, "matview1")) | ||
expect_true("matview1" %in% dbListTables(con)) | ||
|
||
objects <- dbListObjects(con) | ||
quoted_tables <- | ||
vapply( | ||
objects$table, | ||
dbQuoteIdentifier, | ||
conn = con, | ||
character(1) | ||
) | ||
expect_true(dbQuoteIdentifier(con, "matview1") %in% quoted_tables) | ||
|
||
expect_true("col1" %in% dbListFields(con, "matview1")) | ||
}) | ||
}) | ||
|
||
test_that("Materialized View in custom schema is listed", { | ||
with_schema(con, "matschema1", { | ||
matview_id <- Id(schema = "matschema1", table = "matview1") | ||
matview_chr <- "matschema1.matview1" | ||
|
||
with_matview(con, matview_chr, "SELECT 1 AS col1", { | ||
|
||
expect_true(dbExistsTable(con, matview_id)) | ||
|
||
objects <- dbListObjects(con, prefix = Id(schema = "matschema1")) | ||
quoted_tables <- | ||
vapply( | ||
objects$table, | ||
dbQuoteIdentifier, | ||
conn = con, | ||
character(1) | ||
) | ||
expect_true(dbQuoteIdentifier(con, matview_id) %in% quoted_tables) | ||
|
||
expect_true("col1" %in% dbListFields(con, matview_id)) | ||
}) | ||
}) | ||
}) | ||
}) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new argument name is not very clear, perhaps use something like
meta = c("system", "information_schema")
?I'm not in love with
meta
either, can you think of a better name?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
meta
seems quite broad and therefore not very clear as well, so how aboutmeta_schema
?If we'd want to stick with a boolean, it could also be
information_schema
, but it's probably better to use a character argument to keep it extendable?(Just documenting some related info:)
PostgreSQL calls the tables with the system information
System Catalogs
. There are alsoSystem Views
, however, likepg_matviews
. Both the Catalogs and the Views reside in thepg_catalog
schema.Redshift also has Materialized Views (out of scope for this PR). Their meta info resides (mostly) in
ST_MV_INFO
, one of the "system tables and views". The "System catalog tables", which seem to be legacy tables from the PostgreSQL v8 times, are a part of those as well.Long story short:
meta_schema = c("system", "information_schema")
seems to be the most fitting argument to specify which schema should be queried for information on tables, (materialized) views and columns.