From 11747bdbfee73c72c87d8506dfe76a4116d6a3b4 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Tue, 13 Dec 2022 18:12:20 -0700 Subject: [PATCH 1/8] Coerce input to data.frame if needed --- R/schema_utils.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/schema_utils.R b/R/schema_utils.R index 13b33721..4069b75e 100644 --- a/R/schema_utils.R +++ b/R/schema_utils.R @@ -20,6 +20,7 @@ as_table_schema <- function(df, schema, list_truncate = FALSE) { + if("data.table" %in% class(df)) df <- as.data.frame(df) if("synapseclient.table.Schema" %in% class(schema) && reticulate::py_has_attr(schema, "columns_to_store")) { col_schema <- schema$columns_to_store } else { From d3521fc47680a19d96edd63d5207ea1f7efc8460 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Tue, 13 Dec 2022 18:13:20 -0700 Subject: [PATCH 2/8] Reference correct variable --- R/schema_utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/schema_utils.R b/R/schema_utils.R index 4069b75e..b0342a35 100644 --- a/R/schema_utils.R +++ b/R/schema_utils.R @@ -42,7 +42,7 @@ as_table_schema <- function(df, if(grepl("STRING", col_type[i])) { maxsize <- col_schema[[i]]$maximumSize size_fail <- sapply(values, function(x) any(nchar(x) > maxsize)) - if(any(size_fail)) stop(paste("Characters in", names(df)[i], "exceeds max size of", maxlen)) + if(any(size_fail)) stop(paste("Characters in", names(df)[i], "exceeds max size of", maxsize)) } if(grepl("*_LIST", col_type[i])) { maxlen <- col_schema[[i]]$maximumListLength From ae4494f214fa3cbab01eaac7f516057840ee5150 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Tue, 13 Dec 2022 18:14:57 -0700 Subject: [PATCH 3/8] Add additional check for NA --- R/schema_utils.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/schema_utils.R b/R/schema_utils.R index b0342a35..221c56cf 100644 --- a/R/schema_utils.R +++ b/R/schema_utils.R @@ -41,6 +41,7 @@ as_table_schema <- function(df, values <- df[[i]] if(grepl("STRING", col_type[i])) { maxsize <- col_schema[[i]]$maximumSize + if(anyNA(values)) stop("Please remove NA values from STRING column", names(df)[i]) size_fail <- sapply(values, function(x) any(nchar(x) > maxsize)) if(any(size_fail)) stop(paste("Characters in", names(df)[i], "exceeds max size of", maxsize)) } From 1b589dfb1b95370d66ee233c45a4ec61b5132865 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Tue, 13 Dec 2022 18:43:33 -0700 Subject: [PATCH 4/8] Rename test file to disambiguate from new related test --- tests/testthat/{test_schema_utils.R => test_json_schema_utils.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/testthat/{test_schema_utils.R => test_json_schema_utils.R} (100%) diff --git a/tests/testthat/test_schema_utils.R b/tests/testthat/test_json_schema_utils.R similarity index 100% rename from tests/testthat/test_schema_utils.R rename to tests/testthat/test_json_schema_utils.R From d3a40d6cf40686f2d36c2f69fdb1b540c3fbb127 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Tue, 13 Dec 2022 19:47:41 -0700 Subject: [PATCH 5/8] Add some of my favorite tests --- tests/testthat/test_synapse_schema_utils.R | 52 ++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 tests/testthat/test_synapse_schema_utils.R diff --git a/tests/testthat/test_synapse_schema_utils.R b/tests/testthat/test_synapse_schema_utils.R new file mode 100644 index 00000000..be557882 --- /dev/null +++ b/tests/testthat/test_synapse_schema_utils.R @@ -0,0 +1,52 @@ +# Test fixture is table syn49378540 on Synapse, with schema that includes INTEGER and STRING and STRING_LIST columns with character size and character + list size limits. +# These tests mainly focus on the character and list validation and JSON encoding functionality that `as_table_schema` provides +# in order to successfully append data conforming to an existing Synapse schema + +test_that("`as_table_schema` works as expected for data that can be fit into schema without issue", { + test_fixture <- "syn49378540" + test_data <- data.frame(Movie = "The Sound of Music", + Year = 1965, + Favorites = list("raindrops","whiskers", "kettles")) + test_data_storable <- as_table_schema(test_data, schema = test_fixture) + testthat::expect_no_error(.syn$store(test_data_storable)) + +}) + +test_that("`as_table_schema` errors for data with missing column", { + test_fixture <- "syn49378540" + test_data <- data.frame(Movie = "The Sound of Music", + Year = 1965) + testthat::expect_error(as_table_schema(test_data, schema = test_fixture)) + +}) + + +test_that("`as_table_schema` errors for data that exceeds list length as specified in schema for LIST column and truncation is not allowed", { + test_fixture <- "syn49378540" + test_data <- data.frame(Movie = "The Sound of Music", + Year = 1965, + Favorites = list("raindrops", "whiskers", "kettles", "mittens")) # exceeds list length of 3 + testthat::expect_error(as_table_schema(test_data, schema = test_fixture, list_truncate = FALSE)) + +}) + + +test_that("`as_table_schema` returns result with warning for data that exceeds list length specified in schema for LIST column but truncated to fit", { + test_fixture <- "syn49378540" + test_data <- data.frame(Movie = "The Sound of Music", + Year = 1965, + Favorites = list("raindrops", "whiskers", "kettles", "mittens")) # exceeds list length of 3 + testthat::expect_warning(as_table_schema(test_data, schema = test_fixture, list_truncate = TRUE)) + +}) + + +test_that("`as_table_schema` errors for data that exceeds character limits specified in schema for STRING_LIST column", { + test_fixture <- "syn49378540" + test_data <- data.frame(Movie = "The Sound of Music", + Year = 1965, + Favorites = list("raindrops on roses", "whiskers")) # exceeds character size of 15 + testthat::expect_error(as_table_schema(test_data, schema = test_fixture)) + +}) + From e5665b7f5d427877e7a74c94e3091dd45739af5b Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Thu, 15 Dec 2022 10:18:27 -0700 Subject: [PATCH 6/8] Remember tests and then I don't feel so bad --- tests/testthat/test_synapse_schema_utils.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test_synapse_schema_utils.R b/tests/testthat/test_synapse_schema_utils.R index be557882..6d0b2f0b 100644 --- a/tests/testthat/test_synapse_schema_utils.R +++ b/tests/testthat/test_synapse_schema_utils.R @@ -6,10 +6,9 @@ test_that("`as_table_schema` works as expected for data that can be fit into sch test_fixture <- "syn49378540" test_data <- data.frame(Movie = "The Sound of Music", Year = 1965, - Favorites = list("raindrops","whiskers", "kettles")) + Favorites = I(list(list("raindrops","whiskers", "kettles")))) test_data_storable <- as_table_schema(test_data, schema = test_fixture) - testthat::expect_no_error(.syn$store(test_data_storable)) - + testthat::expect_s3_class(.syn$store(test_data_storable), "synapseclient.table.CsvFileTable") }) test_that("`as_table_schema` errors for data with missing column", { @@ -25,7 +24,7 @@ test_that("`as_table_schema` errors for data that exceeds list length as specifi test_fixture <- "syn49378540" test_data <- data.frame(Movie = "The Sound of Music", Year = 1965, - Favorites = list("raindrops", "whiskers", "kettles", "mittens")) # exceeds list length of 3 + Favorites = I(list(list("raindrops", "whiskers", "kettles", "mittens")))) # exceeds list length of 3 testthat::expect_error(as_table_schema(test_data, schema = test_fixture, list_truncate = FALSE)) }) @@ -35,7 +34,7 @@ test_that("`as_table_schema` returns result with warning for data that exceeds l test_fixture <- "syn49378540" test_data <- data.frame(Movie = "The Sound of Music", Year = 1965, - Favorites = list("raindrops", "whiskers", "kettles", "mittens")) # exceeds list length of 3 + Favorites = I(list(list("raindrops", "whiskers", "kettles", "mittens")))) # exceeds list length of 3 testthat::expect_warning(as_table_schema(test_data, schema = test_fixture, list_truncate = TRUE)) }) @@ -45,7 +44,7 @@ test_that("`as_table_schema` errors for data that exceeds character limits speci test_fixture <- "syn49378540" test_data <- data.frame(Movie = "The Sound of Music", Year = 1965, - Favorites = list("raindrops on roses", "whiskers")) # exceeds character size of 15 + Favorites = I(list(list("raindrops on roses", "whiskers")))) # exceeds character size of 15 for first value testthat::expect_error(as_table_schema(test_data, schema = test_fixture)) }) From 4daa0a97f7639eb862b85f7157b90a5598193f28 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Thu, 15 Dec 2022 10:28:30 -0700 Subject: [PATCH 7/8] Add skip usual test conditions --- tests/testthat/test_synapse_schema_utils.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/testthat/test_synapse_schema_utils.R b/tests/testthat/test_synapse_schema_utils.R index 6d0b2f0b..944aed3a 100644 --- a/tests/testthat/test_synapse_schema_utils.R +++ b/tests/testthat/test_synapse_schema_utils.R @@ -3,6 +3,9 @@ # in order to successfully append data conforming to an existing Synapse schema test_that("`as_table_schema` works as expected for data that can be fit into schema without issue", { + skip_if_no_synapseclient() + skip_if_no_pandas() + skip_if_no_login() test_fixture <- "syn49378540" test_data <- data.frame(Movie = "The Sound of Music", Year = 1965, @@ -12,6 +15,9 @@ test_that("`as_table_schema` works as expected for data that can be fit into sch }) test_that("`as_table_schema` errors for data with missing column", { + skip_if_no_synapseclient() + skip_if_no_pandas() + skip_if_no_login() test_fixture <- "syn49378540" test_data <- data.frame(Movie = "The Sound of Music", Year = 1965) @@ -21,6 +27,9 @@ test_that("`as_table_schema` errors for data with missing column", { test_that("`as_table_schema` errors for data that exceeds list length as specified in schema for LIST column and truncation is not allowed", { + skip_if_no_synapseclient() + skip_if_no_pandas() + skip_if_no_login() test_fixture <- "syn49378540" test_data <- data.frame(Movie = "The Sound of Music", Year = 1965, @@ -31,6 +40,9 @@ test_that("`as_table_schema` errors for data that exceeds list length as specifi test_that("`as_table_schema` returns result with warning for data that exceeds list length specified in schema for LIST column but truncated to fit", { + skip_if_no_synapseclient() + skip_if_no_pandas() + skip_if_no_login() test_fixture <- "syn49378540" test_data <- data.frame(Movie = "The Sound of Music", Year = 1965, @@ -41,6 +53,9 @@ test_that("`as_table_schema` returns result with warning for data that exceeds l test_that("`as_table_schema` errors for data that exceeds character limits specified in schema for STRING_LIST column", { + skip_if_no_synapseclient() + skip_if_no_pandas() + skip_if_no_login() test_fixture <- "syn49378540" test_data <- data.frame(Movie = "The Sound of Music", Year = 1965, From 7affc8e57423bded4843343302afce0ffa40726a Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Wed, 21 Dec 2022 09:12:37 -0700 Subject: [PATCH 8/8] Add login check as review suggested --- R/schema_utils.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/schema_utils.R b/R/schema_utils.R index 221c56cf..ca9115f6 100644 --- a/R/schema_utils.R +++ b/R/schema_utils.R @@ -20,6 +20,7 @@ as_table_schema <- function(df, schema, list_truncate = FALSE) { + .check_login() if("data.table" %in% class(df)) df <- as.data.frame(df) if("synapseclient.table.Schema" %in% class(schema) && reticulate::py_has_attr(schema, "columns_to_store")) { col_schema <- schema$columns_to_store