From 8e189a9e4452cf287509691515bd14efec28e347 Mon Sep 17 00:00:00 2001 From: sdgamboa Date: Wed, 10 Apr 2024 08:31:23 -0400 Subject: [PATCH] update tests --- tests/testthat/test-getTaxonSignatures.R | 10 +- tests/testthat/test-importBugphyzz.R | 306 ++++++++++++----------- tests/testthat/test-makeSignatures.R | 8 +- tests/testthat/test-physiologies.R | 4 +- 4 files changed, 168 insertions(+), 160 deletions(-) diff --git a/tests/testthat/test-getTaxonSignatures.R b/tests/testthat/test-getTaxonSignatures.R index 157d0567..a31e1858 100644 --- a/tests/testthat/test-getTaxonSignatures.R +++ b/tests/testthat/test-getTaxonSignatures.R @@ -3,13 +3,13 @@ taxName <- "Escherichia coli" bp <- importBugphyzz() sigs_ids <- getTaxonSignatures(taxID, bp) sigs_tax <- getTaxonSignatures( - tax = taxName, bp = bp, tax_id_type = "Taxon_name" + tax = taxName, bp = bp, tax_id_type = "Taxon_name" ) test_that("getTaxonSignatures works with IDs", { - expect_gt(length(sigs_ids), 0) - expect_type(sigs_ids, "character") + expect_gt(length(sigs_ids), 0) + expect_type(sigs_ids, "character") }) test_that("getTaxonSignatures works with IDs", { - expect_gt(length(sigs_tax), 0) - expect_type(sigs_tax, "character") + expect_gt(length(sigs_tax), 0) + expect_type(sigs_tax, "character") }) diff --git a/tests/testthat/test-importBugphyzz.R b/tests/testthat/test-importBugphyzz.R index a64ed06d..de7dcaf8 100644 --- a/tests/testthat/test-importBugphyzz.R +++ b/tests/testthat/test-importBugphyzz.R @@ -4,184 +4,192 @@ library(purrr) expected_columns_multistate <- c( - NCBI_ID = "integer", Taxon_name = "character", - Rank = "character", - Attribute = "character", Attribute_value = "character", - Evidence = "character", - Frequency = "character", Score = "double", Attribute_source = "character", - Confidence_in_curation = "character", Attribute_type = "character", - Validation = "double" + NCBI_ID = "integer", Taxon_name = "character", + Rank = "character", + Attribute = "character", Attribute_value = "character", + Evidence = "character", + Frequency = "character", Score = "double", Attribute_source = "character", + Confidence_in_curation = "character", Attribute_type = "character", + Validation = "double" ) expected_columns_binary <- c( - NCBI_ID = "integer", Taxon_name = "character", - Rank = "character", - Attribute = "character", Attribute_value = "logical", - Evidence = "character", - Frequency = "character", Score = "double", Attribute_source = "character", - Confidence_in_curation = "character", Attribute_type = "character", - Validation = "double" + NCBI_ID = "integer", Taxon_name = "character", + Rank = "character", + Attribute = "character", Attribute_value = "logical", + Evidence = "character", + Frequency = "character", Score = "double", Attribute_source = "character", + Confidence_in_curation = "character", Attribute_type = "character", + Validation = "double" ) expected_columns_numeric <- c( - NCBI_ID = "integer", Taxon_name = "character", - Rank = "character", - Attribute = "character", Attribute_value = "double", - Evidence = "character", - Frequency = "character", Score = "double", Attribute_source = "character", - Confidence_in_curation = "character", Attribute_type = "character", - NSTI = "double", Validation = "double" + NCBI_ID = "integer", Taxon_name = "character", + Rank = "character", + Attribute = "character", Attribute_value = "double", + Evidence = "character", + Frequency = "character", Score = "double", Attribute_source = "character", + Confidence_in_curation = "character", Attribute_type = "character", + NSTI = "double", Validation = "double" ) checkColumnNames <- function(x) { - attr_type <- unique(x$Attribute_type) - if (attr_type == "multistate-intersection" | attr_type == "multistate-union") { - lgl_vct <- colnames(x) == names(expected_columns_multistate) - } else if (attr_type == "binary") { - lgl_vct <- colnames(x) == names(expected_columns_binary) - } else if (attr_type == "numeric") { - lgl_vct <- colnames(x) == names(expected_columns_numeric) - } - return(all(lgl_vct)) + attr_type <- unique(x$Attribute_type) + if (attr_type == "multistate-intersection" | attr_type == "multistate-union") { + lgl_vct <- colnames(x) == names(expected_columns_multistate) + } else if (attr_type == "binary") { + lgl_vct <- colnames(x) == names(expected_columns_binary) + } else if (attr_type == "numeric") { + lgl_vct <- colnames(x) == names(expected_columns_numeric) + } + return(all(lgl_vct)) } checkColumnTypes <- function(x) { - attr_type <- unique(x$Attribute_type) - if ("Validation" %in% colnames(x)) { - x$Validation <- as.double(x$Validation) - } - if ("NSTI" %in% colnames(x)) { - x$NSTI <- as.double(x$NSTI) - } - types <- map_chr(x, typeof) - if (attr_type == "multistate-intersection" | attr_type == "multistate-union") { - lgl_vct <- types == expected_columns_multistate - } else if (attr_type == "binary") { - lgl_vct <- types == expected_columns_binary - } else if (attr_type == "numeric") { - lgl_vct <- types == expected_columns_numeric - } - return(all(lgl_vct)) + attr_type <- unique(x$Attribute_type) + if ("Validation" %in% colnames(x)) { + x$Validation <- as.double(x$Validation) + } + if ("NSTI" %in% colnames(x)) { + x$NSTI <- as.double(x$NSTI) + } + types <- map_chr(x, typeof) + if (attr_type == "multistate-intersection" | attr_type == "multistate-union") { + lgl_vct <- types == expected_columns_multistate + } else if (attr_type == "binary") { + lgl_vct <- types == expected_columns_binary + } else if (attr_type == "numeric") { + lgl_vct <- types == expected_columns_numeric + } + return(all(lgl_vct)) } checkNAs <- function(x) { - select_col <- c( - "NCBI_ID", - "Taxon_name", - "Rank", - "Attribute", - "Attribute_value", - "Attribute_type", - "Evidence", - "Frequency", - "Score" - ) - x <- x[, select_col] - !any(purrr::map_lgl(x, ~ any(is.na(.x)))) + select_col <- c( + "NCBI_ID", + "Taxon_name", + "Rank", + "Attribute", + "Attribute_value", + "Attribute_type", + "Evidence", + "Frequency", + "Score" + ) + x <- x[, select_col] + !any(purrr::map_lgl(x, ~ any(is.na(.x)))) } checkCuration <- function(x) { - ## Valid values - Rank_vals <- c( - "superkingdom", "kingdom", "domain", "phylum", "class", "order", - "family", "genus","species", "strain" - ) - Attribute_type_vals <- c( - "multistate-union", "multistate-intersection", "binary", - "numeric" - ) - Frequency_vals <- c( - "always", "usually", "sometimes", "rarely", "never", "unknown" - ) - Evidence_vals <- c( - "exp", "igc", "nas", "tas", "tax", "asr" - ) - Confidence_in_curation_vals <- c( - "high", "medium", "low" - ) - - attr_type <- unique(x$Attribute_type) - - ## Columns omitted here are tested elsewhere - ## Attribute_type for numeric values - ## Score - ## NSTI - ## Validation - ## NCBI_ID - ## Taxon_name - - Rank_ok <- all(x$Rank %in% Rank_vals) - Attribute_type_ok <- attr_type %in% Attribute_type_vals - Evidence_ok <- all(x$Evidence %in% Evidence_vals) - Frequency_ok <- all(unique(x$Frequency) %in% Frequency_vals) - # Score_ok <- all(as.double(na.omit(x$Score)) >=0 & as.double(na.omit(x$Score <=1))) - Confidence_in_curation_ok <- all(na.omit(x$Confidence_in_curation) %in% Confidence_in_curation_vals) - - ## Attribute_source - srcs_tsv <- system.file("extdata", "attribute_sources.tsv", package = "bugphyzz") - srcs <- readr::read_tsv(srcs_tsv, show_col_types = FALSE) - Attribute_source_ok <- all(na.omit(x$Attribute_source) %in% srcs$Attribute_source) - - ## Attribute - attrs_tsv <- system.file("extdata", "attributes.tsv", package = "bugphyzz") - attrs_tbl <- readr::read_tsv(attrs_tsv, show_col_types = FALSE) |> - dplyr::mutate(attribute_group = tolower(attribute_group)) - attrs <- attrs_tbl |> - dplyr::mutate(attribute_group = strsplit(attribute_group, ";")) |> - dplyr::pull(attribute_group) |> - unlist() |> - unique() |> - {\(y) y[!is.na(y)]}() - Attribute_ok <- unique(x$Attribute) %in% attrs - - results <- c( - Rank = Rank_ok, - Attribute_type = Attribute_type_ok, - Frequency = Frequency_ok, - Evidence = Evidence_ok, - Confidence_in_curation = Confidence_in_curation_ok, - Attribute_source = Attribute_source_ok, - Attribute = Attribute_ok + ## Valid values + Rank_vals <- c( + "superkingdom", "kingdom", "domain", "phylum", "class", "order", + "family", "genus","species", "strain" + ) + Attribute_type_vals <- c( + "multistate-union", "multistate-intersection", "binary", + "numeric" + ) + Frequency_vals <- c( + "always", "usually", "sometimes", "rarely", "never", "unknown" + ) + Evidence_vals <- c( + "exp", "igc", "nas", "tas", "tax", "asr" + ) + Confidence_in_curation_vals <- c( + "high", "medium", "low" ) - ## Attribute_value - if (attr_type %in% c("multistate-union", "multistate-intersection")) { - Attribute_value_vals <- attrs_tbl |> - dplyr::filter(grepl(unique(x$Attribute), attribute_group)) |> - dplyr::pull(attribute) |> - unique() |> - {\(y) y[!is.na(y)]}() |> - tolower() - - Attribute_value_ok <- all(x$Attribute_value %in% Attribute_value_vals) - results <- c(results, Attribute_value = Attribute_value_ok) - } - return(all(results)) - # return(results) + attr_type <- unique(x$Attribute_type) + + ## Columns omitted here are tested elsewhere + ## Attribute_type for numeric values + ## Score + ## NSTI + ## Validation + ## NCBI_ID + ## Taxon_name + + Rank_ok <- all(x$Rank %in% Rank_vals) + Attribute_type_ok <- attr_type %in% Attribute_type_vals + Evidence_ok <- all(x$Evidence %in% Evidence_vals) + Frequency_ok <- all(unique(x$Frequency) %in% Frequency_vals) + # Score_ok <- all(as.double(na.omit(x$Score)) >=0 & as.double(na.omit(x$Score <=1))) + Confidence_in_curation_ok <- all(na.omit(x$Confidence_in_curation) %in% Confidence_in_curation_vals) + + ## Attribute_source + srcs_tsv <- system.file("extdata", "attribute_sources.tsv", package = "bugphyzz") + srcs <- readr::read_tsv(srcs_tsv, show_col_types = FALSE) + Attribute_source_ok <- all(na.omit(x$Attribute_source) %in% srcs$Attribute_source) + + ## Attribute + attrs_tsv <- system.file("extdata", "attributes.tsv", package = "bugphyzz") + attrs_tbl <- readr::read_tsv(attrs_tsv, show_col_types = FALSE) |> + dplyr::mutate(attribute_group = tolower(attribute_group)) + attrs <- attrs_tbl |> + dplyr::mutate(attribute_group = strsplit(attribute_group, ";")) |> + dplyr::pull(attribute_group) |> + unlist() |> + unique() |> + {\(y) y[!is.na(y)]}() + Attribute_ok <- unique(x$Attribute) %in% attrs + + results <- c( + Rank = Rank_ok, + Attribute_type = Attribute_type_ok, + Frequency = Frequency_ok, + Evidence = Evidence_ok, + Confidence_in_curation = Confidence_in_curation_ok, + Attribute_source = Attribute_source_ok, + Attribute = Attribute_ok + ) + + ## Attribute_value + if (attr_type %in% c("multistate-union", "multistate-intersection")) { + Attribute_value_vals <- attrs_tbl |> + dplyr::filter(grepl(unique(x$Attribute), attribute_group)) |> + dplyr::pull(attribute) |> + unique() |> + {\(y) y[!is.na(y)]}() |> + tolower() + + Attribute_value_ok <- all(x$Attribute_value %in% Attribute_value_vals) + results <- c(results, Attribute_value = Attribute_value_ok) + } + return(all(results)) +} + +## Make sure that there are not duplicates by accident +checkUniqueAnnotations <- function(x) { + dat <- x |> + dplyr::select(NCBI_ID, Attribute, Attribute_value) + !any(duplicated(dat)) } # tests ------------------------------------------------------------------- test_that("importBugphyzz works with devel", { - bp <- importBugphyzz(version = "devel", force_download = TRUE) - expect_true(all("data.frame" == map_chr(bp, class))) - expect_true(all(map_lgl(bp, ~ nrow(.x) > 0))) - expect_true(all(map_lgl(bp, checkColumnNames))) - expect_true(all(map_lgl(bp, checkColumnTypes))) - expect_true(all(map_lgl(bp, checkNAs))) - expect_true(all(map_lgl(bp, checkCuration))) + bp <- importBugphyzz(version = "devel", force_download = TRUE) + expect_true(all("data.frame" == map_chr(bp, class))) + expect_true(all(map_lgl(bp, ~ nrow(.x) > 0))) + expect_true(all(map_lgl(bp, checkColumnNames))) + expect_true(all(map_lgl(bp, checkColumnTypes))) + expect_true(all(map_lgl(bp, checkNAs))) + expect_true(all(map_lgl(bp, checkCuration))) + expect_true(all(map_lgl(bp, checkUniqueAnnotations))) }) test_that("importBugphyzz works with hash", { - bp <- importBugphyzz(version = "c2d34c0", force_download = TRUE) - expect_true(all("data.frame" == map_chr(bp, class))) - expect_true(all(map_lgl(bp, ~ nrow(.x) > 0))) - expect_true(all(map_lgl(bp, checkColumnNames))) - expect_true(all(map_lgl(bp, checkColumnTypes))) - expect_true(all(map_lgl(bp, checkNAs))) - expect_true(all(map_lgl(bp, checkCuration))) + bp <- importBugphyzz(version = "c2d34c0", force_download = TRUE) + expect_true(all("data.frame" == map_chr(bp, class))) + expect_true(all(map_lgl(bp, ~ nrow(.x) > 0))) + expect_true(all(map_lgl(bp, checkColumnNames))) + expect_true(all(map_lgl(bp, checkColumnTypes))) + expect_true(all(map_lgl(bp, checkNAs))) + expect_true(all(map_lgl(bp, checkCuration))) + expect_true(all(map_lgl(bp, checkUniqueAnnotations))) }) ## TODO create test for using Zenodo test_that("importBugphyzz doesn't work with other words", { - expect_error(importBugphyzz(version = "abcd-1234", force_download = TRUE)) + expect_error(importBugphyzz(version = "abcd-1234", force_download = TRUE)) }) diff --git a/tests/testthat/test-makeSignatures.R b/tests/testthat/test-makeSignatures.R index dfc5e5ed..a8fc7d25 100644 --- a/tests/testthat/test-makeSignatures.R +++ b/tests/testthat/test-makeSignatures.R @@ -1,13 +1,13 @@ library(purrr) bp <- importBugphyzz() sigsNames <- map(bp, ~ makeSignatures(.x, tax_id_type = "Taxon_name")) |> - list_flatten(name_spec = "{inner}") + list_flatten(name_spec = "{inner}") sigsIDs <- map(bp, ~ makeSignatures(.x, tax_id_type = "NCBI_ID")) |> - list_flatten(name_spec = "{inner}") + list_flatten(name_spec = "{inner}") test_that("makeSignatures works with IDs", { - expect_true(all(map_lgl(sigsIDs, is.integer))) + expect_true(all(map_lgl(sigsIDs, is.integer))) }) test_that("makeSignatures works with taxon names", { - expect_true(all(map_lgl(sigsNames, is.character))) + expect_true(all(map_lgl(sigsNames, is.character))) }) diff --git a/tests/testthat/test-physiologies.R b/tests/testthat/test-physiologies.R index 1cad2bed..a7dbf441 100644 --- a/tests/testthat/test-physiologies.R +++ b/tests/testthat/test-physiologies.R @@ -2,6 +2,6 @@ library(purrr) pnames <- showPhys() p <- suppressWarnings(physiologies()) test_that("physiologies works", { - expect_true(all(map_lgl(p, is.data.frame))) - expect_true(all(names(p) == pnames)) + expect_true(all(map_lgl(p, is.data.frame))) + expect_true(all(names(p) == pnames)) })