Skip to content

Commit

Permalink
update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
sdgamboa committed Apr 10, 2024
1 parent 2f3439d commit 8e189a9
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 160 deletions.
10 changes: 5 additions & 5 deletions tests/testthat/test-getTaxonSignatures.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ taxName <- "Escherichia coli"
bp <- importBugphyzz()
sigs_ids <- getTaxonSignatures(taxID, bp)
sigs_tax <- getTaxonSignatures(
tax = taxName, bp = bp, tax_id_type = "Taxon_name"
tax = taxName, bp = bp, tax_id_type = "Taxon_name"
)
test_that("getTaxonSignatures works with IDs", {
expect_gt(length(sigs_ids), 0)
expect_type(sigs_ids, "character")
expect_gt(length(sigs_ids), 0)
expect_type(sigs_ids, "character")
})
test_that("getTaxonSignatures works with IDs", {
expect_gt(length(sigs_tax), 0)
expect_type(sigs_tax, "character")
expect_gt(length(sigs_tax), 0)
expect_type(sigs_tax, "character")
})
306 changes: 157 additions & 149 deletions tests/testthat/test-importBugphyzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,184 +4,192 @@
library(purrr)

expected_columns_multistate <- c(
NCBI_ID = "integer", Taxon_name = "character",
Rank = "character",
Attribute = "character", Attribute_value = "character",
Evidence = "character",
Frequency = "character", Score = "double", Attribute_source = "character",
Confidence_in_curation = "character", Attribute_type = "character",
Validation = "double"
NCBI_ID = "integer", Taxon_name = "character",
Rank = "character",
Attribute = "character", Attribute_value = "character",
Evidence = "character",
Frequency = "character", Score = "double", Attribute_source = "character",
Confidence_in_curation = "character", Attribute_type = "character",
Validation = "double"
)
expected_columns_binary <- c(
NCBI_ID = "integer", Taxon_name = "character",
Rank = "character",
Attribute = "character", Attribute_value = "logical",
Evidence = "character",
Frequency = "character", Score = "double", Attribute_source = "character",
Confidence_in_curation = "character", Attribute_type = "character",
Validation = "double"
NCBI_ID = "integer", Taxon_name = "character",
Rank = "character",
Attribute = "character", Attribute_value = "logical",
Evidence = "character",
Frequency = "character", Score = "double", Attribute_source = "character",
Confidence_in_curation = "character", Attribute_type = "character",
Validation = "double"
)
expected_columns_numeric <- c(
NCBI_ID = "integer", Taxon_name = "character",
Rank = "character",
Attribute = "character", Attribute_value = "double",
Evidence = "character",
Frequency = "character", Score = "double", Attribute_source = "character",
Confidence_in_curation = "character", Attribute_type = "character",
NSTI = "double", Validation = "double"
NCBI_ID = "integer", Taxon_name = "character",
Rank = "character",
Attribute = "character", Attribute_value = "double",
Evidence = "character",
Frequency = "character", Score = "double", Attribute_source = "character",
Confidence_in_curation = "character", Attribute_type = "character",
NSTI = "double", Validation = "double"
)

checkColumnNames <- function(x) {
attr_type <- unique(x$Attribute_type)
if (attr_type == "multistate-intersection" | attr_type == "multistate-union") {
lgl_vct <- colnames(x) == names(expected_columns_multistate)
} else if (attr_type == "binary") {
lgl_vct <- colnames(x) == names(expected_columns_binary)
} else if (attr_type == "numeric") {
lgl_vct <- colnames(x) == names(expected_columns_numeric)
}
return(all(lgl_vct))
attr_type <- unique(x$Attribute_type)
if (attr_type == "multistate-intersection" | attr_type == "multistate-union") {
lgl_vct <- colnames(x) == names(expected_columns_multistate)
} else if (attr_type == "binary") {
lgl_vct <- colnames(x) == names(expected_columns_binary)
} else if (attr_type == "numeric") {
lgl_vct <- colnames(x) == names(expected_columns_numeric)
}
return(all(lgl_vct))
}

checkColumnTypes <- function(x) {
attr_type <- unique(x$Attribute_type)
if ("Validation" %in% colnames(x)) {
x$Validation <- as.double(x$Validation)
}
if ("NSTI" %in% colnames(x)) {
x$NSTI <- as.double(x$NSTI)
}
types <- map_chr(x, typeof)
if (attr_type == "multistate-intersection" | attr_type == "multistate-union") {
lgl_vct <- types == expected_columns_multistate
} else if (attr_type == "binary") {
lgl_vct <- types == expected_columns_binary
} else if (attr_type == "numeric") {
lgl_vct <- types == expected_columns_numeric
}
return(all(lgl_vct))
attr_type <- unique(x$Attribute_type)
if ("Validation" %in% colnames(x)) {
x$Validation <- as.double(x$Validation)
}
if ("NSTI" %in% colnames(x)) {
x$NSTI <- as.double(x$NSTI)
}
types <- map_chr(x, typeof)
if (attr_type == "multistate-intersection" | attr_type == "multistate-union") {
lgl_vct <- types == expected_columns_multistate
} else if (attr_type == "binary") {
lgl_vct <- types == expected_columns_binary
} else if (attr_type == "numeric") {
lgl_vct <- types == expected_columns_numeric
}
return(all(lgl_vct))
}

checkNAs <- function(x) {
select_col <- c(
"NCBI_ID",
"Taxon_name",
"Rank",
"Attribute",
"Attribute_value",
"Attribute_type",
"Evidence",
"Frequency",
"Score"
)
x <- x[, select_col]
!any(purrr::map_lgl(x, ~ any(is.na(.x))))
select_col <- c(
"NCBI_ID",
"Taxon_name",
"Rank",
"Attribute",
"Attribute_value",
"Attribute_type",
"Evidence",
"Frequency",
"Score"
)
x <- x[, select_col]
!any(purrr::map_lgl(x, ~ any(is.na(.x))))
}

checkCuration <- function(x) {

## Valid values
Rank_vals <- c(
"superkingdom", "kingdom", "domain", "phylum", "class", "order",
"family", "genus","species", "strain"
)
Attribute_type_vals <- c(
"multistate-union", "multistate-intersection", "binary",
"numeric"
)
Frequency_vals <- c(
"always", "usually", "sometimes", "rarely", "never", "unknown"
)
Evidence_vals <- c(
"exp", "igc", "nas", "tas", "tax", "asr"
)
Confidence_in_curation_vals <- c(
"high", "medium", "low"
)

attr_type <- unique(x$Attribute_type)

## Columns omitted here are tested elsewhere
## Attribute_type for numeric values
## Score
## NSTI
## Validation
## NCBI_ID
## Taxon_name

Rank_ok <- all(x$Rank %in% Rank_vals)
Attribute_type_ok <- attr_type %in% Attribute_type_vals
Evidence_ok <- all(x$Evidence %in% Evidence_vals)
Frequency_ok <- all(unique(x$Frequency) %in% Frequency_vals)
# Score_ok <- all(as.double(na.omit(x$Score)) >=0 & as.double(na.omit(x$Score <=1)))
Confidence_in_curation_ok <- all(na.omit(x$Confidence_in_curation) %in% Confidence_in_curation_vals)

## Attribute_source
srcs_tsv <- system.file("extdata", "attribute_sources.tsv", package = "bugphyzz")
srcs <- readr::read_tsv(srcs_tsv, show_col_types = FALSE)
Attribute_source_ok <- all(na.omit(x$Attribute_source) %in% srcs$Attribute_source)

## Attribute
attrs_tsv <- system.file("extdata", "attributes.tsv", package = "bugphyzz")
attrs_tbl <- readr::read_tsv(attrs_tsv, show_col_types = FALSE) |>
dplyr::mutate(attribute_group = tolower(attribute_group))
attrs <- attrs_tbl |>
dplyr::mutate(attribute_group = strsplit(attribute_group, ";")) |>
dplyr::pull(attribute_group) |>
unlist() |>
unique() |>
{\(y) y[!is.na(y)]}()
Attribute_ok <- unique(x$Attribute) %in% attrs

results <- c(
Rank = Rank_ok,
Attribute_type = Attribute_type_ok,
Frequency = Frequency_ok,
Evidence = Evidence_ok,
Confidence_in_curation = Confidence_in_curation_ok,
Attribute_source = Attribute_source_ok,
Attribute = Attribute_ok
## Valid values
Rank_vals <- c(
"superkingdom", "kingdom", "domain", "phylum", "class", "order",
"family", "genus","species", "strain"
)
Attribute_type_vals <- c(
"multistate-union", "multistate-intersection", "binary",
"numeric"
)
Frequency_vals <- c(
"always", "usually", "sometimes", "rarely", "never", "unknown"
)
Evidence_vals <- c(
"exp", "igc", "nas", "tas", "tax", "asr"
)
Confidence_in_curation_vals <- c(
"high", "medium", "low"
)

## Attribute_value
if (attr_type %in% c("multistate-union", "multistate-intersection")) {
Attribute_value_vals <- attrs_tbl |>
dplyr::filter(grepl(unique(x$Attribute), attribute_group)) |>
dplyr::pull(attribute) |>
unique() |>
{\(y) y[!is.na(y)]}() |>
tolower()

Attribute_value_ok <- all(x$Attribute_value %in% Attribute_value_vals)
results <- c(results, Attribute_value = Attribute_value_ok)
}
return(all(results))
# return(results)
attr_type <- unique(x$Attribute_type)

## Columns omitted here are tested elsewhere
## Attribute_type for numeric values
## Score
## NSTI
## Validation
## NCBI_ID
## Taxon_name

Rank_ok <- all(x$Rank %in% Rank_vals)
Attribute_type_ok <- attr_type %in% Attribute_type_vals
Evidence_ok <- all(x$Evidence %in% Evidence_vals)
Frequency_ok <- all(unique(x$Frequency) %in% Frequency_vals)
# Score_ok <- all(as.double(na.omit(x$Score)) >=0 & as.double(na.omit(x$Score <=1)))
Confidence_in_curation_ok <- all(na.omit(x$Confidence_in_curation) %in% Confidence_in_curation_vals)

## Attribute_source
srcs_tsv <- system.file("extdata", "attribute_sources.tsv", package = "bugphyzz")
srcs <- readr::read_tsv(srcs_tsv, show_col_types = FALSE)
Attribute_source_ok <- all(na.omit(x$Attribute_source) %in% srcs$Attribute_source)

## Attribute
attrs_tsv <- system.file("extdata", "attributes.tsv", package = "bugphyzz")
attrs_tbl <- readr::read_tsv(attrs_tsv, show_col_types = FALSE) |>
dplyr::mutate(attribute_group = tolower(attribute_group))
attrs <- attrs_tbl |>
dplyr::mutate(attribute_group = strsplit(attribute_group, ";")) |>
dplyr::pull(attribute_group) |>
unlist() |>
unique() |>
{\(y) y[!is.na(y)]}()
Attribute_ok <- unique(x$Attribute) %in% attrs

results <- c(
Rank = Rank_ok,
Attribute_type = Attribute_type_ok,
Frequency = Frequency_ok,
Evidence = Evidence_ok,
Confidence_in_curation = Confidence_in_curation_ok,
Attribute_source = Attribute_source_ok,
Attribute = Attribute_ok
)

## Attribute_value
if (attr_type %in% c("multistate-union", "multistate-intersection")) {
Attribute_value_vals <- attrs_tbl |>
dplyr::filter(grepl(unique(x$Attribute), attribute_group)) |>
dplyr::pull(attribute) |>
unique() |>
{\(y) y[!is.na(y)]}() |>
tolower()

Attribute_value_ok <- all(x$Attribute_value %in% Attribute_value_vals)
results <- c(results, Attribute_value = Attribute_value_ok)
}
return(all(results))
}

## Make sure that there are not duplicates by accident
checkUniqueAnnotations <- function(x) {
dat <- x |>
dplyr::select(NCBI_ID, Attribute, Attribute_value)
!any(duplicated(dat))
}

# tests -------------------------------------------------------------------

test_that("importBugphyzz works with devel", {
bp <- importBugphyzz(version = "devel", force_download = TRUE)
expect_true(all("data.frame" == map_chr(bp, class)))
expect_true(all(map_lgl(bp, ~ nrow(.x) > 0)))
expect_true(all(map_lgl(bp, checkColumnNames)))
expect_true(all(map_lgl(bp, checkColumnTypes)))
expect_true(all(map_lgl(bp, checkNAs)))
expect_true(all(map_lgl(bp, checkCuration)))
bp <- importBugphyzz(version = "devel", force_download = TRUE)
expect_true(all("data.frame" == map_chr(bp, class)))
expect_true(all(map_lgl(bp, ~ nrow(.x) > 0)))
expect_true(all(map_lgl(bp, checkColumnNames)))
expect_true(all(map_lgl(bp, checkColumnTypes)))
expect_true(all(map_lgl(bp, checkNAs)))
expect_true(all(map_lgl(bp, checkCuration)))
expect_true(all(map_lgl(bp, checkUniqueAnnotations)))
})

test_that("importBugphyzz works with hash", {
bp <- importBugphyzz(version = "c2d34c0", force_download = TRUE)
expect_true(all("data.frame" == map_chr(bp, class)))
expect_true(all(map_lgl(bp, ~ nrow(.x) > 0)))
expect_true(all(map_lgl(bp, checkColumnNames)))
expect_true(all(map_lgl(bp, checkColumnTypes)))
expect_true(all(map_lgl(bp, checkNAs)))
expect_true(all(map_lgl(bp, checkCuration)))
bp <- importBugphyzz(version = "c2d34c0", force_download = TRUE)
expect_true(all("data.frame" == map_chr(bp, class)))
expect_true(all(map_lgl(bp, ~ nrow(.x) > 0)))
expect_true(all(map_lgl(bp, checkColumnNames)))
expect_true(all(map_lgl(bp, checkColumnTypes)))
expect_true(all(map_lgl(bp, checkNAs)))
expect_true(all(map_lgl(bp, checkCuration)))
expect_true(all(map_lgl(bp, checkUniqueAnnotations)))
})

## TODO create test for using Zenodo
test_that("importBugphyzz doesn't work with other words", {
expect_error(importBugphyzz(version = "abcd-1234", force_download = TRUE))
expect_error(importBugphyzz(version = "abcd-1234", force_download = TRUE))
})
8 changes: 4 additions & 4 deletions tests/testthat/test-makeSignatures.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
library(purrr)
bp <- importBugphyzz()
sigsNames <- map(bp, ~ makeSignatures(.x, tax_id_type = "Taxon_name")) |>
list_flatten(name_spec = "{inner}")
list_flatten(name_spec = "{inner}")
sigsIDs <- map(bp, ~ makeSignatures(.x, tax_id_type = "NCBI_ID")) |>
list_flatten(name_spec = "{inner}")
list_flatten(name_spec = "{inner}")

test_that("makeSignatures works with IDs", {
expect_true(all(map_lgl(sigsIDs, is.integer)))
expect_true(all(map_lgl(sigsIDs, is.integer)))
})
test_that("makeSignatures works with taxon names", {
expect_true(all(map_lgl(sigsNames, is.character)))
expect_true(all(map_lgl(sigsNames, is.character)))
})
4 changes: 2 additions & 2 deletions tests/testthat/test-physiologies.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ library(purrr)
pnames <- showPhys()
p <- suppressWarnings(physiologies())
test_that("physiologies works", {
expect_true(all(map_lgl(p, is.data.frame)))
expect_true(all(names(p) == pnames))
expect_true(all(map_lgl(p, is.data.frame)))
expect_true(all(names(p) == pnames))
})

0 comments on commit 8e189a9

Please sign in to comment.