From 8e189a9e4452cf287509691515bd14efec28e347 Mon Sep 17 00:00:00 2001
From: sdgamboa <samuel.gamboa.tuz@gmail.com>
Date: Wed, 10 Apr 2024 08:31:23 -0400
Subject: [PATCH] update tests

---
 tests/testthat/test-getTaxonSignatures.R |  10 +-
 tests/testthat/test-importBugphyzz.R     | 306 ++++++++++++-----------
 tests/testthat/test-makeSignatures.R     |   8 +-
 tests/testthat/test-physiologies.R       |   4 +-
 4 files changed, 168 insertions(+), 160 deletions(-)

diff --git a/tests/testthat/test-getTaxonSignatures.R b/tests/testthat/test-getTaxonSignatures.R
index 157d0567..a31e1858 100644
--- a/tests/testthat/test-getTaxonSignatures.R
+++ b/tests/testthat/test-getTaxonSignatures.R
@@ -3,13 +3,13 @@ taxName <- "Escherichia coli"
 bp <- importBugphyzz()
 sigs_ids <- getTaxonSignatures(taxID, bp)
 sigs_tax <- getTaxonSignatures(
-  tax = taxName, bp = bp, tax_id_type = "Taxon_name"
+    tax = taxName, bp = bp, tax_id_type = "Taxon_name"
 )
 test_that("getTaxonSignatures works with IDs", {
-  expect_gt(length(sigs_ids), 0)
-  expect_type(sigs_ids, "character")
+    expect_gt(length(sigs_ids), 0)
+    expect_type(sigs_ids, "character")
 })
 test_that("getTaxonSignatures works with IDs", {
-  expect_gt(length(sigs_tax), 0)
-  expect_type(sigs_tax, "character")
+    expect_gt(length(sigs_tax), 0)
+    expect_type(sigs_tax, "character")
 })
diff --git a/tests/testthat/test-importBugphyzz.R b/tests/testthat/test-importBugphyzz.R
index a64ed06d..de7dcaf8 100644
--- a/tests/testthat/test-importBugphyzz.R
+++ b/tests/testthat/test-importBugphyzz.R
@@ -4,184 +4,192 @@
 library(purrr)
 
 expected_columns_multistate <- c(
-  NCBI_ID = "integer", Taxon_name = "character",
-  Rank = "character",
-  Attribute = "character", Attribute_value = "character",
-  Evidence = "character",
-  Frequency = "character", Score = "double", Attribute_source = "character",
-  Confidence_in_curation = "character", Attribute_type = "character",
-  Validation = "double"
+    NCBI_ID = "integer", Taxon_name = "character",
+    Rank = "character",
+    Attribute = "character", Attribute_value = "character",
+    Evidence = "character",
+    Frequency = "character", Score = "double", Attribute_source = "character",
+    Confidence_in_curation = "character", Attribute_type = "character",
+    Validation = "double"
 )
 expected_columns_binary <- c(
-  NCBI_ID = "integer", Taxon_name = "character",
-  Rank = "character",
-  Attribute = "character", Attribute_value = "logical",
-  Evidence = "character",
-  Frequency = "character", Score = "double", Attribute_source = "character",
-  Confidence_in_curation = "character", Attribute_type = "character",
-  Validation = "double"
+    NCBI_ID = "integer", Taxon_name = "character",
+    Rank = "character",
+    Attribute = "character", Attribute_value = "logical",
+    Evidence = "character",
+    Frequency = "character", Score = "double", Attribute_source = "character",
+    Confidence_in_curation = "character", Attribute_type = "character",
+    Validation = "double"
 )
 expected_columns_numeric <- c(
-  NCBI_ID = "integer", Taxon_name = "character",
-  Rank = "character",
-  Attribute = "character", Attribute_value = "double",
-  Evidence = "character",
-  Frequency = "character", Score = "double", Attribute_source = "character",
-  Confidence_in_curation = "character", Attribute_type = "character",
-  NSTI = "double", Validation = "double"
+    NCBI_ID = "integer", Taxon_name = "character",
+    Rank = "character",
+    Attribute = "character", Attribute_value = "double",
+    Evidence = "character",
+    Frequency = "character", Score = "double", Attribute_source = "character",
+    Confidence_in_curation = "character", Attribute_type = "character",
+    NSTI = "double", Validation = "double"
 )
 
 checkColumnNames <- function(x) {
-  attr_type <- unique(x$Attribute_type)
-  if (attr_type == "multistate-intersection" | attr_type == "multistate-union") {
-    lgl_vct <- colnames(x) == names(expected_columns_multistate)
-  } else if (attr_type == "binary") {
-    lgl_vct <- colnames(x) == names(expected_columns_binary)
-  } else if (attr_type == "numeric") {
-    lgl_vct <- colnames(x) == names(expected_columns_numeric)
-  }
-  return(all(lgl_vct))
+    attr_type <- unique(x$Attribute_type)
+    if (attr_type == "multistate-intersection" | attr_type == "multistate-union") {
+        lgl_vct <- colnames(x) == names(expected_columns_multistate)
+    } else if (attr_type == "binary") {
+        lgl_vct <- colnames(x) == names(expected_columns_binary)
+    } else if (attr_type == "numeric") {
+        lgl_vct <- colnames(x) == names(expected_columns_numeric)
+    }
+    return(all(lgl_vct))
 }
 
 checkColumnTypes <- function(x) {
-  attr_type <- unique(x$Attribute_type)
-  if ("Validation" %in% colnames(x)) {
-    x$Validation <- as.double(x$Validation)
-  }
-  if ("NSTI" %in% colnames(x)) {
-    x$NSTI <- as.double(x$NSTI)
-  }
-  types <- map_chr(x, typeof)
-  if (attr_type == "multistate-intersection" | attr_type == "multistate-union") {
-    lgl_vct <- types == expected_columns_multistate
-  } else if (attr_type == "binary") {
-    lgl_vct <- types == expected_columns_binary
-  } else if (attr_type == "numeric") {
-    lgl_vct <- types == expected_columns_numeric
-  }
-  return(all(lgl_vct))
+    attr_type <- unique(x$Attribute_type)
+    if ("Validation" %in% colnames(x)) {
+        x$Validation <- as.double(x$Validation)
+    }
+    if ("NSTI" %in% colnames(x)) {
+        x$NSTI <- as.double(x$NSTI)
+    }
+    types <- map_chr(x, typeof)
+    if (attr_type == "multistate-intersection" | attr_type == "multistate-union") {
+        lgl_vct <- types == expected_columns_multistate
+    } else if (attr_type == "binary") {
+        lgl_vct <- types == expected_columns_binary
+    } else if (attr_type == "numeric") {
+        lgl_vct <- types == expected_columns_numeric
+    }
+    return(all(lgl_vct))
 }
 
 checkNAs <- function(x) {
-  select_col <- c(
-    "NCBI_ID",
-    "Taxon_name",
-    "Rank",
-    "Attribute",
-    "Attribute_value",
-    "Attribute_type",
-    "Evidence",
-    "Frequency",
-    "Score"
-  )
-  x <- x[, select_col]
-  !any(purrr::map_lgl(x, ~ any(is.na(.x))))
+    select_col <- c(
+        "NCBI_ID",
+        "Taxon_name",
+        "Rank",
+        "Attribute",
+        "Attribute_value",
+        "Attribute_type",
+        "Evidence",
+        "Frequency",
+        "Score"
+    )
+    x <- x[, select_col]
+    !any(purrr::map_lgl(x, ~ any(is.na(.x))))
 }
 
 checkCuration <- function(x) {
 
-  ## Valid values
-  Rank_vals <- c(
-    "superkingdom", "kingdom", "domain", "phylum", "class", "order",
-    "family", "genus","species", "strain"
-  )
-  Attribute_type_vals <- c(
-    "multistate-union", "multistate-intersection", "binary",
-    "numeric"
-  )
-  Frequency_vals <- c(
-    "always", "usually", "sometimes", "rarely", "never", "unknown"
-  )
-  Evidence_vals <- c(
-    "exp", "igc", "nas", "tas", "tax", "asr"
-  )
-  Confidence_in_curation_vals <- c(
-    "high", "medium", "low"
-  )
-
-  attr_type <- unique(x$Attribute_type)
-
-  ## Columns omitted here are tested elsewhere
-  ## Attribute_type for numeric values
-  ## Score
-  ## NSTI
-  ## Validation
-  ## NCBI_ID
-  ## Taxon_name
-
-  Rank_ok <- all(x$Rank %in% Rank_vals)
-  Attribute_type_ok <- attr_type %in% Attribute_type_vals
-  Evidence_ok <- all(x$Evidence %in% Evidence_vals)
-  Frequency_ok <- all(unique(x$Frequency) %in% Frequency_vals)
-  # Score_ok <- all(as.double(na.omit(x$Score)) >=0 & as.double(na.omit(x$Score <=1)))
-  Confidence_in_curation_ok <- all(na.omit(x$Confidence_in_curation) %in% Confidence_in_curation_vals)
-
-  ## Attribute_source
-  srcs_tsv <- system.file("extdata", "attribute_sources.tsv", package = "bugphyzz")
-  srcs <- readr::read_tsv(srcs_tsv, show_col_types = FALSE)
-  Attribute_source_ok <- all(na.omit(x$Attribute_source) %in% srcs$Attribute_source)
-
-  ## Attribute
-  attrs_tsv <- system.file("extdata", "attributes.tsv", package = "bugphyzz")
-  attrs_tbl <- readr::read_tsv(attrs_tsv, show_col_types = FALSE) |>
-    dplyr::mutate(attribute_group = tolower(attribute_group))
-  attrs <- attrs_tbl |>
-    dplyr::mutate(attribute_group = strsplit(attribute_group, ";")) |>
-    dplyr::pull(attribute_group) |>
-    unlist() |>
-    unique() |>
-    {\(y)  y[!is.na(y)]}()
-  Attribute_ok <- unique(x$Attribute) %in% attrs
-
-  results <- c(
-      Rank = Rank_ok,
-      Attribute_type = Attribute_type_ok,
-      Frequency = Frequency_ok,
-      Evidence = Evidence_ok,
-      Confidence_in_curation = Confidence_in_curation_ok,
-      Attribute_source = Attribute_source_ok,
-      Attribute = Attribute_ok
+    ## Valid values
+    Rank_vals <- c(
+        "superkingdom", "kingdom", "domain", "phylum", "class", "order",
+        "family", "genus","species", "strain"
+    )
+    Attribute_type_vals <- c(
+        "multistate-union", "multistate-intersection", "binary",
+        "numeric"
+    )
+    Frequency_vals <- c(
+        "always", "usually", "sometimes", "rarely", "never", "unknown"
+    )
+    Evidence_vals <- c(
+        "exp", "igc", "nas", "tas", "tax", "asr"
+    )
+    Confidence_in_curation_vals <- c(
+        "high", "medium", "low"
     )
 
-  ## Attribute_value
-  if (attr_type %in% c("multistate-union", "multistate-intersection")) {
-    Attribute_value_vals <- attrs_tbl |>
-      dplyr::filter(grepl(unique(x$Attribute), attribute_group)) |>
-      dplyr::pull(attribute) |>
-      unique() |>
-      {\(y)  y[!is.na(y)]}() |>
-      tolower()
-
-   Attribute_value_ok <- all(x$Attribute_value %in% Attribute_value_vals)
-   results <- c(results, Attribute_value = Attribute_value_ok)
-  }
-  return(all(results))
-  # return(results)
+    attr_type <- unique(x$Attribute_type)
+
+    ## Columns omitted here are tested elsewhere
+    ## Attribute_type for numeric values
+    ## Score
+    ## NSTI
+    ## Validation
+    ## NCBI_ID
+    ## Taxon_name
+
+    Rank_ok <- all(x$Rank %in% Rank_vals)
+    Attribute_type_ok <- attr_type %in% Attribute_type_vals
+    Evidence_ok <- all(x$Evidence %in% Evidence_vals)
+    Frequency_ok <- all(unique(x$Frequency) %in% Frequency_vals)
+    # Score_ok <- all(as.double(na.omit(x$Score)) >=0 & as.double(na.omit(x$Score <=1)))
+    Confidence_in_curation_ok <- all(na.omit(x$Confidence_in_curation) %in% Confidence_in_curation_vals)
+
+    ## Attribute_source
+    srcs_tsv <- system.file("extdata", "attribute_sources.tsv", package = "bugphyzz")
+    srcs <- readr::read_tsv(srcs_tsv, show_col_types = FALSE)
+    Attribute_source_ok <- all(na.omit(x$Attribute_source) %in% srcs$Attribute_source)
+
+    ## Attribute
+    attrs_tsv <- system.file("extdata", "attributes.tsv", package = "bugphyzz")
+    attrs_tbl <- readr::read_tsv(attrs_tsv, show_col_types = FALSE) |>
+        dplyr::mutate(attribute_group = tolower(attribute_group))
+    attrs <- attrs_tbl |>
+        dplyr::mutate(attribute_group = strsplit(attribute_group, ";")) |>
+        dplyr::pull(attribute_group) |>
+        unlist() |>
+        unique() |>
+        {\(y)  y[!is.na(y)]}()
+    Attribute_ok <- unique(x$Attribute) %in% attrs
+
+    results <- c(
+        Rank = Rank_ok,
+        Attribute_type = Attribute_type_ok,
+        Frequency = Frequency_ok,
+        Evidence = Evidence_ok,
+        Confidence_in_curation = Confidence_in_curation_ok,
+        Attribute_source = Attribute_source_ok,
+        Attribute = Attribute_ok
+    )
+
+    ## Attribute_value
+    if (attr_type %in% c("multistate-union", "multistate-intersection")) {
+        Attribute_value_vals <- attrs_tbl |>
+            dplyr::filter(grepl(unique(x$Attribute), attribute_group)) |>
+            dplyr::pull(attribute) |>
+            unique() |>
+            {\(y)  y[!is.na(y)]}() |>
+            tolower()
+
+        Attribute_value_ok <- all(x$Attribute_value %in% Attribute_value_vals)
+        results <- c(results, Attribute_value = Attribute_value_ok)
+    }
+    return(all(results))
+}
+
+## Make sure that there are not duplicates by accident
+checkUniqueAnnotations <- function(x) {
+    dat <- x |>
+        dplyr::select(NCBI_ID, Attribute, Attribute_value)
+    !any(duplicated(dat))
 }
 
 # tests -------------------------------------------------------------------
 
 test_that("importBugphyzz works with devel", {
-  bp <- importBugphyzz(version = "devel", force_download = TRUE)
-  expect_true(all("data.frame" == map_chr(bp, class)))
-  expect_true(all(map_lgl(bp, ~ nrow(.x) > 0)))
-  expect_true(all(map_lgl(bp, checkColumnNames)))
-  expect_true(all(map_lgl(bp, checkColumnTypes)))
-  expect_true(all(map_lgl(bp, checkNAs)))
-  expect_true(all(map_lgl(bp, checkCuration)))
+    bp <- importBugphyzz(version = "devel", force_download = TRUE)
+    expect_true(all("data.frame" == map_chr(bp, class)))
+    expect_true(all(map_lgl(bp, ~ nrow(.x) > 0)))
+    expect_true(all(map_lgl(bp, checkColumnNames)))
+    expect_true(all(map_lgl(bp, checkColumnTypes)))
+    expect_true(all(map_lgl(bp, checkNAs)))
+    expect_true(all(map_lgl(bp, checkCuration)))
+    expect_true(all(map_lgl(bp, checkUniqueAnnotations)))
 })
 
 test_that("importBugphyzz works with hash", {
-  bp <- importBugphyzz(version = "c2d34c0", force_download = TRUE)
-  expect_true(all("data.frame" == map_chr(bp, class)))
-  expect_true(all(map_lgl(bp, ~ nrow(.x) > 0)))
-  expect_true(all(map_lgl(bp, checkColumnNames)))
-  expect_true(all(map_lgl(bp, checkColumnTypes)))
-  expect_true(all(map_lgl(bp, checkNAs)))
-  expect_true(all(map_lgl(bp, checkCuration)))
+    bp <- importBugphyzz(version = "c2d34c0", force_download = TRUE)
+    expect_true(all("data.frame" == map_chr(bp, class)))
+    expect_true(all(map_lgl(bp, ~ nrow(.x) > 0)))
+    expect_true(all(map_lgl(bp, checkColumnNames)))
+    expect_true(all(map_lgl(bp, checkColumnTypes)))
+    expect_true(all(map_lgl(bp, checkNAs)))
+    expect_true(all(map_lgl(bp, checkCuration)))
+    expect_true(all(map_lgl(bp, checkUniqueAnnotations)))
 })
 
 ## TODO create test for using Zenodo
 test_that("importBugphyzz doesn't work with other words", {
-  expect_error(importBugphyzz(version = "abcd-1234", force_download = TRUE))
+    expect_error(importBugphyzz(version = "abcd-1234", force_download = TRUE))
 })
diff --git a/tests/testthat/test-makeSignatures.R b/tests/testthat/test-makeSignatures.R
index dfc5e5ed..a8fc7d25 100644
--- a/tests/testthat/test-makeSignatures.R
+++ b/tests/testthat/test-makeSignatures.R
@@ -1,13 +1,13 @@
 library(purrr)
 bp <- importBugphyzz()
 sigsNames <- map(bp, ~ makeSignatures(.x, tax_id_type = "Taxon_name")) |>
-  list_flatten(name_spec = "{inner}")
+    list_flatten(name_spec = "{inner}")
 sigsIDs <- map(bp, ~ makeSignatures(.x, tax_id_type = "NCBI_ID")) |>
-  list_flatten(name_spec = "{inner}")
+    list_flatten(name_spec = "{inner}")
 
 test_that("makeSignatures works with IDs", {
-  expect_true(all(map_lgl(sigsIDs, is.integer)))
+    expect_true(all(map_lgl(sigsIDs, is.integer)))
 })
 test_that("makeSignatures works with taxon names", {
-  expect_true(all(map_lgl(sigsNames, is.character)))
+    expect_true(all(map_lgl(sigsNames, is.character)))
 })
diff --git a/tests/testthat/test-physiologies.R b/tests/testthat/test-physiologies.R
index 1cad2bed..a7dbf441 100644
--- a/tests/testthat/test-physiologies.R
+++ b/tests/testthat/test-physiologies.R
@@ -2,6 +2,6 @@ library(purrr)
 pnames <- showPhys()
 p <- suppressWarnings(physiologies())
 test_that("physiologies works", {
-  expect_true(all(map_lgl(p, is.data.frame)))
-  expect_true(all(names(p) == pnames))
+    expect_true(all(map_lgl(p, is.data.frame)))
+    expect_true(all(names(p) == pnames))
 })