Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed paste error in rarefy_even_depth & improved efficiency #1630

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
442 changes: 233 additions & 209 deletions R/transform_filter-methods.R

Large diffs are not rendered by default.

88 changes: 44 additions & 44 deletions tests/testthat/test-IO.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ mothlist <- system.file("extdata", "esophagus.fn.list.gz", package="phyloseq")
mothgroup <- system.file("extdata", "esophagus.good.groups.gz", package="phyloseq")
mothtree <- system.file("extdata", "esophagus.tree.gz", package="phyloseq")
cutoff <- "0.10"
esophman <- import_mothur(mothlist, mothgroup, mothtree, cutoff)
esophman <- import_mothur(mothlist, mothgroup, mothtree, cutoff)
# mothur "Shared" file, create with mothur from these example data files
mothshared = system.file("extdata", "esophagus.fn.shared.gz", package="phyloseq")
constaxonomy = system.file("extdata", "mothur_example.cons.taxonomy.gz", package="phyloseq")
Expand All @@ -26,7 +26,7 @@ test_that("import_mothur: The two phyloseq objects, example and just-imported, a

test_that("import_mothur: Test mothur file import on the (esophagus data).", {
  # The example mothur list file is expected to report exactly these
  # cutoff labels, in this order.
  expected_cutoffs <- c(
    "unique", "0.00", "0.01", "0.02", "0.03", "0.04",
    "0.05", "0.06", "0.07", "0.08", "0.09", "0.10"
  )
  smlc <- show_mothur_cutoffs(mothlist)
  # Single assertion (the duplicate check was redundant); compared while
  # ignoring attributes, in the same direct form used elsewhere in this file.
  expect_equivalent(smlc, expected_cutoffs)
})

test_that("import_mothur: abundances can be manipulated mathematically", {
Expand All @@ -45,7 +45,7 @@ test_that("import_mothur: Expected classes of non-empty components", {
})

test_that("import_mothur: imported files become S4 object", {
  # One direct expectation is enough: the old-style
  # expect_that(..., is_true()) form asserted exactly the same condition.
  expect_true(isS4(esophman))
})

test_that("import_mothur: show method output tests", {
Expand All @@ -70,7 +70,7 @@ test_that("the import_RDP_otu function can properly read gzipped-example", {
otufile <- system.file("extdata",
"rformat_dist_0.03.txt.gz",
package="phyloseq")
ex_otu <- import_RDP_otu(otufile)
ex_otu <- import_RDP_otu(otufile)
# test expectations
expect_output(print(head(t(ex_otu))), "OTU Table:")
expect_is(ex_otu, "otu_table")
Expand All @@ -97,7 +97,7 @@ test_that("Classes of components are as expected", {
expect_is(otu_table(t0), ("otu_table"))
expect_is(tax_table(t0), ("taxonomyTable"))
expect_is(sample_data(t0), ("sample_data"))
expect_is(phy_tree(t0), ("phylo"))
expect_is(phy_tree(t0), ("phylo"))
expect_is(refseq(t0), ("DNAStringSet"))
})

Expand All @@ -114,64 +114,64 @@ test_that("Features of the abundance data are consistent, match known values", {

test_that("Features of the taxonomy table match expected values", {
  # Seven taxonomic ranks are expected, with these exact names and order.
  expect_equal(length(rank_names(t0)), (7L))
  expect_equal(
    rank_names(t0),
    c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
  )
  # Spot-check one row (row 53) of the taxonomy table against known values;
  # compared while ignoring attributes such as the rank names.
  tax53 <- as(tax_table(t0), "matrix")[53, ]
  expect_equivalent(
    tax53,
    c("Bacteria", "Proteobacteria", "Deltaproteobacteria",
      "Desulfovibrionales", "Desulfomicrobiaceae",
      "Desulfomicrobium", "Desulfomicrobiumorale")
  )
})
################################################################################
# parse function tests - note, these are also used by import_biom

test_that("Taxonomy vector parsing functions behave as expected", {

chvec1 = c("Bacteria", "Proteobacteria", "Gammaproteobacteria",
"Enterobacteriales", "Enterobacteriaceae", "Escherichia")

chvec2 = c("k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria",
"o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__")

chvec3 = c("Root", "k__Bacteria", "p__Firmicutes", "c__Bacilli",
"o__Bacillales", "f__Staphylococcaceae")

# Example where only some entries have greengenes prefix.
chvec4 = c("Root", "k__Bacteria", "Firmicutes", "c__Bacilli",
"o__Bacillales", "Staphylococcaceae", "z__mistake")

# Even more terrible example, where leading or trailing space characters are included
# (the exact weirdness of chvec4, compounded by leading and/or trailing space characters)
chvec5 = c(" Root \n ", " k__Bacteria", " Firmicutes", " c__Bacilli ",
"o__Bacillales ", "Staphylococcaceae ", "\t z__mistake \t\n")
"o__Bacillales ", "Staphylococcaceae ", "\t z__mistake \t\n")

# This should give a warning because there were no greengenes prefixes
expect_warning(t1 <- parse_taxonomy_greengenes(chvec1))
# And output from previous call, t1, should be identical to default
expect_that(parse_taxonomy_default(chvec1), is_equivalent_to(t1))

# All the greengenes entries get trimmed by parse_taxonomy_greengenes
expect_that(all(sapply(chvec2, nchar) > sapply(parse_taxonomy_greengenes(chvec2), nchar)), is_true())
expect_true(all(sapply(chvec2, nchar) > sapply(parse_taxonomy_greengenes(chvec2), nchar)))
# None of the greengenes entries are trimmed by parse_taxonomy_default
expect_that(any(sapply(chvec2, nchar) > sapply(parse_taxonomy_default(chvec2), nchar)), is_false())
expect_false(any(sapply(chvec2, nchar) > sapply(parse_taxonomy_default(chvec2), nchar)))

# Check that the "Root" element is not removed by parse_taxonomy_greengenes and parse_taxonomy_default.
expect_that("Root" %in% chvec3, is_true())
expect_that("Root" %in% parse_taxonomy_default(chvec3), is_true())
expect_that(length(parse_taxonomy_default(chvec3)) == length(chvec3), is_true())
expect_true("Root" %in% chvec3)
expect_true("Root" %in% parse_taxonomy_default(chvec3))
expect_true(length(parse_taxonomy_default(chvec3)) == length(chvec3))

# Check that non-greengenes prefixes, and those w/o prefixes, are given dummy rank(s)
chvec4ranks = names(parse_taxonomy_greengenes(chvec4))
expect_that(grep("Rank", chvec4ranks, fixed=TRUE), is_equivalent_to(c(1, 3, 6, 7)))
# Check that everything given dummy rank in default parse.
chvec4ranks = names(parse_taxonomy_default(chvec4))
expect_that(grep("Rank", chvec4ranks, fixed=TRUE), is_equivalent_to(1:7))

# chvec4 and chvec5 result in identical vectors.
expect_that(parse_taxonomy_default(chvec4), is_equivalent_to(parse_taxonomy_default(chvec5)))
expect_that(parse_taxonomy_greengenes(chvec4), is_equivalent_to(parse_taxonomy_greengenes(chvec5)))
expect_that(parse_taxonomy_greengenes(chvec4), is_equivalent_to(parse_taxonomy_greengenes(chvec5)))

# The names of chvec5, greengenes parsed, should be...
correct5names = c("Rank1", "Kingdom", "Rank3", "Class", "Order", "Rank6", "Rank7")
expect_that(names(parse_taxonomy_greengenes(chvec5)), is_equivalent_to(correct5names))
Expand All @@ -198,7 +198,7 @@ test_that("Importing biom files yield phyloseq objects", {

expect_is(rich_dense, ("phyloseq"))
expect_is(rich_sparse, ("phyloseq"))

expect_equal(ntaxa(rich_dense), (5L))
expect_equal(ntaxa(rich_sparse), (5L))

Expand All @@ -221,7 +221,7 @@ test_that("The different types of biom files yield phyloseq objects", {
rich_sparse = import_biom(rich_sparse_biom, treefilename, refseqfilename, parseFunction=parse_taxonomy_greengenes)
min_dense = import_biom(min_dense_biom, treefilename, refseqfilename, parseFunction=parse_taxonomy_greengenes)
min_sparse = import_biom(min_sparse_biom, treefilename, refseqfilename, parseFunction=parse_taxonomy_greengenes)

expect_is(rich_dense, ("phyloseq"))
expect_is(rich_sparse, ("phyloseq"))
expect_is(min_dense, ("phyloseq"))
Expand All @@ -230,7 +230,7 @@ test_that("The different types of biom files yield phyloseq objects", {
expect_equal(ntaxa(rich_dense), (5L))
expect_equal(ntaxa(rich_sparse), (5L))
expect_equal(ntaxa(min_dense), (5L))
expect_equal(ntaxa(min_sparse), (5L))
expect_equal(ntaxa(min_sparse), (5L))

# # Component classes
# sample_data
Expand All @@ -243,8 +243,8 @@ test_that("The different types of biom files yield phyloseq objects", {
expect_is(access(rich_dense, "tax_table"), ("taxonomyTable"))
expect_is(access(rich_sparse, "tax_table"), ("taxonomyTable"))
expect_is(access(min_dense, "tax_table"), ("NULL"))
expect_is(access(min_sparse, "tax_table"), ("NULL"))
expect_is(access(min_sparse, "tax_table"), ("NULL"))

# phylo tree
expect_is(access(rich_dense, "phy_tree"), ("phylo"))
expect_is(access(rich_sparse, "phy_tree"), ("phylo"))
Expand All @@ -259,14 +259,14 @@ test_that("The different types of biom files yield phyloseq objects", {
expect_is(access(rich_dense, "refseq"), ("DNAStringSet"))
expect_is(access(rich_sparse, "refseq"), ("DNAStringSet"))
expect_is(access(min_dense, "refseq"), ("DNAStringSet"))
expect_is(access(min_sparse, "refseq"), ("DNAStringSet"))
# otu_table
expect_is(access(min_sparse, "refseq"), ("DNAStringSet"))

# otu_table
expect_is(access(rich_dense, "otu_table"), ("otu_table"))
expect_is(access(rich_sparse, "otu_table"), ("otu_table"))
expect_is(access(min_dense, "otu_table"), ("otu_table"))
expect_is(access(min_sparse, "otu_table"), ("otu_table"))

# Compare values in the otu_table. For some reason the otu_tables are not identical
# one position is plus-two, another is minus-two
combrich <- c(access(rich_dense, "otu_table"), access(rich_sparse, "otu_table"))
Expand All @@ -283,15 +283,15 @@ test_that("The different types of biom files yield phyloseq objects", {

# Compare values in the sample_data
expect_equivalent(access(rich_dense, "sam_data"), (access(rich_sparse, "sam_data")))

# Compare values in the taxonomyTable
expect_equivalent(access(rich_dense, "tax_table"), (access(rich_sparse, "tax_table")))

})

test_that("the import_biom and import(\"biom\", ) syntax give same result", {
  # Import the same rich dense biom file through both entry points.
  x1 <- import_biom(rich_dense_biom, parseFunction=parse_taxonomy_greengenes)
  # Computed once only; a second identical import() call added nothing.
  x2 <- import("biom", BIOMfilename=rich_dense_biom, parseFunction=parse_taxonomy_greengenes)
  # Both routes must yield equivalent objects.
  expect_equivalent(x1, x2)
})
################################################################################
Expand All @@ -303,7 +303,7 @@ test_that("The read_tree function works as expected:", {
expect_equal(ntaxa(GPNewick), 500L)
expect_equal(GPNewick$Nnode, 499L)
expect_equivalent(taxa_names(GPNewick), GPNewick$tip.label)
# Now read a nexus tree...
# Now read a nexus tree...
# Some error-handling expectations
expect_that(read_tree("alskflsakjsfskfhas.akshfaksj"), gives_warning()) # file not exist
not_tree <- system.file("extdata", "esophagus.good.groups.gz", package="phyloseq")
Expand All @@ -328,7 +328,7 @@ test_that("The specialized read_tree_greengenes function works:", {
################################################################################
# microbio_me_qiime tests
# This tests different features and expected behavior for
# the functioning of an interface function to the
# the functioning of an interface function to the
# microbio.me/qiime data repository.
#
zipfile = "study_816_split_library_seqs_and_mapping.zip"
Expand All @@ -339,9 +339,9 @@ tarps = suppressWarnings(microbio_me_qiime(tarfile))
zipps = suppressWarnings(microbio_me_qiime(zipfile))
# This function is intended to interface with an external server,
# as described in the documentation.
# However, I don't want successful testing of this package to
# rely on the presence or form of particular files on an
# external server, so these tests will be done exclusively on
# However, I don't want successful testing of this package to
# rely on the presence or form of particular files on an
# external server, so these tests will be done exclusively on
# compressed file(s) representing what is exposed by the data server
# It is up to the user to provide a valid URL in practice,
# and the function attempts to provide informative status
Expand All @@ -353,14 +353,14 @@ test_that("The microbio_me_qiime imports as expected: .tar.gz", {
expect_identical(nrow(otu_table(tarps)), 50L)
expect_identical(nrow(sample_data(tarps)), 15L)
})
test_that("The microbio_me_qiime imports as expected: .zip", {
  # zipps is the object imported at top level from the .zip archive.
  expect_is(zipps, "phyloseq")
  # errorIfNULL=FALSE so that a missing component reaches expect_is()
  # and fails the class check there.
  expect_is(sample_data(zipps, errorIfNULL=FALSE), "sample_data")
  expect_is(otu_table(zipps, errorIfNULL=FALSE), "otu_table")
  # Known dimensions of the example study: 50 OTU-table rows, 15 samples.
  expect_identical(nrow(otu_table(zipps)), 50L)
  expect_identical(nrow(sample_data(zipps)), 15L)
})
test_that("Results of .tar.gz and .zip should be identical", {
test_that("Results of .tar.gz and .zip should be identical", {
expect_identical(tarps, zipps)
expect_identical(sample_data(tarps), sample_data(zipps))
expect_identical(otu_table(tarps), otu_table(zipps))
Expand All @@ -370,7 +370,7 @@ test_that("Results of .tar.gz and .zip should be identical", {
################################################################################
# Path to the "usearch.uc" example file in phyloseq's installed extdata directory.
usearchfile = system.file("extdata", "usearch.uc", package="phyloseq")
# Import it once at top level; the import_usearch_uc test below inspects OTU1.
OTU1 = import_usearch_uc(usearchfile)
test_that("import_usearch_uc: Properly omit entries from failed search", {
test_that("import_usearch_uc: Properly omit entries from failed search", {
ucLines = readLines(usearchfile)
expect_identical( sum(OTU1), (length(ucLines) - length(grep("*", ucLines, fixed=TRUE))) )
expect_identical( nrow(OTU1), 37L)
Expand All @@ -379,4 +379,4 @@ test_that("import_usearch_uc: Properly omit entries from failed search", {
expect_identical( ncol(OTU1), 33L)
expect_equivalent(colSums(OTU1)[1:6], c(6, 1, 2, 1, 1, 1))
})
################################################################################
################################################################################
Loading