From b8b345ad57bdb2178097bd6aee568d910d7dca90 Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Tue, 4 Jun 2024 14:14:42 +0300 Subject: [PATCH 01/21] updated example in docs --- R/plotAbundance.R | 4 ++-- man/plotAbundance.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/plotAbundance.R b/R/plotAbundance.R index 29f2adad..b6df4844 100644 --- a/R/plotAbundance.R +++ b/R/plotAbundance.R @@ -72,8 +72,8 @@ #' data(GlobalPatterns, package="mia") #' se <- GlobalPatterns #' -#' ## Plotting abundance using the first taxonomic rank as default -#' plotAbundance(se, assay.type="counts") +#' ## Plotting counts using the first taxonomic rank as default +#' plotAbundance(se, assay.type="counts", use_relative=FALSE) + labs(y="Counts") #' #' ## Using "Phylum" as rank #' plotAbundance(se, assay.type="counts", rank = "Phylum", add_legend = FALSE) diff --git a/man/plotAbundance.Rd b/man/plotAbundance.Rd index eb29a844..5a234fd6 100644 --- a/man/plotAbundance.Rd +++ b/man/plotAbundance.Rd @@ -96,8 +96,8 @@ features present. data(GlobalPatterns, package="mia") se <- GlobalPatterns -## Plotting abundance using the first taxonomic rank as default -plotAbundance(se, assay.type="counts") +## Plotting counts using the first taxonomic rank as default +plotAbundance(se, assay.type="counts", use_relative=FALSE) + labs(y="Counts") ## Using "Phylum" as rank plotAbundance(se, assay.type="counts", rank = "Phylum", add_legend = FALSE) From a5c80257d0fad5417dc5a00aa6731b1784ab4680 Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Thu, 6 Jun 2024 15:42:00 +0300 Subject: [PATCH 02/21] ran all contibutor checks - no errors relating to my PR, and fixed typo in a unittest, edited gitignore/Rbuildignore --- .Rbuildignore | 3 +++ .gitignore | 3 +++ tests/testthat/test-2plotSeries.R | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.Rbuildignore b/.Rbuildignore index 0f56b95e..c1bd9a89 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,5 @@ +^renv$ +^renv\.lock$ ^miaViz\.Rproj$ ^\.Rproj\.user$ .github @@ -7,3 +9,4 @@ inst/extras/ ^doc$ ^Meta$ +^\.Rprofile$ diff --git a/.gitignore b/.gitignore index 2bab5502..fa29b55a 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,6 @@ docs .DS_Store /doc/ /Meta/ +renv/ +.Rprofile + diff --git a/tests/testthat/test-2plotSeries.R b/tests/testthat/test-2plotSeries.R index 7970a643..6ba2ee90 100644 --- a/tests/testthat/test-2plotSeries.R +++ b/tests/testthat/test-2plotSeries.R @@ -2,7 +2,7 @@ context("plot series") test_that("plot series", { # Load data from miaTime package - skip_if_not_installed("miatime") + skip_if_not_installed("miaTime") data(SilvermanAGutData, package = "miaTime") tse <- SilvermanAGutData tse_sub <- tse[1:5] From 80f7cc2a9cb44262a9a09df6fe41f5099c5c7e1c Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Thu, 13 Jun 2024 12:03:10 +0300 Subject: [PATCH 03/21] adding sorting method --- .gitignore | 3 +- NAMESPACE | 1 + R/neatsort.R | 208 ++++++++++++++++++++++++++ man/dot-check_neatsort_args.Rd | 22 +++ man/dot-get_sorted_rownames.Rd | 19 +++ man/dot-radial_theta.Rd | 19 +++ man/dot-take_dimensions.Rd | 19 +++ man/dot-take_subset.Rd | 19 +++ man/neatsort.Rd | 79 ++++++++++ tests/testthat/test-neatsort.R | 263 +++++++++++++++++++++++++++++++++ 10 files changed, 651 insertions(+), 1 deletion(-) create mode 100644 R/neatsort.R create mode 100644 man/dot-check_neatsort_args.Rd create mode 100644 man/dot-get_sorted_rownames.Rd create mode 100644 man/dot-radial_theta.Rd create mode 100644 man/dot-take_dimensions.Rd create mode 100644 man/dot-take_subset.Rd create mode 100644 man/neatsort.Rd create mode 100644 tests/testthat/test-neatsort.R diff --git a/.gitignore b/.gitignore index fa29b55a..3e1a08c2 100644 --- a/.gitignore +++ b/.gitignore @@ -7,5 +7,6 @@ docs /doc/ /Meta/ renv/ +renv.lock .Rprofile - +.idea diff --git a/NAMESPACE b/NAMESPACE index 910afe9b..363a4b4e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ exportMethods("colTreeData<-") exportMethods("rowTreeData<-") exportMethods(colTreeData) exportMethods(combineTreeData) +exportMethods(neatsort) exportMethods(plotAbundance) exportMethods(plotAbundanceDensity) exportMethods(plotCCA) diff --git a/R/neatsort.R b/R/neatsort.R new file mode 100644 index 00000000..ef2613f2 --- /dev/null +++ b/R/neatsort.R @@ -0,0 +1,208 @@ +#' Sorting by radial theta angle +#' +#' @description \code{neatsort} sorts already ordinated data by the radial theta angle. +#' This method is useful for organizing data points based on their angular +#' position in a 2D space, typically after an ordination technique such as PCA or NMDS +#' has been applied. +#' +#' The function takes in a matrix of ordinated data, optionally +#' centers the data using specified methods (mean, median, or none), and then calculates +#' the angle (theta) for each point relative to the centroid. The data points are then +#' sorted based on these theta values in either ascending or descending order. +#' +#' One significant application of this sorting method is in plotting heatmaps. +#' By using radial theta sorting, the relationships between data points can be preserved +#' according to the ordination method's spatial configuration, rather than relying on +#' hierarchical clustering, which may distort these relationships. This approach +#' allows for a more faithful representation of the data's intrinsic structure as captured +#' by the ordination process. +#' +#' @param x A matrix containing the ordinated data to be sorted. Columns should represent the principal components (PCs) and rows should represent the entities being analyzed (e.g., features or samples). +#' @param subset A vector specifying a subset of rows to be used and retained. If NULL, all rows are used. +#' @param dimensions A vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates. +#' @param centering_method A character string specifying the method to center the data. Options are "mean", "median", or "none" if your data is already centred. +#' @param sorting_order A character string specifying the order of sorting. Options are "ascending" or "descending". +#' @param ... Additional arguments passed to other methods. +#' @return A vector of row names in the sorted order. +#' +#' @name neatsort +#' +#' @examples +#' # Load required libraries +#' library(mia) +#' +#' # Load the dataset +#' data(peerj13075) +#' +#' # Agglomerate by Order and transform the data +#' tse_order <- mergeFeaturesByRank(peerj13075, rank = "order", onRankOnly = TRUE) +#' tse_order <- transformAssay(tse_order, assay.type = "counts", method="relabundance", MARGIN = "samples", name="relabundance") +#' tse_order <- transformAssay(tse_order, assay.type="relabundance", method="z", MARGIN = "features", name="z") +#' z_transformed_data <- assay(tse_order, "z") +#' +#' # Get the top taxa and perform PCA +#' top_taxa <- getTopFeatures(tse_order, top = 10, assay.type="z") +#' top_feature_data <- z_transformed_data[top_taxa, ] +#' pca_results <- prcomp(top_feature_data, scale = TRUE) +#' scores_pca <- pca_results$x[, 1:2] +#' +#' # Sort by radial theta and subset the transformed data +#' sorted_order <- neatsort(scores_pca, dimensions = c(1, 2), centering_method = "mean", sorting_order = "ascending") +#' ordered_transformed_data <- z_transformed_data[sorted_order, ] +NULL + +#' @rdname neatsort +setGeneric("neatsort", signature = c("x"), + function(x, ...) + standardGeneric("neatsort")) + + +#' .check_neatsort_args +#' @description Checks if the input arguments for the neatsort function are valid. +#' @param x A matrix containing the ordinated data to be sorted. +#' @param subset A vector specifying a subset of rows to be used and retained. +#' @param dimensions A vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates. +#' @param centering_method A character string specifying the method to center the data. +#' @param sorting_order A character string specifying the order of sorting. +.check_neatsort_args <- function(x, subset, dimensions, centering_method, sorting_order) { + # Check data is a matrix + if (!is.matrix(x)) { + stop("Input data must be a matrix.") + } + + # Check there is sufficient data + if (nrow(x) == 0 || ncol(x) == 0) { + stop("No data to plot. Matrix must have at least one row and one column.") + } + + # Check subset validity + if (!is.null(subset)) { + if (is.numeric(subset) && any(subset > nrow(x))) { + stop("Subset refers to rows that do not exist in the data.") + } else if (is.character(subset) && any(!subset %in% rownames(x))) { + stop("Subset refers to row names that do not exist in the data.") + } else if (!is.numeric(subset) && !is.character(subset)) { + stop("Subset must be a vector of row indices or names.") + } + } + + # Check dimensions are valid + if (any(dimensions > ncol(x))) { + stop("dimensions refer to columns that do not exist in the data.") + } + + # Check dimension vector is of length 2 + if (length(dimensions) != 2) { + stop("Exactly two dimensions must be specified.") + } + + # Check centering_method + centering_method <- match.arg(centering_method, c("mean", "median", "none")) + + # Check sorting_order + sorting_order <- match.arg(sorting_order, c("ascending", "descending")) + + # Check for unique row names + if (any(duplicated(rownames(x)))) { + stop("Row names of the matrix must be unique.") + } + + # Check for unique column names + if (any(duplicated(colnames(x)))) { + stop("Column names of the matrix must be unique.") + } +} + + +#' @description Sorts a matrix by radial theta angle. +#' @param x A matrix containing the ordinated data to be sorted. +#' @param subset A vector specifying a subset of rows to be used and retained. +#' @param dimensions A vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates. +#' @param centering_method A character string specifying the method to center the data. +#' @param sorting_order A character string specifying the order of sorting. +#' @return A vector of row names in the sorted order. +#' @rdname neatsort +#' @export +setMethod("neatsort", signature = c("matrix"), + function(x, + subset = NULL, + dimensions = c(1, 2), + centering_method = c("mean", "median", "none"), + sorting_order = c("ascending", "descending"), + ...){ + + # Check args + .check_neatsort_args(x, subset, dimensions, centering_method, sorting_order) + + # Create subset if required + if( !is.null(subset) ){ + x <- .take_subset(x, subset) + } + + # Take the correct dimensions + x <- .take_dimensions(x, dimensions) + + # Get the theta values and order them + theta_values <- .radial_theta(x, centering_method) + ordering <- .get_sorted_rownames(theta_values, sorting_order) + + return(ordering) + } + ) + + +#' .take_subset +#' @description Takes a subset of rows from the data matrix. +#' @param data The data matrix +#' @param subset The subset of rows to be retained +#' @return The subset of the data matrix +.take_subset <- function(data, subset) { + data <- data[subset, , drop = FALSE] + return(data) +} + + +#' .take_dimensions +#' @description Takes the specified columns (dimensions) from the data matrix. +#' @param data The data matrix +#' @param dimensions The columns to retain +#' @return The data matrix with only the specified dimensions +.take_dimensions <- function(data, dimensions) { + data <- data[, dimensions, drop = FALSE] + return(data) +} + + +#' .radial_theta +#' @description Computes the radial theta values for each row in the data matrix. +#' @param data The data matrix +#' @param centering_method The method used for centering the data +#' @return A named vector of theta values for each row +.radial_theta <- function(data, centering_method) { + if (centering_method == "mean") { + centered_data <- scale(data, center = TRUE, scale = FALSE) + } else if (centering_method == "median") { + centered_data <- scale(data, center = apply(data, 2, median), scale = FALSE) + } else if (centering_method == "none") { + centered_data <- data + } else { + stop("Unsupported centering method. Choose either 'mean', 'median', 'mode', or 'none'.") + } + + theta <- atan2(centered_data[, 2], centered_data[, 1]) + names(theta) <- rownames(centered_data) + + return(theta) +} + +#' .get_sorted_rownames +#' @description Sorts the theta values and returns the ordered row names. +#' @param theta_values A named vector of theta values +#' @param sorting_order The order of sorting (ascending or descending) +#' @return A vector of row names in the sorted order +.get_sorted_rownames <- function(theta_values, sorting_order) { + sorted_indices <- order(theta_values, decreasing = (sorting_order == "descending")) + rownames <- names(theta_values)[sorted_indices] + return(rownames) +} + diff --git a/man/dot-check_neatsort_args.Rd b/man/dot-check_neatsort_args.Rd new file mode 100644 index 00000000..cfc8b219 --- /dev/null +++ b/man/dot-check_neatsort_args.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/neatsort.R +\name{.check_neatsort_args} +\alias{.check_neatsort_args} +\title{.check_neatsort_args} +\usage{ +.check_neatsort_args(x, subset, dimensions, centering_method, sorting_order) +} +\arguments{ +\item{x}{A matrix containing the ordinated data to be sorted.} + +\item{subset}{A vector specifying a subset of rows to be used and retained.} + +\item{dimensions}{A vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates.} + +\item{centering_method}{A character string specifying the method to center the data.} + +\item{sorting_order}{A character string specifying the order of sorting.} +} +\description{ +Checks if the input arguments for the neatsort function are valid. +} diff --git a/man/dot-get_sorted_rownames.Rd b/man/dot-get_sorted_rownames.Rd new file mode 100644 index 00000000..2f1b6238 --- /dev/null +++ b/man/dot-get_sorted_rownames.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/neatsort.R +\name{.get_sorted_rownames} +\alias{.get_sorted_rownames} +\title{.get_sorted_rownames} +\usage{ +.get_sorted_rownames(theta_values, sorting_order) +} +\arguments{ +\item{theta_values}{A named vector of theta values} + +\item{sorting_order}{The order of sorting (ascending or descending)} +} +\value{ +A vector of row names in the sorted order +} +\description{ +Sorts the theta values and returns the ordered row names. +} diff --git a/man/dot-radial_theta.Rd b/man/dot-radial_theta.Rd new file mode 100644 index 00000000..2f1eaf86 --- /dev/null +++ b/man/dot-radial_theta.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/neatsort.R +\name{.radial_theta} +\alias{.radial_theta} +\title{.radial_theta} +\usage{ +.radial_theta(data, centering_method) +} +\arguments{ +\item{data}{The data matrix} + +\item{centering_method}{The method used for centering the data} +} +\value{ +A named vector of theta values for each row +} +\description{ +Computes the radial theta values for each row in the data matrix. +} diff --git a/man/dot-take_dimensions.Rd b/man/dot-take_dimensions.Rd new file mode 100644 index 00000000..65753b23 --- /dev/null +++ b/man/dot-take_dimensions.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/neatsort.R +\name{.take_dimensions} +\alias{.take_dimensions} +\title{.take_dimensions} +\usage{ +.take_dimensions(data, dimensions) +} +\arguments{ +\item{data}{The data matrix} + +\item{dimensions}{The columns to retain} +} +\value{ +The data matrix with only the specified dimensions +} +\description{ +Takes the specified columns (dimensions) from the data matrix. +} diff --git a/man/dot-take_subset.Rd b/man/dot-take_subset.Rd new file mode 100644 index 00000000..4976a12c --- /dev/null +++ b/man/dot-take_subset.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/neatsort.R +\name{.take_subset} +\alias{.take_subset} +\title{.take_subset} +\usage{ +.take_subset(data, subset) +} +\arguments{ +\item{data}{The data matrix} + +\item{subset}{The subset of rows to be retained} +} +\value{ +The subset of the data matrix +} +\description{ +Takes a subset of rows from the data matrix. +} diff --git a/man/neatsort.Rd b/man/neatsort.Rd new file mode 100644 index 00000000..aa8a8c7f --- /dev/null +++ b/man/neatsort.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/neatsort.R +\name{neatsort} +\alias{neatsort} +\alias{neatsort,matrix-method} +\title{Sorting by radial theta angle} +\usage{ +neatsort(x, ...) + +\S4method{neatsort}{matrix}( + x, + subset = NULL, + dimensions = c(1, 2), + centering_method = c("mean", "median", "none"), + sorting_order = c("ascending", "descending"), + ... +) +} +\arguments{ +\item{x}{A matrix containing the ordinated data to be sorted.} + +\item{...}{Additional arguments passed to other methods.} + +\item{subset}{A vector specifying a subset of rows to be used and retained.} + +\item{dimensions}{A vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates.} + +\item{centering_method}{A character string specifying the method to center the data.} + +\item{sorting_order}{A character string specifying the order of sorting.} +} +\value{ +A vector of row names in the sorted order. + +A vector of row names in the sorted order. +} +\description{ +\code{neatsort} sorts already ordinated data by the radial theta angle. +This method is useful for organizing data points based on their angular +position in a 2D space, typically after an ordination technique such as PCA or NMDS +has been applied. + +The function takes in a matrix of ordinated data, optionally +centers the data using specified methods (mean, median, or none), and then calculates +the angle (theta) for each point relative to the centroid. The data points are then +sorted based on these theta values in either ascending or descending order. + +One significant application of this sorting method is in plotting heatmaps. +By using radial theta sorting, the relationships between data points can be preserved +according to the ordination method's spatial configuration, rather than relying on +hierarchical clustering, which may distort these relationships. This approach +allows for a more faithful representation of the data's intrinsic structure as captured +by the ordination process. + +Sorts a matrix by radial theta angle. +} +\examples{ +# Load required libraries +library(mia) + +# Load the dataset +data(peerj13075) + +# Agglomerate by Order and transform the data +tse_order <- mergeFeaturesByRank(peerj13075, rank = "order", onRankOnly = TRUE) +tse_order <- transformAssay(tse_order, assay.type = "counts", method="relabundance", MARGIN = "samples", name="relabundance") +tse_order <- transformAssay(tse_order, assay.type="relabundance", method="z", MARGIN = "features", name="z") +z_transformed_data <- assay(tse_order, "z") + +# Get the top taxa and perform PCA +top_taxa <- getTopFeatures(tse_order, top = 10, assay.type="z") +top_feature_data <- z_transformed_data[top_taxa, ] +pca_results <- prcomp(top_feature_data, scale = TRUE) +scores_pca <- pca_results$x[, 1:2] + +# Sort by radial theta and subset the transformed data +sorted_order <- neatsort(scores_pca, dimensions = c(1, 2), centering_method = "mean", sorting_order = "ascending") +ordered_transformed_data <- z_transformed_data[sorted_order, ] +} diff --git a/tests/testthat/test-neatsort.R b/tests/testthat/test-neatsort.R new file mode 100644 index 00000000..68c9e380 --- /dev/null +++ b/tests/testthat/test-neatsort.R @@ -0,0 +1,263 @@ +testthat::set_max_fails(Inf) + + +context("take_subset") +take_subset_matrix <- matrix(1:20, ncol = 5, byrow = TRUE, dimnames = list(c("Feature1", "Feature2", "Feature3", "Feature4"), c("Sample1", "Sample2", "Sample3", "Sample4", "Sample5"))) + +test_that("Test take_subset method", { + # Argument errors + expect_error(miaViz:::.take_subset(), + 'argument "data" is missing') + + # Valid subsetting by row names + subset <- c("Feature1", "Feature3", "Feature4") + result <- miaViz:::.take_subset(take_subset_matrix, subset) + expected <- matrix(c(1, 2, 3, 4, 5, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20), nrow = 3, byrow = TRUE, + dimnames = list(c("Feature1", "Feature3", "Feature4"), + c("Sample1", "Sample2", "Sample3", "Sample4", "Sample5"))) + expect_equal(result, expected) + + # Valid subsetting by row indices + subset <- c(1, 3, 4) + result <- miaViz:::.take_subset(take_subset_matrix, subset) + expected <- matrix(c(1, 2, 3, 4, 5, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20), nrow = 3, byrow = TRUE, + dimnames = list(c("Feature1", "Feature3", "Feature4"), + c("Sample1", "Sample2", "Sample3", "Sample4", "Sample5"))) + expect_equal(result, expected) + + # Subsetting with indices out of bounds + subset <- c(1, 6) + expect_error(miaViz:::.take_subset(take_subset_matrix, subset), "subscript out of bounds") + + # Subsetting with non-existent row names + subset <- c("Feature1", "Feature6") + expect_error(miaViz:::.take_subset(take_subset_matrix, subset), "subscript out of bounds") + + # Subsetting with incorrect types + subset <- list(1, 3) + expect_error(miaViz:::.take_subset(take_subset_matrix, subset), "invalid subscript type") +}) + + +context("take_dimensions") +take_dimensions_matrix <- matrix(1:20, ncol = 5, byrow = TRUE, dimnames = list(c("Feature1", "Feature2", "Feature3", "Feature4"), c("PC1", "PC2", "PC3", "PC4", "PC5"))) + +test_that("Test take_dimensions method",{ + # Argument errors + expect_error(miaViz:::.take_dimensions(), + 'argument "data" is missing') + + # Valid subsetting by column indices + dimensions <- c(1, 3) + result <- miaViz:::.take_dimensions(take_dimensions_matrix, dimensions) + expected <- matrix(c(1, 6, 11, 16, 3, 8, 13, 18), nrow = 4, byrow = FALSE, + dimnames = list(c("Feature1", "Feature2", "Feature3", "Feature4"), + c("PC1", "PC3"))) + expect_equal(result, expected) + + # Valid subsetting by column names + dimensions <- c("PC1", "PC3") + result <- miaViz:::.take_dimensions(take_dimensions_matrix, dimensions) + expected <- matrix(c(1, 6, 11, 16, 3, 8, 13, 18), nrow = 4, byrow = FALSE, + dimnames = list(c("Feature1", "Feature2", "Feature3", "Feature4"), + c("PC1", "PC3"))) + expect_equal(result, expected) + + # Subsetting with indices out of bounds + subset <- c(1, 6) + expect_error(miaViz:::.take_dimensions(take_dimensions_matrix, subset), "subscript out of bounds") + + # Subsetting with non-existent row names + dimensions <- c("PC1", "PC6") + expect_error(miaViz:::.take_dimensions(take_dimensions_matrix, subset), "subscript out of bounds") + + # Subsetting with incorrect types + subset <- list(1, 3) + expect_error(miaViz:::.take_dimensions(take_dimensions_matrix, subset), "invalid subscript type") +}) + + +context("radial_theta") +radial_theta_matrix <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8), ncol = 2, byrow = TRUE, + dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), + c("PC1", "PC2"))) + +test_that("Test radial_theta method", { + # Argument errors + expect_error(miaViz:::.take_dimensions(), + 'argument "data" is missing') + + # Centering by mean + centering_method <- "mean" + result <- miaViz:::.radial_theta(radial_theta_matrix, centering_method) + centered_data <- matrix(c(-3, -3, -1, -1, 1, 1, 3, 3), ncol = 2, byrow = TRUE, + dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), + c("PC1", "PC2"))) + expected <- atan2(centered_data[, 2], centered_data[, 1]) + names(expected) <- rownames(centered_data) + expect_equal(result, expected) + + # Centering by median + centering_method <- "median" + result <- miaViz:::.radial_theta(radial_theta_matrix, centering_method) + centered_data <- matrix(c(-3, -3, -1, -1, 1, 1, 3, 3), ncol = 2, byrow = TRUE, + dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), + c("PC1", "PC2"))) + expected <- atan2(centered_data[, 2], centered_data[, 1]) + names(expected) <- rownames(centered_data) + expect_equal(result, expected) + + # No centering + centering_method <- "none" + result <- miaViz:::.radial_theta(radial_theta_matrix, centering_method) + centered_data <- radial_theta_matrix + expected <- atan2(centered_data[, 2], centered_data[, 1]) + names(expected) <- rownames(centered_data) + expect_equal(result, expected) + + # Unsupported centering method + centering_method <- "unsupported" + expect_error(miaViz:::.radial_theta(radial_theta_matrix, centering_method), "Unsupported centering method. Choose either 'mean', 'median', 'mode', or 'none'.") +}) + + +context("get_sorted_rownames") +theta_values <- c(Sample1 = 0.5, Sample2 = -1.2, Sample3 = 1.5, Sample4 = -0.8) + +test_that("Test get_sorted_rownames method", { + # Argument errors + expect_error(miaViz:::.get_sorted_rownames(), + 'argument "theta_values" is missing') + expect_error(miaViz:::.get_sorted_rownames(theta_values), + 'argument "sorting_order" is missing') + + # Valid sorting in ascending order + sorting_order <- "ascending" + expected <- c("Sample2", "Sample4", "Sample1", "Sample3") + result <- miaViz:::.get_sorted_rownames(theta_values, sorting_order) + expect_equal(result, expected) + + # Sorting in descending order + sorting_order <- "descending" + result <- miaViz:::.get_sorted_rownames(theta_values, sorting_order) + expected <- c("Sample3", "Sample1", "Sample4", "Sample2") + expect_equal(result, expected) + + # Edge case: all theta values are the same + theta_values <- c(Sample1 = 0.5, Sample2 = 0.5, Sample3 = 0.5, Sample4 = 0.5) + sorting_order <- "ascending" + result <- miaViz:::.get_sorted_rownames(theta_values, sorting_order) + expected <- c("Sample1", "Sample2", "Sample3", "Sample4") # Order should be maintained + expect_equal(result, expected) + + sorting_order <- "descending" + result <- miaViz:::.get_sorted_rownames(theta_values, sorting_order) + expected <- c("Sample1", "Sample2", "Sample3", "Sample4") # Order should be maintained + expect_equal(result, expected) + + # Edge case: theta values contain NULL + theta_values <- c(Sample1 = 0.5, Sample2 = NULL, Sample3 = 1.5, Sample4 = -0.8) + sorting_order <- "ascending" + result <- miaViz:::.get_sorted_rownames(theta_values, sorting_order) + expected <- c("Sample4", "Sample1", "Sample3") + expect_equal(result, expected) + + sorting_order <- "descending" + result <- miaViz:::.get_sorted_rownames(theta_values, sorting_order) + expected <- c("Sample3", "Sample1", "Sample4") + expect_equal(result, expected) +}) + + +context("check_neatsort_args") +check_args_matrix <- matrix(1:20, nrow = 4, ncol = 5, + dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), + c("PC1", "PC2", "PC3", "PC4", "PC5"))) + +test_that("Test check_neatsort_args method", { + # Argument errors + expect_error(miaViz:::check_neatsort_args(), + "object 'check_neatsort_args' not found") + expect_error(miaViz:::.check_neatsort_args(check_args_matrix), + 'argument "subset" is missing') + expect_error(miaViz:::.check_neatsort_args(check_args_matrix, c(1, 3, 4)), + 'argument \"dimensions\" is missing, with no default') + expect_error(miaViz:::.check_neatsort_args(check_args_matrix, c(1, 3, 4), c(1, 3)), + 'argument \"centering_method\" is missing, with no default') + expect_error(miaViz:::.check_neatsort_args(check_args_matrix, c(1, 3, 4), c(1, 3), "mean"), + 'argument "sorting_order" is missing') + + # Non-matrix input + expect_error(miaViz:::.check_neatsort_args(as.data.frame(check_args_matrix), NULL, c(1, 2), "mean", "ascending"), + "Input data must be a matrix.") + + # Test for empty matrix + empty_matrix <- matrix(numeric(0), nrow = 0, ncol = 0) + expect_error(miaViz:::.check_neatsort_args(empty_matrix, NULL, c(1, 2), "mean", "ascending"), + "No data to plot. Matrix must have at least one row and one column.") + + # Test valid subset + expect_silent(miaViz:::.check_neatsort_args(check_args_matrix, NULL, c(1, 2), "mean", "ascending")) + expect_silent(miaViz:::.check_neatsort_args(check_args_matrix, c(1, 3, 4), c(1, 2), "mean", "ascending")) + expect_silent(miaViz:::.check_neatsort_args(check_args_matrix, c("Sample1", "Sample2"), c(1, 2), "mean", "ascending")) + + # Test invalid subset + expect_error(miaViz:::.check_neatsort_args(check_args_matrix, 5, c(1, 2), "mean", "ascending"), + "Subset refers to rows that do not exist in the data.") + expect_error(miaViz:::.check_neatsort_args(check_args_matrix, "Sample5", c(1, 2), "mean", "ascending"), + "Subset refers to row names that do not exist in the data.") + expect_error(miaViz:::.check_neatsort_args(check_args_matrix, list(1, 2), c(1, 2), "mean", "ascending"), + "Subset must be a vector of row indices or names.") + + # Test for invalid dimensions + expect_error(miaViz:::.check_neatsort_args(check_args_matrix, NULL, c(1, 6), "mean", "ascending"), + "dimensions refer to columns that do not exist in the data.") + expect_error(miaViz:::.check_neatsort_args(check_args_matrix, NULL, c(1, 2, 3), "mean", "ascending"), + "Exactly two dimensions must be specified.") + + # Test for invalid centering_method + expect_error(miaViz:::.check_neatsort_args(check_args_matrix, NULL, c(1, 2), "invalid_method", "ascending"), + "'arg' should be one of “mean”, “median”, “none”") + + # Test for invalid sorting_order + expect_error(miaViz:::.check_neatsort_args(check_args_matrix, NULL, c(1, 2), "mean", "invalid_order"), + "'arg' should be one of “ascending”, “descending”") + + # Test for non-unique row names + non_unique_row_matrix <- matrix(1:20, nrow = 4, ncol = 5, + dimnames = list(c("Sample1", "Sample2", "Sample1", "Sample4"), + c("PC1", "PC2", "PC3", "PC4", "PC5"))) + expect_error(miaViz:::.check_neatsort_args(non_unique_row_matrix, NULL, c(1, 2), "mean", "ascending"), + "Row names of the matrix must be unique.") + + # Test for non-unique column names + non_unique_col_matrix <- matrix(1:20, nrow = 4, ncol = 5, + dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), + c("PC1", "PC2", "PC1", "PC4", "PC5"))) + expect_error(miaViz:::.check_neatsort_args(non_unique_col_matrix, NULL, c(1, 2), "mean", "ascending"), + "Column names of the matrix must be unique.") + +}) + + +context("neatsort") +neatsort_matrix <- matrix(c(10, 8, 2, 8, 3, + 4, 4, 4, 6, 5, + 5, 4, 4, 1, 2, + 2, 3, 5, 7, 8), nrow = 4, ncol = 5, byrow = TRUE, + dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), + c("PC1", "PC2", "PC3", "PC4", "PC5"))) + +test_that("Test neatsort function", { + # Test with valid inputs, no subset + result <- miaViz::neatsort(neatsort_matrix, subset = NULL, dimensions = c(1, 2), centering_method = "mean", sorting_order = "ascending") + expected <- c("Sample4", "Sample2", "Sample3", "Sample1") + expect_equal(result, expected) + + # Test with valid inputs and subset + subset <- c("Sample1", "Sample3") + result <- miaViz::neatsort(neatsort_matrix, subset = subset, dimensions = c(1, 2), centering_method = "mean", sorting_order = "ascending") + expected <- c("Sample3", "Sample1") + expect_equal(result, expected) +}) + From d87ddc3937a8158d6bffe34cab65f60de26d5a70 Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Thu, 13 Jun 2024 12:15:59 +0300 Subject: [PATCH 04/21] removing excess documentation --- R/neatsort.R | 32 +++++--------------------------- man/dot-check_neatsort_args.Rd | 22 ---------------------- man/dot-get_sorted_rownames.Rd | 19 ------------------- man/dot-radial_theta.Rd | 19 ------------------- man/dot-take_dimensions.Rd | 19 ------------------- man/dot-take_subset.Rd | 19 ------------------- 6 files changed, 5 insertions(+), 125 deletions(-) delete mode 100644 man/dot-check_neatsort_args.Rd delete mode 100644 man/dot-get_sorted_rownames.Rd delete mode 100644 man/dot-radial_theta.Rd delete mode 100644 man/dot-take_dimensions.Rd delete mode 100644 man/dot-take_subset.Rd diff --git a/R/neatsort.R b/R/neatsort.R index ef2613f2..de686001 100644 --- a/R/neatsort.R +++ b/R/neatsort.R @@ -57,13 +57,7 @@ setGeneric("neatsort", signature = c("x"), standardGeneric("neatsort")) -#' .check_neatsort_args -#' @description Checks if the input arguments for the neatsort function are valid. -#' @param x A matrix containing the ordinated data to be sorted. -#' @param subset A vector specifying a subset of rows to be used and retained. -#' @param dimensions A vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates. -#' @param centering_method A character string specifying the method to center the data. -#' @param sorting_order A character string specifying the order of sorting. +# Checks if the input arguments for the neatsort function are valid. .check_neatsort_args <- function(x, subset, dimensions, centering_method, sorting_order) { # Check data is a matrix if (!is.matrix(x)) { @@ -151,33 +145,21 @@ setMethod("neatsort", signature = c("matrix"), ) -#' .take_subset -#' @description Takes a subset of rows from the data matrix. -#' @param data The data matrix -#' @param subset The subset of rows to be retained -#' @return The subset of the data matrix +# Takes a subset of rows from the data matrix. .take_subset <- function(data, subset) { data <- data[subset, , drop = FALSE] return(data) } -#' .take_dimensions -#' @description Takes the specified columns (dimensions) from the data matrix. -#' @param data The data matrix -#' @param dimensions The columns to retain -#' @return The data matrix with only the specified dimensions +# Takes the specified columns (dimensions) from the data matrix. .take_dimensions <- function(data, dimensions) { data <- data[, dimensions, drop = FALSE] return(data) } -#' .radial_theta -#' @description Computes the radial theta values for each row in the data matrix. -#' @param data The data matrix -#' @param centering_method The method used for centering the data -#' @return A named vector of theta values for each row +# Computes the radial theta values for each row in the data matrix. .radial_theta <- function(data, centering_method) { if (centering_method == "mean") { centered_data <- scale(data, center = TRUE, scale = FALSE) @@ -195,11 +177,7 @@ setMethod("neatsort", signature = c("matrix"), return(theta) } -#' .get_sorted_rownames -#' @description Sorts the theta values and returns the ordered row names. -#' @param theta_values A named vector of theta values -#' @param sorting_order The order of sorting (ascending or descending) -#' @return A vector of row names in the sorted order +# Sorts the theta values and returns the ordered row names. .get_sorted_rownames <- function(theta_values, sorting_order) { sorted_indices <- order(theta_values, decreasing = (sorting_order == "descending")) rownames <- names(theta_values)[sorted_indices] diff --git a/man/dot-check_neatsort_args.Rd b/man/dot-check_neatsort_args.Rd deleted file mode 100644 index cfc8b219..00000000 --- a/man/dot-check_neatsort_args.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/neatsort.R -\name{.check_neatsort_args} -\alias{.check_neatsort_args} -\title{.check_neatsort_args} -\usage{ -.check_neatsort_args(x, subset, dimensions, centering_method, sorting_order) -} -\arguments{ -\item{x}{A matrix containing the ordinated data to be sorted.} - -\item{subset}{A vector specifying a subset of rows to be used and retained.} - -\item{dimensions}{A vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates.} - -\item{centering_method}{A character string specifying the method to center the data.} - -\item{sorting_order}{A character string specifying the order of sorting.} -} -\description{ -Checks if the input arguments for the neatsort function are valid. -} diff --git a/man/dot-get_sorted_rownames.Rd b/man/dot-get_sorted_rownames.Rd deleted file mode 100644 index 2f1b6238..00000000 --- a/man/dot-get_sorted_rownames.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/neatsort.R -\name{.get_sorted_rownames} -\alias{.get_sorted_rownames} -\title{.get_sorted_rownames} -\usage{ -.get_sorted_rownames(theta_values, sorting_order) -} -\arguments{ -\item{theta_values}{A named vector of theta values} - -\item{sorting_order}{The order of sorting (ascending or descending)} -} -\value{ -A vector of row names in the sorted order -} -\description{ -Sorts the theta values and returns the ordered row names. -} diff --git a/man/dot-radial_theta.Rd b/man/dot-radial_theta.Rd deleted file mode 100644 index 2f1eaf86..00000000 --- a/man/dot-radial_theta.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/neatsort.R -\name{.radial_theta} -\alias{.radial_theta} -\title{.radial_theta} -\usage{ -.radial_theta(data, centering_method) -} -\arguments{ -\item{data}{The data matrix} - -\item{centering_method}{The method used for centering the data} -} -\value{ -A named vector of theta values for each row -} -\description{ -Computes the radial theta values for each row in the data matrix. -} diff --git a/man/dot-take_dimensions.Rd b/man/dot-take_dimensions.Rd deleted file mode 100644 index 65753b23..00000000 --- a/man/dot-take_dimensions.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/neatsort.R -\name{.take_dimensions} -\alias{.take_dimensions} -\title{.take_dimensions} -\usage{ -.take_dimensions(data, dimensions) -} -\arguments{ -\item{data}{The data matrix} - -\item{dimensions}{The columns to retain} -} -\value{ -The data matrix with only the specified dimensions -} -\description{ -Takes the specified columns (dimensions) from the data matrix. -} diff --git a/man/dot-take_subset.Rd b/man/dot-take_subset.Rd deleted file mode 100644 index 4976a12c..00000000 --- a/man/dot-take_subset.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/neatsort.R -\name{.take_subset} -\alias{.take_subset} -\title{.take_subset} -\usage{ -.take_subset(data, subset) -} -\arguments{ -\item{data}{The data matrix} - -\item{subset}{The subset of rows to be retained} -} -\value{ -The subset of the data matrix -} -\description{ -Takes a subset of rows from the data matrix. -} From efe905ab80103cdb3f6b4dda3f99b80fe521ef1e Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Fri, 14 Jun 2024 14:20:19 +0300 Subject: [PATCH 05/21] made formatting changes --- NAMESPACE | 2 +- R/{neatsort.R => getNeatOrder.R} | 145 +++++++-------- man/{neatsort.Rd => getNeatOrder.Rd} | 32 ++-- tests/testthat/test-getNeatOrder.R | 203 +++++++++++++++++++++ tests/testthat/test-neatsort.R | 263 --------------------------- 5 files changed, 287 insertions(+), 358 deletions(-) rename R/{neatsort.R => getNeatOrder.R} (58%) rename man/{neatsort.Rd => getNeatOrder.Rd} (71%) create mode 100644 tests/testthat/test-getNeatOrder.R delete mode 100644 tests/testthat/test-neatsort.R diff --git a/NAMESPACE b/NAMESPACE index 363a4b4e..50755b69 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,7 +17,7 @@ exportMethods("colTreeData<-") exportMethods("rowTreeData<-") exportMethods(colTreeData) exportMethods(combineTreeData) -exportMethods(neatsort) +exportMethods(getNeatOrder) exportMethods(plotAbundance) exportMethods(plotAbundanceDensity) exportMethods(plotCCA) diff --git a/R/neatsort.R b/R/getNeatOrder.R similarity index 58% rename from R/neatsort.R rename to R/getNeatOrder.R index de686001..efd06c34 100644 --- a/R/neatsort.R +++ b/R/getNeatOrder.R @@ -1,6 +1,6 @@ #' Sorting by radial theta angle #' -#' @description \code{neatsort} sorts already ordinated data by the radial theta angle. +#' @description \code{getNeatOrder} sorts already ordinated data by the radial theta angle. #' This method is useful for organizing data points based on their angular #' position in a 2D space, typically after an ordination technique such as PCA or NMDS #' has been applied. @@ -21,11 +21,11 @@ #' @param subset A vector specifying a subset of rows to be used and retained. If NULL, all rows are used. #' @param dimensions A vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates. #' @param centering_method A character string specifying the method to center the data. Options are "mean", "median", or "none" if your data is already centred. -#' @param sorting_order A character string specifying the order of sorting. Options are "ascending" or "descending". +#' @param decreasing A boolean that when true sorts the rows in a descending order by radial theta angle. Default is False. #' @param ... Additional arguments passed to other methods. #' @return A vector of row names in the sorted order. #' -#' @name neatsort +#' @name getNeatOrder #' #' @examples #' # Load required libraries @@ -35,7 +35,7 @@ #' data(peerj13075) #' #' # Agglomerate by Order and transform the data -#' tse_order <- mergeFeaturesByRank(peerj13075, rank = "order", onRankOnly = TRUE) +#' tse_order <- agglomerateByRank(peerj13075, rank = "order", onRankOnly = TRUE) #' tse_order <- transformAssay(tse_order, assay.type = "counts", method="relabundance", MARGIN = "samples", name="relabundance") #' tse_order <- transformAssay(tse_order, assay.type="relabundance", method="z", MARGIN = "features", name="z") #' z_transformed_data <- assay(tse_order, "z") @@ -47,115 +47,107 @@ #' scores_pca <- pca_results$x[, 1:2] #' #' # Sort by radial theta and subset the transformed data -#' sorted_order <- neatsort(scores_pca, dimensions = c(1, 2), centering_method = "mean", sorting_order = "ascending") +#' sorted_order <- getNeatOrder(scores_pca, dimensions = c(1, 2), centering_method = "mean") #' ordered_transformed_data <- z_transformed_data[sorted_order, ] NULL -#' @rdname neatsort -setGeneric("neatsort", signature = c("x"), +#' @rdname getNeatOrder +setGeneric("getNeatOrder", signature = c("x"), function(x, ...) - standardGeneric("neatsort")) + standardGeneric("getNeatOrder")) -# Checks if the input arguments for the neatsort function are valid. -.check_neatsort_args <- function(x, subset, dimensions, centering_method, sorting_order) { +# Implementation for taking in a raw matrix. +#' @rdname getNeatOrder +#' @export +setMethod("getNeatOrder", signature = c("matrix"), + function(x, + subset = NULL, + dimensions = c(1, 2), + centering_method = c("mean", "median", "none"), + decreasing = FALSE, + ...){ + + # Check args + .check_args(x, subset, dimensions, centering_method, decreasing) + + # Create subset if required + if( !is.null(subset) ){ + x <- x[subset, , drop = FALSE] + } + + # Take the correct dimensions + x <- x[, dimensions, drop = FALSE] + + # Get the theta values and order them + theta_values <- .radial_theta(x, centering_method) + ordering <- .get_sorted_rownames(theta_values, decreasing) + + return(ordering) + } + ) + + +# Checks the method arguments. +.check_args <- function(x, subset, dimensions, centering_method, decreasing) { # Check data is a matrix if (!is.matrix(x)) { - stop("Input data must be a matrix.") + stop("Input data must be a matrix.", call. = FALSE) } # Check there is sufficient data if (nrow(x) == 0 || ncol(x) == 0) { - stop("No data to plot. Matrix must have at least one row and one column.") + stop("No data to plot. Matrix must have at least one row and one column.", call. = FALSE) } # Check subset validity if (!is.null(subset)) { if (is.numeric(subset) && any(subset > nrow(x))) { - stop("Subset refers to rows that do not exist in the data.") + stop("Subset refers to rows that do not exist in the data.", call. = FALSE) } else if (is.character(subset) && any(!subset %in% rownames(x))) { - stop("Subset refers to row names that do not exist in the data.") + stop("Subset refers to row names that do not exist in the data.", call. = FALSE) } else if (!is.numeric(subset) && !is.character(subset)) { - stop("Subset must be a vector of row indices or names.") + stop("Subset must be a vector of row indices or names.", call. = FALSE) } } # Check dimensions are valid - if (any(dimensions > ncol(x))) { - stop("dimensions refer to columns that do not exist in the data.") + if (is.numeric(dimensions)) { + if (any(dimensions > ncol(x) | dimensions < 1)) { + stop("dimensions refer to columns that do not exist in the data.", call. = FALSE) + } + } else if (is.character(dimensions)) { + if (any(!dimensions %in% colnames(x))) { + stop("dimensions refer to column names that do not exist in the data.", call. = FALSE) + } + } else { + stop("dimensions must be a vector of column indices or names.", call. = FALSE) } # Check dimension vector is of length 2 if (length(dimensions) != 2) { - stop("Exactly two dimensions must be specified.") + stop("Exactly two dimensions must be specified.", call. = FALSE) } # Check centering_method centering_method <- match.arg(centering_method, c("mean", "median", "none")) - # Check sorting_order - sorting_order <- match.arg(sorting_order, c("ascending", "descending")) + # Check decreasing + if (!is.logical(decreasing) || length(decreasing) != 1) { + stop("decreasing must be a single boolean value.", call. = FALSE) + } # Check for unique row names if (any(duplicated(rownames(x)))) { - stop("Row names of the matrix must be unique.") + stop("Row names of the matrix must be unique.", call. = FALSE) } # Check for unique column names if (any(duplicated(colnames(x)))) { - stop("Column names of the matrix must be unique.") + stop("Column names of the matrix must be unique.", call. = FALSE) } -} - - -#' @description Sorts a matrix by radial theta angle. -#' @param x A matrix containing the ordinated data to be sorted. -#' @param subset A vector specifying a subset of rows to be used and retained. -#' @param dimensions A vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates. -#' @param centering_method A character string specifying the method to center the data. -#' @param sorting_order A character string specifying the order of sorting. -#' @return A vector of row names in the sorted order. -#' @rdname neatsort -#' @export -setMethod("neatsort", signature = c("matrix"), - function(x, - subset = NULL, - dimensions = c(1, 2), - centering_method = c("mean", "median", "none"), - sorting_order = c("ascending", "descending"), - ...){ - - # Check args - .check_neatsort_args(x, subset, dimensions, centering_method, sorting_order) - - # Create subset if required - if( !is.null(subset) ){ - x <- .take_subset(x, subset) - } - - # Take the correct dimensions - x <- .take_dimensions(x, dimensions) - - # Get the theta values and order them - theta_values <- .radial_theta(x, centering_method) - ordering <- .get_sorted_rownames(theta_values, sorting_order) - - return(ordering) - } - ) - - -# Takes a subset of rows from the data matrix. -.take_subset <- function(data, subset) { - data <- data[subset, , drop = FALSE] - return(data) -} - - -# Takes the specified columns (dimensions) from the data matrix. -.take_dimensions <- function(data, dimensions) { - data <- data[, dimensions, drop = FALSE] - return(data) + + return(NULL) } @@ -168,7 +160,7 @@ setMethod("neatsort", signature = c("matrix"), } else if (centering_method == "none") { centered_data <- data } else { - stop("Unsupported centering method. Choose either 'mean', 'median', 'mode', or 'none'.") + stop("Unsupported centering method. Choose either 'mean', 'median', 'mode', or 'none'.", call. = FALSE) } theta <- atan2(centered_data[, 2], centered_data[, 1]) @@ -178,9 +170,10 @@ setMethod("neatsort", signature = c("matrix"), } # Sorts the theta values and returns the ordered row names. -.get_sorted_rownames <- function(theta_values, sorting_order) { - sorted_indices <- order(theta_values, decreasing = (sorting_order == "descending")) +.get_sorted_rownames <- function(theta_values, decreasing) { + sorted_indices <- order(theta_values, decreasing = decreasing) rownames <- names(theta_values)[sorted_indices] return(rownames) } + diff --git a/man/neatsort.Rd b/man/getNeatOrder.Rd similarity index 71% rename from man/neatsort.Rd rename to man/getNeatOrder.Rd index aa8a8c7f..51efdac1 100644 --- a/man/neatsort.Rd +++ b/man/getNeatOrder.Rd @@ -1,41 +1,39 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/neatsort.R -\name{neatsort} -\alias{neatsort} -\alias{neatsort,matrix-method} +% Please edit documentation in R/getNeatOrder.R +\name{getNeatOrder} +\alias{getNeatOrder} +\alias{getNeatOrder,matrix-method} \title{Sorting by radial theta angle} \usage{ -neatsort(x, ...) +getNeatOrder(x, ...) -\S4method{neatsort}{matrix}( +\S4method{getNeatOrder}{matrix}( x, subset = NULL, dimensions = c(1, 2), centering_method = c("mean", "median", "none"), - sorting_order = c("ascending", "descending"), + decreasing = FALSE, ... ) } \arguments{ -\item{x}{A matrix containing the ordinated data to be sorted.} +\item{x}{A matrix containing the ordinated data to be sorted. Columns should represent the principal components (PCs) and rows should represent the entities being analyzed (e.g., features or samples).} \item{...}{Additional arguments passed to other methods.} -\item{subset}{A vector specifying a subset of rows to be used and retained.} +\item{subset}{A vector specifying a subset of rows to be used and retained. If NULL, all rows are used.} \item{dimensions}{A vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates.} -\item{centering_method}{A character string specifying the method to center the data.} +\item{centering_method}{A character string specifying the method to center the data. Options are "mean", "median", or "none" if your data is already centred.} -\item{sorting_order}{A character string specifying the order of sorting.} +\item{decreasing}{A boolean that when true sorts the rows in a descending order by radial theta angle. Default is False.} } \value{ -A vector of row names in the sorted order. - A vector of row names in the sorted order. } \description{ -\code{neatsort} sorts already ordinated data by the radial theta angle. +\code{getNeatOrder} sorts already ordinated data by the radial theta angle. This method is useful for organizing data points based on their angular position in a 2D space, typically after an ordination technique such as PCA or NMDS has been applied. @@ -51,8 +49,6 @@ according to the ordination method's spatial configuration, rather than relying hierarchical clustering, which may distort these relationships. This approach allows for a more faithful representation of the data's intrinsic structure as captured by the ordination process. - -Sorts a matrix by radial theta angle. } \examples{ # Load required libraries @@ -62,7 +58,7 @@ library(mia) data(peerj13075) # Agglomerate by Order and transform the data -tse_order <- mergeFeaturesByRank(peerj13075, rank = "order", onRankOnly = TRUE) +tse_order <- agglomerateByRank(peerj13075, rank = "order", onRankOnly = TRUE) tse_order <- transformAssay(tse_order, assay.type = "counts", method="relabundance", MARGIN = "samples", name="relabundance") tse_order <- transformAssay(tse_order, assay.type="relabundance", method="z", MARGIN = "features", name="z") z_transformed_data <- assay(tse_order, "z") @@ -74,6 +70,6 @@ pca_results <- prcomp(top_feature_data, scale = TRUE) scores_pca <- pca_results$x[, 1:2] # Sort by radial theta and subset the transformed data -sorted_order <- neatsort(scores_pca, dimensions = c(1, 2), centering_method = "mean", sorting_order = "ascending") +sorted_order <- getNeatOrder(scores_pca, dimensions = c(1, 2), centering_method = "mean") ordered_transformed_data <- z_transformed_data[sorted_order, ] } diff --git a/tests/testthat/test-getNeatOrder.R b/tests/testthat/test-getNeatOrder.R new file mode 100644 index 00000000..332e54bb --- /dev/null +++ b/tests/testthat/test-getNeatOrder.R @@ -0,0 +1,203 @@ +testthat::set_max_fails(Inf) + + +context("neatsort") +neatsort_matrix <- matrix(c(10, 8, 2, 8, 3, + 4, 4, 4, 6, 5, + 5, 4, 4, 1, 2, + 2, 3, 5, 7, 8), nrow = 4, ncol = 5, byrow = TRUE, + dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), + c("PC1", "PC2", "PC3", "PC4", "PC5"))) + +test_that("Test getNeatOrder function", { + # Test with valid inputs, no subset + result <- miaViz::getNeatOrder(neatsort_matrix, dimensions = c(1, 2), centering_method = "mean") + expected <- c("Sample4", "Sample2", "Sample3", "Sample1") + expect_equal(result, expected) + + # Test with valid inputs, no subset, decreasing parameter = TRUE + result <- miaViz::getNeatOrder(neatsort_matrix, dimensions = c(1, 2), centering_method = "mean", decreasing = TRUE) + expected <- c("Sample1", "Sample3", "Sample2", "Sample4") + expect_equal(result, expected) + + # Test with dimensions col names + result <- miaViz::getNeatOrder(neatsort_matrix, dimensions = c("PC1", "PC2"), centering_method = "mean") + expected <- c("Sample4", "Sample2", "Sample3", "Sample1") + expect_equal(result, expected) + + # Test with valid inputs and subset row names + subset <- c("Sample1", "Sample3") + result <- miaViz::getNeatOrder(neatsort_matrix, subset, dimensions = c(1, 2), centering_method = "mean") + expected <- c("Sample3", "Sample1") + expect_equal(result, expected) + + # Test with valid inputs and subset row indices + subset <- c(1, 3) + result <- miaViz::getNeatOrder(neatsort_matrix, subset, dimensions = c(1, 2), centering_method = "mean") + expected <- c("Sample3", "Sample1") + expect_equal(result, expected) +}) + + +context("check_args") +check_args_matrix <- matrix(1:20, nrow = 4, ncol = 5, + dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), + c("PC1", "PC2", "PC3", "PC4", "PC5"))) + +test_that("Test check_args method", { + # Argument errors + expect_error(miaViz:::check_args(), + "object 'check_args' not found") + expect_error(miaViz:::.check_args(check_args_matrix), + 'argument "subset" is missing') + expect_error(miaViz:::.check_args(check_args_matrix, c(1, 3, 4)), + 'argument \"dimensions\" is missing, with no default') + expect_error(miaViz:::.check_args(check_args_matrix, c(1, 3, 4), c(1, 3)), + 'argument \"centering_method\" is missing, with no default') + expect_error(miaViz:::.check_args(check_args_matrix, c(1, 3, 4), c(1, 3), "mean"), + 'argument "decreasing" is missing') + + # Non-matrix input + expect_error(miaViz:::.check_args(as.data.frame(check_args_matrix), NULL, c(1, 2), "mean", FALSE), + "Input data must be a matrix.") + + # Test for empty matrix + empty_matrix <- matrix(numeric(0), nrow = 0, ncol = 0) + expect_error(miaViz:::.check_args(empty_matrix, NULL, c(1, 2), "mean", FALSE), + "No data to plot. Matrix must have at least one row and one column.") + + # Test valid subset + expect_silent(miaViz:::.check_args(check_args_matrix, NULL, c(1, 2), "mean", FALSE)) + expect_silent(miaViz:::.check_args(check_args_matrix, c(1, 3, 4), c(1, 2), "mean", FALSE)) + expect_silent(miaViz:::.check_args(check_args_matrix, c("Sample1", "Sample2"), c(1, 2), "mean", FALSE)) + + # Test invalid subset + expect_error(miaViz:::.check_args(check_args_matrix, 5, c(1, 2), "mean", FALSE), + "Subset refers to rows that do not exist in the data.") + expect_error(miaViz:::.check_args(check_args_matrix, "Sample5", c(1, 2), "mean", FALSE), + "Subset refers to row names that do not exist in the data.") + expect_error(miaViz:::.check_args(check_args_matrix, list(1, 2), c(1, 2), "mean", FALSE), + "Subset must be a vector of row indices or names.") + + # Test for invalid dimensions + expect_error(miaViz:::.check_args(check_args_matrix, NULL, c(1, 6), "mean", FALSE), + "dimensions refer to columns that do not exist in the data.") + expect_error(miaViz:::.check_args(check_args_matrix, NULL, c("PC6"), "mean", FALSE), + "dimensions refer to column names that do not exist in the data.") + expect_error(miaViz:::.check_args(check_args_matrix, NULL, list("PC6"), "mean", FALSE), + "dimensions must be a vector of column indices or names.") + expect_error(miaViz:::.check_args(check_args_matrix, NULL, c(1, 2, 3), "mean", FALSE), + "Exactly two dimensions must be specified.") + + # Test for invalid centering_method + expect_error(miaViz:::.check_args(check_args_matrix, NULL, c(1, 2), "invalid_method", FALSE), + "'arg' should be one of “mean”, “median”, “none”") + + # Test for invalid sorting_order + expect_error(miaViz:::.check_args(check_args_matrix, NULL, c(1, 2), "mean", "invalid_order"), + "decreasing must be a single boolean value.") + + # Test for non-unique row names + non_unique_row_matrix <- matrix(1:20, nrow = 4, ncol = 5, + dimnames = list(c("Sample1", "Sample2", "Sample1", "Sample4"), + c("PC1", "PC2", "PC3", "PC4", "PC5"))) + expect_error(miaViz:::.check_args(non_unique_row_matrix, NULL, c(1, 2), "mean", FALSE), + "Row names of the matrix must be unique.") + + # Test for non-unique column names + non_unique_col_matrix <- matrix(1:20, nrow = 4, ncol = 5, + dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), + c("PC1", "PC2", "PC1", "PC4", "PC5"))) + expect_error(miaViz:::.check_args(non_unique_col_matrix, NULL, c(1, 2), "mean", FALSE), + "Column names of the matrix must be unique.") + +}) + + +context("radial_theta") +radial_theta_matrix <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8), ncol = 2, byrow = TRUE, + dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), + c("PC1", "PC2"))) + +test_that("Test radial_theta method", { + # Argument errors + expect_error(miaViz:::.radial_theta(), + 'argument \"centering_method\" is missing, with no default') + expect_error(miaViz:::.radial_theta(radial_theta_matrix), + 'argument "centering_method" is missing') + + # Centering by mean + centering_method <- "mean" + result <- miaViz:::.radial_theta(radial_theta_matrix, centering_method) + centered_data <- matrix(c(-3, -3, -1, -1, 1, 1, 3, 3), ncol = 2, byrow = TRUE, + dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), + c("PC1", "PC2"))) + expected <- atan2(centered_data[, 2], centered_data[, 1]) + names(expected) <- rownames(centered_data) + expect_equal(result, expected) + + # Centering by median + centering_method <- "median" + result <- miaViz:::.radial_theta(radial_theta_matrix, centering_method) + centered_data <- matrix(c(-3, -3, -1, -1, 1, 1, 3, 3), ncol = 2, byrow = TRUE, + dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), + c("PC1", "PC2"))) + expected <- atan2(centered_data[, 2], centered_data[, 1]) + names(expected) <- rownames(centered_data) + expect_equal(result, expected) + + # No centering + centering_method <- "none" + result <- miaViz:::.radial_theta(radial_theta_matrix, centering_method) + centered_data <- radial_theta_matrix + expected <- atan2(centered_data[, 2], centered_data[, 1]) + names(expected) <- rownames(centered_data) + expect_equal(result, expected) + + # Unsupported centering method + centering_method <- "unsupported" + expect_error(miaViz:::.radial_theta(radial_theta_matrix, centering_method), "Unsupported centering method. Choose either 'mean', 'median', 'mode', or 'none'.") +}) + + +context("get_sorted_rownames") +theta_values <- c(Sample1 = 0.5, Sample2 = -1.2, Sample3 = 1.5, Sample4 = -0.8) + +test_that("Test get_sorted_rownames method", { + # Argument errors + expect_error(miaViz:::.get_sorted_rownames(), + 'argument "theta_values" is missing') + expect_error(miaViz:::.get_sorted_rownames(theta_values), + 'argument "decreasing" is missing') + + # Valid sorting in ascending order + expected <- c("Sample2", "Sample4", "Sample1", "Sample3") + result <- miaViz:::.get_sorted_rownames(theta_values, FALSE) + expect_equal(result, expected) + + # Sorting in descending order + result <- miaViz:::.get_sorted_rownames(theta_values, TRUE) + expected <- c("Sample3", "Sample1", "Sample4", "Sample2") + expect_equal(result, expected) + + # Edge case: all theta values are the same + theta_values <- c(Sample1 = 0.5, Sample2 = 0.5, Sample3 = 0.5, Sample4 = 0.5) + result <- miaViz:::.get_sorted_rownames(theta_values, FALSE) + expected <- c("Sample1", "Sample2", "Sample3", "Sample4") # Order should be maintained + expect_equal(result, expected) + + result <- miaViz:::.get_sorted_rownames(theta_values, TRUE) + expected <- c("Sample1", "Sample2", "Sample3", "Sample4") # Order should be maintained + expect_equal(result, expected) + + # Edge case: theta values contain NULL + theta_values <- c(Sample1 = 0.5, Sample2 = NULL, Sample3 = 1.5, Sample4 = -0.8) + result <- miaViz:::.get_sorted_rownames(theta_values, FALSE) + expected <- c("Sample4", "Sample1", "Sample3") + expect_equal(result, expected) + + result <- miaViz:::.get_sorted_rownames(theta_values, TRUE) + expected <- c("Sample3", "Sample1", "Sample4") + expect_equal(result, expected) +}) + diff --git a/tests/testthat/test-neatsort.R b/tests/testthat/test-neatsort.R deleted file mode 100644 index 68c9e380..00000000 --- a/tests/testthat/test-neatsort.R +++ /dev/null @@ -1,263 +0,0 @@ -testthat::set_max_fails(Inf) - - -context("take_subset") -take_subset_matrix <- matrix(1:20, ncol = 5, byrow = TRUE, dimnames = list(c("Feature1", "Feature2", "Feature3", "Feature4"), c("Sample1", "Sample2", "Sample3", "Sample4", "Sample5"))) - -test_that("Test take_subset method", { - # Argument errors - expect_error(miaViz:::.take_subset(), - 'argument "data" is missing') - - # Valid subsetting by row names - subset <- c("Feature1", "Feature3", "Feature4") - result <- miaViz:::.take_subset(take_subset_matrix, subset) - expected <- matrix(c(1, 2, 3, 4, 5, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20), nrow = 3, byrow = TRUE, - dimnames = list(c("Feature1", "Feature3", "Feature4"), - c("Sample1", "Sample2", "Sample3", "Sample4", "Sample5"))) - expect_equal(result, expected) - - # Valid subsetting by row indices - subset <- c(1, 3, 4) - result <- miaViz:::.take_subset(take_subset_matrix, subset) - expected <- matrix(c(1, 2, 3, 4, 5, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20), nrow = 3, byrow = TRUE, - dimnames = list(c("Feature1", "Feature3", "Feature4"), - c("Sample1", "Sample2", "Sample3", "Sample4", "Sample5"))) - expect_equal(result, expected) - - # Subsetting with indices out of bounds - subset <- c(1, 6) - expect_error(miaViz:::.take_subset(take_subset_matrix, subset), "subscript out of bounds") - - # Subsetting with non-existent row names - subset <- c("Feature1", "Feature6") - expect_error(miaViz:::.take_subset(take_subset_matrix, subset), "subscript out of bounds") - - # Subsetting with incorrect types - subset <- list(1, 3) - expect_error(miaViz:::.take_subset(take_subset_matrix, subset), "invalid subscript type") -}) - - -context("take_dimensions") -take_dimensions_matrix <- matrix(1:20, ncol = 5, byrow = TRUE, dimnames = list(c("Feature1", "Feature2", "Feature3", "Feature4"), c("PC1", "PC2", "PC3", "PC4", "PC5"))) - -test_that("Test take_dimensions method",{ - # Argument errors - expect_error(miaViz:::.take_dimensions(), - 'argument "data" is missing') - - # Valid subsetting by column indices - dimensions <- c(1, 3) - result <- miaViz:::.take_dimensions(take_dimensions_matrix, dimensions) - expected <- matrix(c(1, 6, 11, 16, 3, 8, 13, 18), nrow = 4, byrow = FALSE, - dimnames = list(c("Feature1", "Feature2", "Feature3", "Feature4"), - c("PC1", "PC3"))) - expect_equal(result, expected) - - # Valid subsetting by column names - dimensions <- c("PC1", "PC3") - result <- miaViz:::.take_dimensions(take_dimensions_matrix, dimensions) - expected <- matrix(c(1, 6, 11, 16, 3, 8, 13, 18), nrow = 4, byrow = FALSE, - dimnames = list(c("Feature1", "Feature2", "Feature3", "Feature4"), - c("PC1", "PC3"))) - expect_equal(result, expected) - - # Subsetting with indices out of bounds - subset <- c(1, 6) - expect_error(miaViz:::.take_dimensions(take_dimensions_matrix, subset), "subscript out of bounds") - - # Subsetting with non-existent row names - dimensions <- c("PC1", "PC6") - expect_error(miaViz:::.take_dimensions(take_dimensions_matrix, subset), "subscript out of bounds") - - # Subsetting with incorrect types - subset <- list(1, 3) - expect_error(miaViz:::.take_dimensions(take_dimensions_matrix, subset), "invalid subscript type") -}) - - -context("radial_theta") -radial_theta_matrix <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8), ncol = 2, byrow = TRUE, - dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), - c("PC1", "PC2"))) - -test_that("Test radial_theta method", { - # Argument errors - expect_error(miaViz:::.take_dimensions(), - 'argument "data" is missing') - - # Centering by mean - centering_method <- "mean" - result <- miaViz:::.radial_theta(radial_theta_matrix, centering_method) - centered_data <- matrix(c(-3, -3, -1, -1, 1, 1, 3, 3), ncol = 2, byrow = TRUE, - dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), - c("PC1", "PC2"))) - expected <- atan2(centered_data[, 2], centered_data[, 1]) - names(expected) <- rownames(centered_data) - expect_equal(result, expected) - - # Centering by median - centering_method <- "median" - result <- miaViz:::.radial_theta(radial_theta_matrix, centering_method) - centered_data <- matrix(c(-3, -3, -1, -1, 1, 1, 3, 3), ncol = 2, byrow = TRUE, - dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), - c("PC1", "PC2"))) - expected <- atan2(centered_data[, 2], centered_data[, 1]) - names(expected) <- rownames(centered_data) - expect_equal(result, expected) - - # No centering - centering_method <- "none" - result <- miaViz:::.radial_theta(radial_theta_matrix, centering_method) - centered_data <- radial_theta_matrix - expected <- atan2(centered_data[, 2], centered_data[, 1]) - names(expected) <- rownames(centered_data) - expect_equal(result, expected) - - # Unsupported centering method - centering_method <- "unsupported" - expect_error(miaViz:::.radial_theta(radial_theta_matrix, centering_method), "Unsupported centering method. Choose either 'mean', 'median', 'mode', or 'none'.") -}) - - -context("get_sorted_rownames") -theta_values <- c(Sample1 = 0.5, Sample2 = -1.2, Sample3 = 1.5, Sample4 = -0.8) - -test_that("Test get_sorted_rownames method", { - # Argument errors - expect_error(miaViz:::.get_sorted_rownames(), - 'argument "theta_values" is missing') - expect_error(miaViz:::.get_sorted_rownames(theta_values), - 'argument "sorting_order" is missing') - - # Valid sorting in ascending order - sorting_order <- "ascending" - expected <- c("Sample2", "Sample4", "Sample1", "Sample3") - result <- miaViz:::.get_sorted_rownames(theta_values, sorting_order) - expect_equal(result, expected) - - # Sorting in descending order - sorting_order <- "descending" - result <- miaViz:::.get_sorted_rownames(theta_values, sorting_order) - expected <- c("Sample3", "Sample1", "Sample4", "Sample2") - expect_equal(result, expected) - - # Edge case: all theta values are the same - theta_values <- c(Sample1 = 0.5, Sample2 = 0.5, Sample3 = 0.5, Sample4 = 0.5) - sorting_order <- "ascending" - result <- miaViz:::.get_sorted_rownames(theta_values, sorting_order) - expected <- c("Sample1", "Sample2", "Sample3", "Sample4") # Order should be maintained - expect_equal(result, expected) - - sorting_order <- "descending" - result <- miaViz:::.get_sorted_rownames(theta_values, sorting_order) - expected <- c("Sample1", "Sample2", "Sample3", "Sample4") # Order should be maintained - expect_equal(result, expected) - - # Edge case: theta values contain NULL - theta_values <- c(Sample1 = 0.5, Sample2 = NULL, Sample3 = 1.5, Sample4 = -0.8) - sorting_order <- "ascending" - result <- miaViz:::.get_sorted_rownames(theta_values, sorting_order) - expected <- c("Sample4", "Sample1", "Sample3") - expect_equal(result, expected) - - sorting_order <- "descending" - result <- miaViz:::.get_sorted_rownames(theta_values, sorting_order) - expected <- c("Sample3", "Sample1", "Sample4") - expect_equal(result, expected) -}) - - -context("check_neatsort_args") -check_args_matrix <- matrix(1:20, nrow = 4, ncol = 5, - dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), - c("PC1", "PC2", "PC3", "PC4", "PC5"))) - -test_that("Test check_neatsort_args method", { - # Argument errors - expect_error(miaViz:::check_neatsort_args(), - "object 'check_neatsort_args' not found") - expect_error(miaViz:::.check_neatsort_args(check_args_matrix), - 'argument "subset" is missing') - expect_error(miaViz:::.check_neatsort_args(check_args_matrix, c(1, 3, 4)), - 'argument \"dimensions\" is missing, with no default') - expect_error(miaViz:::.check_neatsort_args(check_args_matrix, c(1, 3, 4), c(1, 3)), - 'argument \"centering_method\" is missing, with no default') - expect_error(miaViz:::.check_neatsort_args(check_args_matrix, c(1, 3, 4), c(1, 3), "mean"), - 'argument "sorting_order" is missing') - - # Non-matrix input - expect_error(miaViz:::.check_neatsort_args(as.data.frame(check_args_matrix), NULL, c(1, 2), "mean", "ascending"), - "Input data must be a matrix.") - - # Test for empty matrix - empty_matrix <- matrix(numeric(0), nrow = 0, ncol = 0) - expect_error(miaViz:::.check_neatsort_args(empty_matrix, NULL, c(1, 2), "mean", "ascending"), - "No data to plot. Matrix must have at least one row and one column.") - - # Test valid subset - expect_silent(miaViz:::.check_neatsort_args(check_args_matrix, NULL, c(1, 2), "mean", "ascending")) - expect_silent(miaViz:::.check_neatsort_args(check_args_matrix, c(1, 3, 4), c(1, 2), "mean", "ascending")) - expect_silent(miaViz:::.check_neatsort_args(check_args_matrix, c("Sample1", "Sample2"), c(1, 2), "mean", "ascending")) - - # Test invalid subset - expect_error(miaViz:::.check_neatsort_args(check_args_matrix, 5, c(1, 2), "mean", "ascending"), - "Subset refers to rows that do not exist in the data.") - expect_error(miaViz:::.check_neatsort_args(check_args_matrix, "Sample5", c(1, 2), "mean", "ascending"), - "Subset refers to row names that do not exist in the data.") - expect_error(miaViz:::.check_neatsort_args(check_args_matrix, list(1, 2), c(1, 2), "mean", "ascending"), - "Subset must be a vector of row indices or names.") - - # Test for invalid dimensions - expect_error(miaViz:::.check_neatsort_args(check_args_matrix, NULL, c(1, 6), "mean", "ascending"), - "dimensions refer to columns that do not exist in the data.") - expect_error(miaViz:::.check_neatsort_args(check_args_matrix, NULL, c(1, 2, 3), "mean", "ascending"), - "Exactly two dimensions must be specified.") - - # Test for invalid centering_method - expect_error(miaViz:::.check_neatsort_args(check_args_matrix, NULL, c(1, 2), "invalid_method", "ascending"), - "'arg' should be one of “mean”, “median”, “none”") - - # Test for invalid sorting_order - expect_error(miaViz:::.check_neatsort_args(check_args_matrix, NULL, c(1, 2), "mean", "invalid_order"), - "'arg' should be one of “ascending”, “descending”") - - # Test for non-unique row names - non_unique_row_matrix <- matrix(1:20, nrow = 4, ncol = 5, - dimnames = list(c("Sample1", "Sample2", "Sample1", "Sample4"), - c("PC1", "PC2", "PC3", "PC4", "PC5"))) - expect_error(miaViz:::.check_neatsort_args(non_unique_row_matrix, NULL, c(1, 2), "mean", "ascending"), - "Row names of the matrix must be unique.") - - # Test for non-unique column names - non_unique_col_matrix <- matrix(1:20, nrow = 4, ncol = 5, - dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), - c("PC1", "PC2", "PC1", "PC4", "PC5"))) - expect_error(miaViz:::.check_neatsort_args(non_unique_col_matrix, NULL, c(1, 2), "mean", "ascending"), - "Column names of the matrix must be unique.") - -}) - - -context("neatsort") -neatsort_matrix <- matrix(c(10, 8, 2, 8, 3, - 4, 4, 4, 6, 5, - 5, 4, 4, 1, 2, - 2, 3, 5, 7, 8), nrow = 4, ncol = 5, byrow = TRUE, - dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), - c("PC1", "PC2", "PC3", "PC4", "PC5"))) - -test_that("Test neatsort function", { - # Test with valid inputs, no subset - result <- miaViz::neatsort(neatsort_matrix, subset = NULL, dimensions = c(1, 2), centering_method = "mean", sorting_order = "ascending") - expected <- c("Sample4", "Sample2", "Sample3", "Sample1") - expect_equal(result, expected) - - # Test with valid inputs and subset - subset <- c("Sample1", "Sample3") - result <- miaViz::neatsort(neatsort_matrix, subset = subset, dimensions = c(1, 2), centering_method = "mean", sorting_order = "ascending") - expected <- c("Sample3", "Sample1") - expect_equal(result, expected) -}) - From e63fae2ab98c5c8ebbdc54b9afe6eb0320e9b38a Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Tue, 18 Jun 2024 16:11:45 +0300 Subject: [PATCH 06/21] updated method --- R/getNeatOrder.R | 129 ++++++++++++++++------------- man/getNeatOrder.Rd | 80 +++++++++++------- tests/testthat/test-getNeatOrder.R | 107 +++++++++--------------- 3 files changed, 160 insertions(+), 156 deletions(-) diff --git a/R/getNeatOrder.R b/R/getNeatOrder.R index efd06c34..8ecd75f5 100644 --- a/R/getNeatOrder.R +++ b/R/getNeatOrder.R @@ -6,7 +6,7 @@ #' has been applied. #' #' The function takes in a matrix of ordinated data, optionally -#' centers the data using specified methods (mean, median, or none), and then calculates +#' centers the data using specified methods (\code{mean}, \code{median}, or \code{none}), and then calculates #' the angle (theta) for each point relative to the centroid. The data points are then #' sorted based on these theta values in either ascending or descending order. #' @@ -17,38 +17,64 @@ #' allows for a more faithful representation of the data's intrinsic structure as captured #' by the ordination process. #' -#' @param x A matrix containing the ordinated data to be sorted. Columns should represent the principal components (PCs) and rows should represent the entities being analyzed (e.g., features or samples). -#' @param subset A vector specifying a subset of rows to be used and retained. If NULL, all rows are used. -#' @param dimensions A vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates. -#' @param centering_method A character string specifying the method to center the data. Options are "mean", "median", or "none" if your data is already centred. -#' @param decreasing A boolean that when true sorts the rows in a descending order by radial theta angle. Default is False. +#' @param x A matrix containing the ordinated data to be sorted. Columns should represent the principal components (PCs) and rows should represent the entities being analyzed (e.g. features or samples). +#' @param dimensions A \code{character} or \code{integer} vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates. +#' @param centering.method A single \code{character} value specifying the method to center the data. Options are \code{mean}, \code{median}, or \code{none} if your data is already centered. (default: method = \code{mean}) +#' @param decreasing A \code{boolean} that when \code{TRUE} sorts the rows in a descending order by radial theta angle. (default: descending = \code{FALSE}) #' @param ... Additional arguments passed to other methods. -#' @return A vector of row names in the sorted order. +#' @return A \code{character} vector of row names in the sorted order. +#' +#' @details +#' It's important to note that the sechm package does actually have the functionality for plotting a heatmap using this radial theta angle ordering, though only by using an MDS ordination. +#' +#' This functionality can be found at: +#' +#' \url{https://bioconductor.org/packages/3.18/bioc/vignettes/sechm/inst/doc/sechm.html#row-ordering}. +#' +#' That being said, the \code{getNeatOrder} function is more modular and separate to the plotting, and +#' can be applied to any kind of ordinated data which can be valuable depending on the use case. +#' +#' @references +#' The below paper outlines the NeatMap method in more detail: +#' +#' Rajaram, S., Oono, Y. NeatMap - non-clustering heat map alternatives in R. BMC Bioinformatics 11, 45 (2010). https://doi.org/10.1186/1471-2105-11-45. +#' +#' It can be found at: +#' +#' \url{https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-45}. #' #' @name getNeatOrder #' #' @examples -#' # Load required libraries +#' ## Load the required libraries and dataset #' library(mia) -#' -#' # Load the dataset +#' library(scater) +#' library(sechm) #' data(peerj13075) #' -#' # Agglomerate by Order and transform the data -#' tse_order <- agglomerateByRank(peerj13075, rank = "order", onRankOnly = TRUE) -#' tse_order <- transformAssay(tse_order, assay.type = "counts", method="relabundance", MARGIN = "samples", name="relabundance") -#' tse_order <- transformAssay(tse_order, assay.type="relabundance", method="z", MARGIN = "features", name="z") -#' z_transformed_data <- assay(tse_order, "z") -#' -#' # Get the top taxa and perform PCA -#' top_taxa <- getTopFeatures(tse_order, top = 10, assay.type="z") -#' top_feature_data <- z_transformed_data[top_taxa, ] -#' pca_results <- prcomp(top_feature_data, scale = TRUE) -#' scores_pca <- pca_results$x[, 1:2] -#' -#' # Sort by radial theta and subset the transformed data -#' sorted_order <- getNeatOrder(scores_pca, dimensions = c(1, 2), centering_method = "mean") -#' ordered_transformed_data <- z_transformed_data[sorted_order, ] +#' ## Group data by taxonomic order +#' tse <- agglomerateByRank(peerj13075, rank = "order", onRankOnly = TRUE) +#' +#' ## Transform the samples into relative abundances +#' tse <- transformAssay(tse, assay.type = "counts", method="relabundance", MARGIN = "samples", name="relabundance") +#' +#' ## Transform the features (taxa) into zero mean, unit variance (z transformation) +#' tse <- transformAssay(tse, assay.type="relabundance", method="z", MARGIN = "features", name="z") +#' +#' ## Perform PCA using calculatePCA +#' pca_result <- calculatePCA(tse, ncomponents = 10, assay.type = "z") +#' +#' ## Add PCA results to the TreeSE object +#' reducedDim(tse, "PCA") <- pca_result +#' +#' ## Sort by radial theta and sort the original assay data +#' sorted_order <- getNeatOrder(reducedDim(tse, "PCA"), dimensions = c(1, 2), centering.method = "mean") +#' tse <- tse[, sorted_order] +#' +#' ## Create the heatmap with sechm whilst retaining this radial theta ordering +#' features <- rownames(assay(tse, "z")) +#' sechm_plot <- sechm(tse, assayName = "z", features=features, do.scale=FALSE, cluster_rows=FALSE, +#' sortRowsOn = NULL) NULL #' @rdname getNeatOrder @@ -62,25 +88,19 @@ setGeneric("getNeatOrder", signature = c("x"), #' @export setMethod("getNeatOrder", signature = c("matrix"), function(x, - subset = NULL, dimensions = c(1, 2), - centering_method = c("mean", "median", "none"), + centering.method = c("mean", "median", "none"), decreasing = FALSE, ...){ # Check args - .check_args(x, subset, dimensions, centering_method, decreasing) - - # Create subset if required - if( !is.null(subset) ){ - x <- x[subset, , drop = FALSE] - } + .check_args(x, subset, dimensions, centering.method, decreasing) # Take the correct dimensions x <- x[, dimensions, drop = FALSE] # Get the theta values and order them - theta_values <- .radial_theta(x, centering_method) + theta_values <- .radial_theta(x, centering.method) ordering <- .get_sorted_rownames(theta_values, decreasing) return(ordering) @@ -89,7 +109,7 @@ setMethod("getNeatOrder", signature = c("matrix"), # Checks the method arguments. -.check_args <- function(x, subset, dimensions, centering_method, decreasing) { +.check_args <- function(x, subset, dimensions, centering.method, decreasing) { # Check data is a matrix if (!is.matrix(x)) { stop("Input data must be a matrix.", call. = FALSE) @@ -100,17 +120,6 @@ setMethod("getNeatOrder", signature = c("matrix"), stop("No data to plot. Matrix must have at least one row and one column.", call. = FALSE) } - # Check subset validity - if (!is.null(subset)) { - if (is.numeric(subset) && any(subset > nrow(x))) { - stop("Subset refers to rows that do not exist in the data.", call. = FALSE) - } else if (is.character(subset) && any(!subset %in% rownames(x))) { - stop("Subset refers to row names that do not exist in the data.", call. = FALSE) - } else if (!is.numeric(subset) && !is.character(subset)) { - stop("Subset must be a vector of row indices or names.", call. = FALSE) - } - } - # Check dimensions are valid if (is.numeric(dimensions)) { if (any(dimensions > ncol(x) | dimensions < 1)) { @@ -129,8 +138,8 @@ setMethod("getNeatOrder", signature = c("matrix"), stop("Exactly two dimensions must be specified.", call. = FALSE) } - # Check centering_method - centering_method <- match.arg(centering_method, c("mean", "median", "none")) + # Check centering.method + centering.method <- match.arg(centering.method, c("mean", "median", "none")) # Check decreasing if (!is.logical(decreasing) || length(decreasing) != 1) { @@ -152,28 +161,30 @@ setMethod("getNeatOrder", signature = c("matrix"), # Computes the radial theta values for each row in the data matrix. -.radial_theta <- function(data, centering_method) { - if (centering_method == "mean") { - centered_data <- scale(data, center = TRUE, scale = FALSE) - } else if (centering_method == "median") { - centered_data <- scale(data, center = apply(data, 2, median), scale = FALSE) - } else if (centering_method == "none") { +.radial_theta <- function(data, centering.method) { + # Choose the correct centering function based on the centering.method + center_fun <- switch(centering.method, "median" = median, "mean" = mean) + + # Apply the centering if there's a centering.method present + if (!is.null(center_fun)) { + center_vals <- apply(data, 2, center_fun) + centered_data <- scale(data, center = center_vals, scale = FALSE) + } else if (centering.method == "none") { centered_data <- data - } else { - stop("Unsupported centering method. Choose either 'mean', 'median', 'mode', or 'none'.", call. = FALSE) } + # Compute the radial theta values using the centered data theta <- atan2(centered_data[, 2], centered_data[, 1]) - names(theta) <- rownames(centered_data) + # Set the names of theta values to the row names of the centered data and return the theta values + names(theta) <- rownames(centered_data) return(theta) } + # Sorts the theta values and returns the ordered row names. .get_sorted_rownames <- function(theta_values, decreasing) { sorted_indices <- order(theta_values, decreasing = decreasing) rownames <- names(theta_values)[sorted_indices] return(rownames) } - - diff --git a/man/getNeatOrder.Rd b/man/getNeatOrder.Rd index 51efdac1..3315012e 100644 --- a/man/getNeatOrder.Rd +++ b/man/getNeatOrder.Rd @@ -9,28 +9,25 @@ getNeatOrder(x, ...) \S4method{getNeatOrder}{matrix}( x, - subset = NULL, dimensions = c(1, 2), - centering_method = c("mean", "median", "none"), + centering.method = c("mean", "median", "none"), decreasing = FALSE, ... ) } \arguments{ -\item{x}{A matrix containing the ordinated data to be sorted. Columns should represent the principal components (PCs) and rows should represent the entities being analyzed (e.g., features or samples).} +\item{x}{A matrix containing the ordinated data to be sorted. Columns should represent the principal components (PCs) and rows should represent the entities being analyzed (e.g. features or samples).} \item{...}{Additional arguments passed to other methods.} -\item{subset}{A vector specifying a subset of rows to be used and retained. If NULL, all rows are used.} +\item{dimensions}{A \code{character} or \code{integer} vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates.} -\item{dimensions}{A vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates.} +\item{centering.method}{A single \code{character} value specifying the method to center the data. Options are \code{mean}, \code{median}, or \code{none} if your data is already centered. (default: method = \code{mean})} -\item{centering_method}{A character string specifying the method to center the data. Options are "mean", "median", or "none" if your data is already centred.} - -\item{decreasing}{A boolean that when true sorts the rows in a descending order by radial theta angle. Default is False.} +\item{decreasing}{A \code{boolean} that when \code{TRUE} sorts the rows in a descending order by radial theta angle. (default: descending = \code{FALSE})} } \value{ -A vector of row names in the sorted order. +A \code{character} vector of row names in the sorted order. } \description{ \code{getNeatOrder} sorts already ordinated data by the radial theta angle. @@ -39,7 +36,7 @@ position in a 2D space, typically after an ordination technique such as PCA or N has been applied. The function takes in a matrix of ordinated data, optionally -centers the data using specified methods (mean, median, or none), and then calculates +centers the data using specified methods (\code{mean}, \code{median}, or \code{none}), and then calculates the angle (theta) for each point relative to the centroid. The data points are then sorted based on these theta values in either ascending or descending order. @@ -50,26 +47,53 @@ hierarchical clustering, which may distort these relationships. This approach allows for a more faithful representation of the data's intrinsic structure as captured by the ordination process. } +\details{ +It's important to note that the sechm package does actually have the functionality for plotting a heatmap using this radial theta angle ordering, though only by using an MDS ordination. + +This functionality can be found at: + +\url{https://bioconductor.org/packages/3.18/bioc/vignettes/sechm/inst/doc/sechm.html#row-ordering}. + +That being said, the \code{getNeatOrder} function is more modular and separate to the plotting, and +can be applied to any kind of ordinated data which can be valuable depending on the use case. +} \examples{ -# Load required libraries +## Load the required libraries and dataset library(mia) - -# Load the dataset +library(scater) +library(sechm) data(peerj13075) -# Agglomerate by Order and transform the data -tse_order <- agglomerateByRank(peerj13075, rank = "order", onRankOnly = TRUE) -tse_order <- transformAssay(tse_order, assay.type = "counts", method="relabundance", MARGIN = "samples", name="relabundance") -tse_order <- transformAssay(tse_order, assay.type="relabundance", method="z", MARGIN = "features", name="z") -z_transformed_data <- assay(tse_order, "z") - -# Get the top taxa and perform PCA -top_taxa <- getTopFeatures(tse_order, top = 10, assay.type="z") -top_feature_data <- z_transformed_data[top_taxa, ] -pca_results <- prcomp(top_feature_data, scale = TRUE) -scores_pca <- pca_results$x[, 1:2] - -# Sort by radial theta and subset the transformed data -sorted_order <- getNeatOrder(scores_pca, dimensions = c(1, 2), centering_method = "mean") -ordered_transformed_data <- z_transformed_data[sorted_order, ] +## Group data by taxonomic order +tse <- agglomerateByRank(peerj13075, rank = "order", onRankOnly = TRUE) + +## Transform the samples into relative abundances +tse <- transformAssay(tse, assay.type = "counts", method="relabundance", MARGIN = "samples", name="relabundance") + +## Transform the features (taxa) into zero mean, unit variance (z transformation) +tse <- transformAssay(tse, assay.type="relabundance", method="z", MARGIN = "features", name="z") + +## Perform PCA using calculatePCA +pca_result <- calculatePCA(tse, ncomponents = 10, assay.type = "z") + +## Add PCA results to the TreeSE object +reducedDim(tse, "PCA") <- pca_result + +## Sort by radial theta and sort the original assay data +sorted_order <- getNeatOrder(reducedDim(tse, "PCA"), dimensions = c(1, 2), centering.method = "mean") +tse <- tse[, sorted_order] + +## Create the heatmap with sechm whilst retaining this radial theta ordering +features <- rownames(assay(tse, "z")) +sechm_plot <- sechm(tse, assayName = "z", features=features, do.scale=FALSE, cluster_rows=FALSE, + sortRowsOn = NULL) +} +\references{ +The below paper outlines the NeatMap method in more detail: + +Rajaram, S., Oono, Y. NeatMap - non-clustering heat map alternatives in R. BMC Bioinformatics 11, 45 (2010). https://doi.org/10.1186/1471-2105-11-45. + +It can be found at: + +\url{https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-45}. } diff --git a/tests/testthat/test-getNeatOrder.R b/tests/testthat/test-getNeatOrder.R index 332e54bb..64529af0 100644 --- a/tests/testthat/test-getNeatOrder.R +++ b/tests/testthat/test-getNeatOrder.R @@ -10,32 +10,20 @@ neatsort_matrix <- matrix(c(10, 8, 2, 8, 3, c("PC1", "PC2", "PC3", "PC4", "PC5"))) test_that("Test getNeatOrder function", { - # Test with valid inputs, no subset - result <- miaViz::getNeatOrder(neatsort_matrix, dimensions = c(1, 2), centering_method = "mean") + # Test with valid inputs + result <- getNeatOrder(neatsort_matrix, dimensions = c(1, 2), centering.method = "mean") expected <- c("Sample4", "Sample2", "Sample3", "Sample1") expect_equal(result, expected) - # Test with valid inputs, no subset, decreasing parameter = TRUE - result <- miaViz::getNeatOrder(neatsort_matrix, dimensions = c(1, 2), centering_method = "mean", decreasing = TRUE) + # Test with valid inputs, decreasing parameter = TRUE + result <- getNeatOrder(neatsort_matrix, dimensions = c(1, 2), centering.method = "mean", decreasing = TRUE) expected <- c("Sample1", "Sample3", "Sample2", "Sample4") expect_equal(result, expected) # Test with dimensions col names - result <- miaViz::getNeatOrder(neatsort_matrix, dimensions = c("PC1", "PC2"), centering_method = "mean") + result <- getNeatOrder(neatsort_matrix, dimensions = c("PC1", "PC2"), centering.method = "mean") expected <- c("Sample4", "Sample2", "Sample3", "Sample1") expect_equal(result, expected) - - # Test with valid inputs and subset row names - subset <- c("Sample1", "Sample3") - result <- miaViz::getNeatOrder(neatsort_matrix, subset, dimensions = c(1, 2), centering_method = "mean") - expected <- c("Sample3", "Sample1") - expect_equal(result, expected) - - # Test with valid inputs and subset row indices - subset <- c(1, 3) - result <- miaViz::getNeatOrder(neatsort_matrix, subset, dimensions = c(1, 2), centering_method = "mean") - expected <- c("Sample3", "Sample1") - expect_equal(result, expected) }) @@ -46,69 +34,54 @@ check_args_matrix <- matrix(1:20, nrow = 4, ncol = 5, test_that("Test check_args method", { # Argument errors - expect_error(miaViz:::check_args(), - "object 'check_args' not found") - expect_error(miaViz:::.check_args(check_args_matrix), - 'argument "subset" is missing') - expect_error(miaViz:::.check_args(check_args_matrix, c(1, 3, 4)), - 'argument \"dimensions\" is missing, with no default') - expect_error(miaViz:::.check_args(check_args_matrix, c(1, 3, 4), c(1, 3)), - 'argument \"centering_method\" is missing, with no default') - expect_error(miaViz:::.check_args(check_args_matrix, c(1, 3, 4), c(1, 3), "mean"), + expect_error(.check_args(), + "argument \"x\" is missing, with no default") + expect_error(.check_args(check_args_matrix), + "argument \"dimensions\" is missing, with no default") + expect_error(.check_args(check_args_matrix, c(1, 3, 4), c(1, 3)), + "argument \"centering.method\" is missing, with no default") + expect_error(.check_args(check_args_matrix, c(1, 3, 4), c(1, 3), "mean"), 'argument "decreasing" is missing') # Non-matrix input - expect_error(miaViz:::.check_args(as.data.frame(check_args_matrix), NULL, c(1, 2), "mean", FALSE), + expect_error(.check_args(as.data.frame(check_args_matrix), NULL, c(1, 2), "mean", FALSE), "Input data must be a matrix.") # Test for empty matrix empty_matrix <- matrix(numeric(0), nrow = 0, ncol = 0) - expect_error(miaViz:::.check_args(empty_matrix, NULL, c(1, 2), "mean", FALSE), + expect_error(.check_args(empty_matrix, NULL, c(1, 2), "mean", FALSE), "No data to plot. Matrix must have at least one row and one column.") - # Test valid subset - expect_silent(miaViz:::.check_args(check_args_matrix, NULL, c(1, 2), "mean", FALSE)) - expect_silent(miaViz:::.check_args(check_args_matrix, c(1, 3, 4), c(1, 2), "mean", FALSE)) - expect_silent(miaViz:::.check_args(check_args_matrix, c("Sample1", "Sample2"), c(1, 2), "mean", FALSE)) - - # Test invalid subset - expect_error(miaViz:::.check_args(check_args_matrix, 5, c(1, 2), "mean", FALSE), - "Subset refers to rows that do not exist in the data.") - expect_error(miaViz:::.check_args(check_args_matrix, "Sample5", c(1, 2), "mean", FALSE), - "Subset refers to row names that do not exist in the data.") - expect_error(miaViz:::.check_args(check_args_matrix, list(1, 2), c(1, 2), "mean", FALSE), - "Subset must be a vector of row indices or names.") - # Test for invalid dimensions - expect_error(miaViz:::.check_args(check_args_matrix, NULL, c(1, 6), "mean", FALSE), + expect_error(.check_args(check_args_matrix, NULL, c(1, 6), "mean", FALSE), "dimensions refer to columns that do not exist in the data.") - expect_error(miaViz:::.check_args(check_args_matrix, NULL, c("PC6"), "mean", FALSE), + expect_error(.check_args(check_args_matrix, NULL, c("PC6"), "mean", FALSE), "dimensions refer to column names that do not exist in the data.") - expect_error(miaViz:::.check_args(check_args_matrix, NULL, list("PC6"), "mean", FALSE), + expect_error(.check_args(check_args_matrix, NULL, list("PC6"), "mean", FALSE), "dimensions must be a vector of column indices or names.") - expect_error(miaViz:::.check_args(check_args_matrix, NULL, c(1, 2, 3), "mean", FALSE), + expect_error(.check_args(check_args_matrix, NULL, c(1, 2, 3), "mean", FALSE), "Exactly two dimensions must be specified.") # Test for invalid centering_method - expect_error(miaViz:::.check_args(check_args_matrix, NULL, c(1, 2), "invalid_method", FALSE), + expect_error(.check_args(check_args_matrix, NULL, c(1, 2), "invalid_method", FALSE), "'arg' should be one of “mean”, “median”, “none”") - # Test for invalid sorting_order - expect_error(miaViz:::.check_args(check_args_matrix, NULL, c(1, 2), "mean", "invalid_order"), + # Test for invalid sorting order + expect_error(.check_args(check_args_matrix, NULL, c(1, 2), "mean", "invalid_order"), "decreasing must be a single boolean value.") # Test for non-unique row names non_unique_row_matrix <- matrix(1:20, nrow = 4, ncol = 5, dimnames = list(c("Sample1", "Sample2", "Sample1", "Sample4"), c("PC1", "PC2", "PC3", "PC4", "PC5"))) - expect_error(miaViz:::.check_args(non_unique_row_matrix, NULL, c(1, 2), "mean", FALSE), + expect_error(.check_args(non_unique_row_matrix, NULL, c(1, 2), "mean", FALSE), "Row names of the matrix must be unique.") # Test for non-unique column names non_unique_col_matrix <- matrix(1:20, nrow = 4, ncol = 5, dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), c("PC1", "PC2", "PC1", "PC4", "PC5"))) - expect_error(miaViz:::.check_args(non_unique_col_matrix, NULL, c(1, 2), "mean", FALSE), + expect_error(.check_args(non_unique_col_matrix, NULL, c(1, 2), "mean", FALSE), "Column names of the matrix must be unique.") }) @@ -121,14 +94,14 @@ radial_theta_matrix <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8), ncol = 2, byrow = TRUE, test_that("Test radial_theta method", { # Argument errors - expect_error(miaViz:::.radial_theta(), - 'argument \"centering_method\" is missing, with no default') - expect_error(miaViz:::.radial_theta(radial_theta_matrix), - 'argument "centering_method" is missing') + expect_error(.radial_theta(), + "argument \"centering.method\" is missing, with no default") + expect_error(.radial_theta(radial_theta_matrix), + "argument \"centering.method\" is missing, with no default") # Centering by mean centering_method <- "mean" - result <- miaViz:::.radial_theta(radial_theta_matrix, centering_method) + result <- .radial_theta(radial_theta_matrix, centering_method) centered_data <- matrix(c(-3, -3, -1, -1, 1, 1, 3, 3), ncol = 2, byrow = TRUE, dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), c("PC1", "PC2"))) @@ -138,7 +111,7 @@ test_that("Test radial_theta method", { # Centering by median centering_method <- "median" - result <- miaViz:::.radial_theta(radial_theta_matrix, centering_method) + result <- .radial_theta(radial_theta_matrix, centering_method) centered_data <- matrix(c(-3, -3, -1, -1, 1, 1, 3, 3), ncol = 2, byrow = TRUE, dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), c("PC1", "PC2"))) @@ -148,15 +121,11 @@ test_that("Test radial_theta method", { # No centering centering_method <- "none" - result <- miaViz:::.radial_theta(radial_theta_matrix, centering_method) + result <- .radial_theta(radial_theta_matrix, centering_method) centered_data <- radial_theta_matrix expected <- atan2(centered_data[, 2], centered_data[, 1]) names(expected) <- rownames(centered_data) expect_equal(result, expected) - - # Unsupported centering method - centering_method <- "unsupported" - expect_error(miaViz:::.radial_theta(radial_theta_matrix, centering_method), "Unsupported centering method. Choose either 'mean', 'median', 'mode', or 'none'.") }) @@ -165,38 +134,38 @@ theta_values <- c(Sample1 = 0.5, Sample2 = -1.2, Sample3 = 1.5, Sample4 = -0.8) test_that("Test get_sorted_rownames method", { # Argument errors - expect_error(miaViz:::.get_sorted_rownames(), + expect_error(.get_sorted_rownames(), 'argument "theta_values" is missing') - expect_error(miaViz:::.get_sorted_rownames(theta_values), + expect_error(.get_sorted_rownames(theta_values), 'argument "decreasing" is missing') # Valid sorting in ascending order expected <- c("Sample2", "Sample4", "Sample1", "Sample3") - result <- miaViz:::.get_sorted_rownames(theta_values, FALSE) + result <- .get_sorted_rownames(theta_values, FALSE) expect_equal(result, expected) # Sorting in descending order - result <- miaViz:::.get_sorted_rownames(theta_values, TRUE) + result <- .get_sorted_rownames(theta_values, TRUE) expected <- c("Sample3", "Sample1", "Sample4", "Sample2") expect_equal(result, expected) # Edge case: all theta values are the same theta_values <- c(Sample1 = 0.5, Sample2 = 0.5, Sample3 = 0.5, Sample4 = 0.5) - result <- miaViz:::.get_sorted_rownames(theta_values, FALSE) + result <- .get_sorted_rownames(theta_values, FALSE) expected <- c("Sample1", "Sample2", "Sample3", "Sample4") # Order should be maintained expect_equal(result, expected) - result <- miaViz:::.get_sorted_rownames(theta_values, TRUE) + result <- .get_sorted_rownames(theta_values, TRUE) expected <- c("Sample1", "Sample2", "Sample3", "Sample4") # Order should be maintained expect_equal(result, expected) # Edge case: theta values contain NULL theta_values <- c(Sample1 = 0.5, Sample2 = NULL, Sample3 = 1.5, Sample4 = -0.8) - result <- miaViz:::.get_sorted_rownames(theta_values, FALSE) + result <- .get_sorted_rownames(theta_values, FALSE) expected <- c("Sample4", "Sample1", "Sample3") expect_equal(result, expected) - result <- miaViz:::.get_sorted_rownames(theta_values, TRUE) + result <- .get_sorted_rownames(theta_values, TRUE) expected <- c("Sample3", "Sample1", "Sample4") expect_equal(result, expected) }) From dd4fa00befbed822d9319cf1f58e0867e180c2f6 Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Thu, 20 Jun 2024 14:14:07 +0300 Subject: [PATCH 07/21] updated the method - not I have used the z version of transform assay not scale because it was failing the devtools::run_examples() check with scale --- R/getNeatOrder.R | 113 ++++++++++++----------------- man/getNeatOrder.Rd | 72 +++++++++--------- tests/testthat/test-getNeatOrder.R | 79 ++++++-------------- 3 files changed, 105 insertions(+), 159 deletions(-) diff --git a/R/getNeatOrder.R b/R/getNeatOrder.R index 8ecd75f5..53eb25aa 100644 --- a/R/getNeatOrder.R +++ b/R/getNeatOrder.R @@ -8,7 +8,7 @@ #' The function takes in a matrix of ordinated data, optionally #' centers the data using specified methods (\code{mean}, \code{median}, or \code{none}), and then calculates #' the angle (theta) for each point relative to the centroid. The data points are then -#' sorted based on these theta values in either ascending or descending order. +#' sorted based on these theta values in ascending order. #' #' One significant application of this sorting method is in plotting heatmaps. #' By using radial theta sorting, the relationships between data points can be preserved @@ -17,15 +17,13 @@ #' allows for a more faithful representation of the data's intrinsic structure as captured #' by the ordination process. #' -#' @param x A matrix containing the ordinated data to be sorted. Columns should represent the principal components (PCs) and rows should represent the entities being analyzed (e.g. features or samples). -#' @param dimensions A \code{character} or \code{integer} vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates. -#' @param centering.method A single \code{character} value specifying the method to center the data. Options are \code{mean}, \code{median}, or \code{none} if your data is already centered. (default: method = \code{mean}) -#' @param decreasing A \code{boolean} that when \code{TRUE} sorts the rows in a descending order by radial theta angle. (default: descending = \code{FALSE}) +#' @param x A matrix containing the ordinated data to be sorted. Columns should represent the principal components (PCs) and rows should represent the entities being analyzed (e.g. features or samples). There should be 2 columns only representing 2 PCs. +#' @param centering A single \code{character} value specifying the method to center the data. Options are \code{mean}, \code{median}, or \code{none} if your data is already centered. (default: method = \code{mean}) #' @param ... Additional arguments passed to other methods. #' @return A \code{character} vector of row names in the sorted order. #' #' @details -#' It's important to note that the sechm package does actually have the functionality for plotting a heatmap using this radial theta angle ordering, though only by using an MDS ordination. +#' It's important to note that the \pkg{sechm} package does actually have the functionality for plotting a heatmap using this radial theta angle ordering, though only by using an MDS ordination. #' #' This functionality can be found at: #' @@ -34,14 +32,9 @@ #' That being said, the \code{getNeatOrder} function is more modular and separate to the plotting, and #' can be applied to any kind of ordinated data which can be valuable depending on the use case. #' -#' @references #' The below paper outlines the NeatMap method in more detail: #' -#' Rajaram, S., Oono, Y. NeatMap - non-clustering heat map alternatives in R. BMC Bioinformatics 11, 45 (2010). https://doi.org/10.1186/1471-2105-11-45. -#' -#' It can be found at: -#' -#' \url{https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-45}. +#' "Rajaram & Oono (2010)" - https://doi.org/10.1186/1471-2105-11-45. #' #' @name getNeatOrder #' @@ -49,32 +42,45 @@ #' ## Load the required libraries and dataset #' library(mia) #' library(scater) -#' library(sechm) +#' library(ComplexHeatmap) +#' library(circlize) #' data(peerj13075) #' #' ## Group data by taxonomic order #' tse <- agglomerateByRank(peerj13075, rank = "order", onRankOnly = TRUE) #' -#' ## Transform the samples into relative abundances -#' tse <- transformAssay(tse, assay.type = "counts", method="relabundance", MARGIN = "samples", name="relabundance") +#' ## Add a pseudocount to the counts data +#' assay(tse, "counts") <- assay(tse, "counts") + 1 +#' +#' ## Transform the samples into relative abundances using CLR +#' tse <- transformAssay(tse, assay.type = "counts", method="clr", MARGIN = "samples", name="clr") #' #' ## Transform the features (taxa) into zero mean, unit variance (z transformation) -#' tse <- transformAssay(tse, assay.type="relabundance", method="z", MARGIN = "features", name="z") +#' tse <- transformAssay(tse, assay.type="clr", method="z", MARGIN = "features", name="z") #' #' ## Perform PCA using calculatePCA -#' pca_result <- calculatePCA(tse, ncomponents = 10, assay.type = "z") -#' -#' ## Add PCA results to the TreeSE object -#' reducedDim(tse, "PCA") <- pca_result +#' reducedDim(tse, "PCA") <- calculatePCA(tse, ncomponents = 10, assay.type = "z") #' #' ## Sort by radial theta and sort the original assay data -#' sorted_order <- getNeatOrder(reducedDim(tse, "PCA"), dimensions = c(1, 2), centering.method = "mean") +#' sorted_order <- getNeatOrder(reducedDim(tse, "PCA")[, c(1,2)], centering = "mean") #' tse <- tse[, sorted_order] #' -#' ## Create the heatmap with sechm whilst retaining this radial theta ordering -#' features <- rownames(assay(tse, "z")) -#' sechm_plot <- sechm(tse, assayName = "z", features=features, do.scale=FALSE, cluster_rows=FALSE, -#' sortRowsOn = NULL) +#' ## Define the color function and cap the colors at [-5, 5] +#' col_fun <- colorRamp2(c(-5, 0, 5), c("blue", "white", "red")) +#' +#' ## Create the heatmap +#' heatmap <- Heatmap(assay(tse, "z"), +#' name = "NeatMap", +#' col = col_fun, +#' cluster_rows = FALSE, # Do not cluster rows +#' cluster_columns = FALSE, # Do not cluster columns +#' show_row_dend = FALSE, +#' show_column_dend = FALSE, +#' row_names_gp = gpar(fontsize = 4), +#' column_names_gp = gpar(fontsize = 6), +#' heatmap_width = unit(20, "cm"), +#' heatmap_height = unit(15, "cm") +#' ) NULL #' @rdname getNeatOrder @@ -88,20 +94,15 @@ setGeneric("getNeatOrder", signature = c("x"), #' @export setMethod("getNeatOrder", signature = c("matrix"), function(x, - dimensions = c(1, 2), - centering.method = c("mean", "median", "none"), - decreasing = FALSE, + centering = "mean", ...){ # Check args - .check_args(x, subset, dimensions, centering.method, decreasing) - - # Take the correct dimensions - x <- x[, dimensions, drop = FALSE] + .check_args(x, centering) # Get the theta values and order them - theta_values <- .radial_theta(x, centering.method) - ordering <- .get_sorted_rownames(theta_values, decreasing) + theta_values <- .radial_theta(x, centering) + ordering <- .get_sorted_rownames(theta_values) return(ordering) } @@ -109,7 +110,7 @@ setMethod("getNeatOrder", signature = c("matrix"), # Checks the method arguments. -.check_args <- function(x, subset, dimensions, centering.method, decreasing) { +.check_args <- function(x, centering) { # Check data is a matrix if (!is.matrix(x)) { stop("Input data must be a matrix.", call. = FALSE) @@ -120,31 +121,13 @@ setMethod("getNeatOrder", signature = c("matrix"), stop("No data to plot. Matrix must have at least one row and one column.", call. = FALSE) } - # Check dimensions are valid - if (is.numeric(dimensions)) { - if (any(dimensions > ncol(x) | dimensions < 1)) { - stop("dimensions refer to columns that do not exist in the data.", call. = FALSE) - } - } else if (is.character(dimensions)) { - if (any(!dimensions %in% colnames(x))) { - stop("dimensions refer to column names that do not exist in the data.", call. = FALSE) - } - } else { - stop("dimensions must be a vector of column indices or names.", call. = FALSE) - } - - # Check dimension vector is of length 2 - if (length(dimensions) != 2) { - stop("Exactly two dimensions must be specified.", call. = FALSE) + # Check there is sufficient data + if (ncol(x) != 2) { + stop("Matrix must have only 2 columns.", call. = FALSE) } - # Check centering.method - centering.method <- match.arg(centering.method, c("mean", "median", "none")) - - # Check decreasing - if (!is.logical(decreasing) || length(decreasing) != 1) { - stop("decreasing must be a single boolean value.", call. = FALSE) - } + # Check centering argument + centering <- match.arg(centering, c("mean", "median", "none")) # Check for unique row names if (any(duplicated(rownames(x)))) { @@ -161,15 +144,15 @@ setMethod("getNeatOrder", signature = c("matrix"), # Computes the radial theta values for each row in the data matrix. -.radial_theta <- function(data, centering.method) { - # Choose the correct centering function based on the centering.method - center_fun <- switch(centering.method, "median" = median, "mean" = mean) +.radial_theta <- function(data, centering) { + # Choose the correct centering function based on the method + center_fun <- switch(centering, "median" = median, "mean" = mean) - # Apply the centering if there's a centering.method present + # Apply the centering if there's a method present if (!is.null(center_fun)) { center_vals <- apply(data, 2, center_fun) centered_data <- scale(data, center = center_vals, scale = FALSE) - } else if (centering.method == "none") { + } else if (centering == "none") { centered_data <- data } @@ -183,8 +166,8 @@ setMethod("getNeatOrder", signature = c("matrix"), # Sorts the theta values and returns the ordered row names. -.get_sorted_rownames <- function(theta_values, decreasing) { - sorted_indices <- order(theta_values, decreasing = decreasing) +.get_sorted_rownames <- function(theta_values) { + sorted_indices <- order(theta_values) rownames <- names(theta_values)[sorted_indices] return(rownames) } diff --git a/man/getNeatOrder.Rd b/man/getNeatOrder.Rd index 3315012e..355225a8 100644 --- a/man/getNeatOrder.Rd +++ b/man/getNeatOrder.Rd @@ -7,24 +7,14 @@ \usage{ getNeatOrder(x, ...) -\S4method{getNeatOrder}{matrix}( - x, - dimensions = c(1, 2), - centering.method = c("mean", "median", "none"), - decreasing = FALSE, - ... -) +\S4method{getNeatOrder}{matrix}(x, centering = "mean", ...) } \arguments{ -\item{x}{A matrix containing the ordinated data to be sorted. Columns should represent the principal components (PCs) and rows should represent the entities being analyzed (e.g. features or samples).} +\item{x}{A matrix containing the ordinated data to be sorted. Columns should represent the principal components (PCs) and rows should represent the entities being analyzed (e.g. features or samples). There should be 2 columns only representing 2 PCs.} \item{...}{Additional arguments passed to other methods.} -\item{dimensions}{A \code{character} or \code{integer} vector of length 2 specifying the columns of the matrix to use for the X and Y coordinates.} - -\item{centering.method}{A single \code{character} value specifying the method to center the data. Options are \code{mean}, \code{median}, or \code{none} if your data is already centered. (default: method = \code{mean})} - -\item{decreasing}{A \code{boolean} that when \code{TRUE} sorts the rows in a descending order by radial theta angle. (default: descending = \code{FALSE})} +\item{centering}{A single \code{character} value specifying the method to center the data. Options are \code{mean}, \code{median}, or \code{none} if your data is already centered. (default: method = \code{mean})} } \value{ A \code{character} vector of row names in the sorted order. @@ -38,7 +28,7 @@ has been applied. The function takes in a matrix of ordinated data, optionally centers the data using specified methods (\code{mean}, \code{median}, or \code{none}), and then calculates the angle (theta) for each point relative to the centroid. The data points are then -sorted based on these theta values in either ascending or descending order. +sorted based on these theta values in ascending order. One significant application of this sorting method is in plotting heatmaps. By using radial theta sorting, the relationships between data points can be preserved @@ -48,7 +38,7 @@ allows for a more faithful representation of the data's intrinsic structure as c by the ordination process. } \details{ -It's important to note that the sechm package does actually have the functionality for plotting a heatmap using this radial theta angle ordering, though only by using an MDS ordination. +It's important to note that the \pkg{sechm} package does actually have the functionality for plotting a heatmap using this radial theta angle ordering, though only by using an MDS ordination. This functionality can be found at: @@ -56,44 +46,52 @@ This functionality can be found at: That being said, the \code{getNeatOrder} function is more modular and separate to the plotting, and can be applied to any kind of ordinated data which can be valuable depending on the use case. + +The below paper outlines the NeatMap method in more detail: + +"Rajaram & Oono (2010)" - https://doi.org/10.1186/1471-2105-11-45. } \examples{ ## Load the required libraries and dataset library(mia) library(scater) -library(sechm) +library(ComplexHeatmap) +library(circlize) data(peerj13075) ## Group data by taxonomic order tse <- agglomerateByRank(peerj13075, rank = "order", onRankOnly = TRUE) -## Transform the samples into relative abundances -tse <- transformAssay(tse, assay.type = "counts", method="relabundance", MARGIN = "samples", name="relabundance") +## Add a pseudocount to the counts data +assay(tse, "counts") <- assay(tse, "counts") + 1 + +## Transform the samples into relative abundances using CLR +tse <- transformAssay(tse, assay.type = "counts", method="clr", MARGIN = "samples", name="clr") ## Transform the features (taxa) into zero mean, unit variance (z transformation) -tse <- transformAssay(tse, assay.type="relabundance", method="z", MARGIN = "features", name="z") +tse <- transformAssay(tse, assay.type="clr", method="z", MARGIN = "features", name="z") ## Perform PCA using calculatePCA -pca_result <- calculatePCA(tse, ncomponents = 10, assay.type = "z") - -## Add PCA results to the TreeSE object -reducedDim(tse, "PCA") <- pca_result +reducedDim(tse, "PCA") <- calculatePCA(tse, ncomponents = 10, assay.type = "z") ## Sort by radial theta and sort the original assay data -sorted_order <- getNeatOrder(reducedDim(tse, "PCA"), dimensions = c(1, 2), centering.method = "mean") +sorted_order <- getNeatOrder(reducedDim(tse, "PCA")[, c(1,2)], centering = "mean") tse <- tse[, sorted_order] -## Create the heatmap with sechm whilst retaining this radial theta ordering -features <- rownames(assay(tse, "z")) -sechm_plot <- sechm(tse, assayName = "z", features=features, do.scale=FALSE, cluster_rows=FALSE, - sortRowsOn = NULL) -} -\references{ -The below paper outlines the NeatMap method in more detail: - -Rajaram, S., Oono, Y. NeatMap - non-clustering heat map alternatives in R. BMC Bioinformatics 11, 45 (2010). https://doi.org/10.1186/1471-2105-11-45. - -It can be found at: - -\url{https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-45}. +## Define the color function and cap the colors at [-5, 5] +col_fun <- colorRamp2(c(-5, 0, 5), c("blue", "white", "red")) + +## Create the heatmap +heatmap <- Heatmap(assay(tse, "z"), + name = "NeatMap", + col = col_fun, + cluster_rows = FALSE, # Do not cluster rows + cluster_columns = FALSE, # Do not cluster columns + show_row_dend = FALSE, + show_column_dend = FALSE, + row_names_gp = gpar(fontsize = 4), + column_names_gp = gpar(fontsize = 6), + heatmap_width = unit(20, "cm"), + heatmap_height = unit(15, "cm") +) } diff --git a/tests/testthat/test-getNeatOrder.R b/tests/testthat/test-getNeatOrder.R index 64529af0..e5b38ec7 100644 --- a/tests/testthat/test-getNeatOrder.R +++ b/tests/testthat/test-getNeatOrder.R @@ -11,17 +11,12 @@ neatsort_matrix <- matrix(c(10, 8, 2, 8, 3, test_that("Test getNeatOrder function", { # Test with valid inputs - result <- getNeatOrder(neatsort_matrix, dimensions = c(1, 2), centering.method = "mean") + result <- getNeatOrder(neatsort_matrix[, c(1,2)], centering = "mean") expected <- c("Sample4", "Sample2", "Sample3", "Sample1") expect_equal(result, expected) - # Test with valid inputs, decreasing parameter = TRUE - result <- getNeatOrder(neatsort_matrix, dimensions = c(1, 2), centering.method = "mean", decreasing = TRUE) - expected <- c("Sample1", "Sample3", "Sample2", "Sample4") - expect_equal(result, expected) - - # Test with dimensions col names - result <- getNeatOrder(neatsort_matrix, dimensions = c("PC1", "PC2"), centering.method = "mean") + # Test with no method input + result <- getNeatOrder(neatsort_matrix[, c(1,2)]) expected <- c("Sample4", "Sample2", "Sample3", "Sample1") expect_equal(result, expected) }) @@ -36,54 +31,39 @@ test_that("Test check_args method", { # Argument errors expect_error(.check_args(), "argument \"x\" is missing, with no default") - expect_error(.check_args(check_args_matrix), - "argument \"dimensions\" is missing, with no default") - expect_error(.check_args(check_args_matrix, c(1, 3, 4), c(1, 3)), - "argument \"centering.method\" is missing, with no default") - expect_error(.check_args(check_args_matrix, c(1, 3, 4), c(1, 3), "mean"), - 'argument "decreasing" is missing') + expect_error(.check_args(check_args_matrix[, c(1,2)]), + "argument \"centering\" is missing, with no default") # Non-matrix input - expect_error(.check_args(as.data.frame(check_args_matrix), NULL, c(1, 2), "mean", FALSE), + expect_error(.check_args(as.data.frame(check_args_matrix)[, c(1,2)], centering = "mean"), "Input data must be a matrix.") # Test for empty matrix empty_matrix <- matrix(numeric(0), nrow = 0, ncol = 0) - expect_error(.check_args(empty_matrix, NULL, c(1, 2), "mean", FALSE), + expect_error(.check_args(empty_matrix, centering = "mean"), "No data to plot. Matrix must have at least one row and one column.") - # Test for invalid dimensions - expect_error(.check_args(check_args_matrix, NULL, c(1, 6), "mean", FALSE), - "dimensions refer to columns that do not exist in the data.") - expect_error(.check_args(check_args_matrix, NULL, c("PC6"), "mean", FALSE), - "dimensions refer to column names that do not exist in the data.") - expect_error(.check_args(check_args_matrix, NULL, list("PC6"), "mean", FALSE), - "dimensions must be a vector of column indices or names.") - expect_error(.check_args(check_args_matrix, NULL, c(1, 2, 3), "mean", FALSE), - "Exactly two dimensions must be specified.") + # Test for invalid number of columns + expect_error(.check_args(check_args_matrix, centering = "mean"), + "Matrix must have only 2 columns.") - # Test for invalid centering_method - expect_error(.check_args(check_args_matrix, NULL, c(1, 2), "invalid_method", FALSE), + # Test for invalid method + expect_error(.check_args(check_args_matrix[, c(1,2)], "invalid_method"), "'arg' should be one of “mean”, “median”, “none”") - # Test for invalid sorting order - expect_error(.check_args(check_args_matrix, NULL, c(1, 2), "mean", "invalid_order"), - "decreasing must be a single boolean value.") - # Test for non-unique row names non_unique_row_matrix <- matrix(1:20, nrow = 4, ncol = 5, dimnames = list(c("Sample1", "Sample2", "Sample1", "Sample4"), c("PC1", "PC2", "PC3", "PC4", "PC5"))) - expect_error(.check_args(non_unique_row_matrix, NULL, c(1, 2), "mean", FALSE), + expect_error(.check_args(non_unique_row_matrix[, c(1,2)], centering = "mean"), "Row names of the matrix must be unique.") # Test for non-unique column names non_unique_col_matrix <- matrix(1:20, nrow = 4, ncol = 5, dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), - c("PC1", "PC2", "PC1", "PC4", "PC5"))) - expect_error(.check_args(non_unique_col_matrix, NULL, c(1, 2), "mean", FALSE), + c("PC1", "PC1", "PC3", "PC4", "PC5"))) + expect_error(.check_args(non_unique_col_matrix[, c(1,2)], centering = "mean"), "Column names of the matrix must be unique.") - }) @@ -95,9 +75,9 @@ radial_theta_matrix <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8), ncol = 2, byrow = TRUE, test_that("Test radial_theta method", { # Argument errors expect_error(.radial_theta(), - "argument \"centering.method\" is missing, with no default") + "argument \"centering\" is missing, with no default") expect_error(.radial_theta(radial_theta_matrix), - "argument \"centering.method\" is missing, with no default") + "argument \"centering\" is missing, with no default") # Centering by mean centering_method <- "mean" @@ -136,37 +116,22 @@ test_that("Test get_sorted_rownames method", { # Argument errors expect_error(.get_sorted_rownames(), 'argument "theta_values" is missing') - expect_error(.get_sorted_rownames(theta_values), - 'argument "decreasing" is missing') - # Valid sorting in ascending order + # Valid sorting expected <- c("Sample2", "Sample4", "Sample1", "Sample3") - result <- .get_sorted_rownames(theta_values, FALSE) - expect_equal(result, expected) - - # Sorting in descending order - result <- .get_sorted_rownames(theta_values, TRUE) - expected <- c("Sample3", "Sample1", "Sample4", "Sample2") + result <- .get_sorted_rownames(theta_values) expect_equal(result, expected) # Edge case: all theta values are the same theta_values <- c(Sample1 = 0.5, Sample2 = 0.5, Sample3 = 0.5, Sample4 = 0.5) - result <- .get_sorted_rownames(theta_values, FALSE) + result <- .get_sorted_rownames(theta_values) expected <- c("Sample1", "Sample2", "Sample3", "Sample4") # Order should be maintained expect_equal(result, expected) - - result <- .get_sorted_rownames(theta_values, TRUE) - expected <- c("Sample1", "Sample2", "Sample3", "Sample4") # Order should be maintained - expect_equal(result, expected) - + # Edge case: theta values contain NULL theta_values <- c(Sample1 = 0.5, Sample2 = NULL, Sample3 = 1.5, Sample4 = -0.8) - result <- .get_sorted_rownames(theta_values, FALSE) + result <- .get_sorted_rownames(theta_values) expected <- c("Sample4", "Sample1", "Sample3") expect_equal(result, expected) - - result <- .get_sorted_rownames(theta_values, TRUE) - expected <- c("Sample3", "Sample1", "Sample4") - expect_equal(result, expected) }) From 990956b98b0e46173d44d4b9c902d32ac960095d Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Fri, 21 Jun 2024 10:46:14 +0300 Subject: [PATCH 08/21] changed example --- R/getNeatOrder.R | 4 ++-- man/getNeatOrder.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/getNeatOrder.R b/R/getNeatOrder.R index 53eb25aa..1f315f81 100644 --- a/R/getNeatOrder.R +++ b/R/getNeatOrder.R @@ -59,10 +59,10 @@ #' tse <- transformAssay(tse, assay.type="clr", method="z", MARGIN = "features", name="z") #' #' ## Perform PCA using calculatePCA -#' reducedDim(tse, "PCA") <- calculatePCA(tse, ncomponents = 10, assay.type = "z") +#' res <- calculatePCA(tse, assay.type = "z") #' #' ## Sort by radial theta and sort the original assay data -#' sorted_order <- getNeatOrder(reducedDim(tse, "PCA")[, c(1,2)], centering = "mean") +#' sorted_order <- getNeatOrder(res[, c(1,2)], centering = "mean") #' tse <- tse[, sorted_order] #' #' ## Define the color function and cap the colors at [-5, 5] diff --git a/man/getNeatOrder.Rd b/man/getNeatOrder.Rd index 355225a8..23320443 100644 --- a/man/getNeatOrder.Rd +++ b/man/getNeatOrder.Rd @@ -72,10 +72,10 @@ tse <- transformAssay(tse, assay.type = "counts", method="clr", MARGIN = "sample tse <- transformAssay(tse, assay.type="clr", method="z", MARGIN = "features", name="z") ## Perform PCA using calculatePCA -reducedDim(tse, "PCA") <- calculatePCA(tse, ncomponents = 10, assay.type = "z") +res <- calculatePCA(tse, assay.type = "z") ## Sort by radial theta and sort the original assay data -sorted_order <- getNeatOrder(reducedDim(tse, "PCA")[, c(1,2)], centering = "mean") +sorted_order <- getNeatOrder(res[, c(1,2)], centering = "mean") tse <- tse[, sorted_order] ## Define the color function and cap the colors at [-5, 5] From 65d5d70ac8b2ae7bc426a7bd8711cb9119a7d9c7 Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Mon, 24 Jun 2024 11:09:06 +0300 Subject: [PATCH 09/21] changing documentation --- R/getNeatOrder.R | 6 +++--- man/getNeatOrder.Rd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/getNeatOrder.R b/R/getNeatOrder.R index 1f315f81..c6f0242e 100644 --- a/R/getNeatOrder.R +++ b/R/getNeatOrder.R @@ -56,10 +56,10 @@ #' tse <- transformAssay(tse, assay.type = "counts", method="clr", MARGIN = "samples", name="clr") #' #' ## Transform the features (taxa) into zero mean, unit variance (z transformation) -#' tse <- transformAssay(tse, assay.type="clr", method="z", MARGIN = "features", name="z") +#' tse <- transformAssay(tse, assay.type="clr", method="standardize", MARGIN = "features", name="standardized") #' #' ## Perform PCA using calculatePCA -#' res <- calculatePCA(tse, assay.type = "z") +#' res <- calculatePCA(tse, assay.type = "standardized", ncomponents = 10) #' #' ## Sort by radial theta and sort the original assay data #' sorted_order <- getNeatOrder(res[, c(1,2)], centering = "mean") @@ -69,7 +69,7 @@ #' col_fun <- colorRamp2(c(-5, 0, 5), c("blue", "white", "red")) #' #' ## Create the heatmap -#' heatmap <- Heatmap(assay(tse, "z"), +#' heatmap <- Heatmap(assay(tse, "standardized"), #' name = "NeatMap", #' col = col_fun, #' cluster_rows = FALSE, # Do not cluster rows diff --git a/man/getNeatOrder.Rd b/man/getNeatOrder.Rd index 23320443..fa504c6a 100644 --- a/man/getNeatOrder.Rd +++ b/man/getNeatOrder.Rd @@ -69,10 +69,10 @@ assay(tse, "counts") <- assay(tse, "counts") + 1 tse <- transformAssay(tse, assay.type = "counts", method="clr", MARGIN = "samples", name="clr") ## Transform the features (taxa) into zero mean, unit variance (z transformation) -tse <- transformAssay(tse, assay.type="clr", method="z", MARGIN = "features", name="z") +tse <- transformAssay(tse, assay.type="clr", method="standardize", MARGIN = "features", name="standardized") ## Perform PCA using calculatePCA -res <- calculatePCA(tse, assay.type = "z") +res <- calculatePCA(tse, assay.type = "standardized", ncomponents = 10) ## Sort by radial theta and sort the original assay data sorted_order <- getNeatOrder(res[, c(1,2)], centering = "mean") @@ -82,7 +82,7 @@ tse <- tse[, sorted_order] col_fun <- colorRamp2(c(-5, 0, 5), c("blue", "white", "red")) ## Create the heatmap -heatmap <- Heatmap(assay(tse, "z"), +heatmap <- Heatmap(assay(tse, "standardized"), name = "NeatMap", col = col_fun, cluster_rows = FALSE, # Do not cluster rows From 9e464ac3b7a54b830ea63ebac21205d95aff776d Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Mon, 24 Jun 2024 14:48:51 +0300 Subject: [PATCH 10/21] changing docs --- R/getNeatOrder.R | 2 +- man/getNeatOrder.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/getNeatOrder.R b/R/getNeatOrder.R index c6f0242e..9db4e85a 100644 --- a/R/getNeatOrder.R +++ b/R/getNeatOrder.R @@ -55,7 +55,7 @@ #' ## Transform the samples into relative abundances using CLR #' tse <- transformAssay(tse, assay.type = "counts", method="clr", MARGIN = "samples", name="clr") #' -#' ## Transform the features (taxa) into zero mean, unit variance (z transformation) +#' ## Transform the features (taxa) into zero mean, unit variance (standardize transformation) #' tse <- transformAssay(tse, assay.type="clr", method="standardize", MARGIN = "features", name="standardized") #' #' ## Perform PCA using calculatePCA diff --git a/man/getNeatOrder.Rd b/man/getNeatOrder.Rd index fa504c6a..6418b3ff 100644 --- a/man/getNeatOrder.Rd +++ b/man/getNeatOrder.Rd @@ -68,7 +68,7 @@ assay(tse, "counts") <- assay(tse, "counts") + 1 ## Transform the samples into relative abundances using CLR tse <- transformAssay(tse, assay.type = "counts", method="clr", MARGIN = "samples", name="clr") -## Transform the features (taxa) into zero mean, unit variance (z transformation) +## Transform the features (taxa) into zero mean, unit variance (standardize transformation) tse <- transformAssay(tse, assay.type="clr", method="standardize", MARGIN = "features", name="standardized") ## Perform PCA using calculatePCA From 59f1b5800432130b9c9e2743605acca63c9812d0 Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Thu, 27 Jun 2024 11:14:20 +0300 Subject: [PATCH 11/21] updated changes --- R/getNeatOrder.R | 31 +++++++++++------------------- man/getNeatOrder.Rd | 19 ++++++------------ tests/testthat/test-getNeatOrder.R | 3 --- 3 files changed, 17 insertions(+), 36 deletions(-) diff --git a/R/getNeatOrder.R b/R/getNeatOrder.R index 9db4e85a..5fa6eb4e 100644 --- a/R/getNeatOrder.R +++ b/R/getNeatOrder.R @@ -18,23 +18,19 @@ #' by the ordination process. #' #' @param x A matrix containing the ordinated data to be sorted. Columns should represent the principal components (PCs) and rows should represent the entities being analyzed (e.g. features or samples). There should be 2 columns only representing 2 PCs. -#' @param centering A single \code{character} value specifying the method to center the data. Options are \code{mean}, \code{median}, or \code{none} if your data is already centered. (default: method = \code{mean}) +#' @param centering A single \code{character} value specifying the method to center the data. Options are \code{mean}, \code{median}, or \code{none} if your data is already centered. (default: method = \code{"mean"}) #' @param ... Additional arguments passed to other methods. #' @return A \code{character} vector of row names in the sorted order. #' #' @details -#' It's important to note that the \pkg{sechm} package does actually have the functionality for plotting a heatmap using this radial theta angle ordering, though only by using an MDS ordination. -#' -#' This functionality can be found at: -#' -#' \url{https://bioconductor.org/packages/3.18/bioc/vignettes/sechm/inst/doc/sechm.html#row-ordering}. -#' +#' It's important to note that the [\pkg{sechm}](https://bioconductor.org/packages/3.18/bioc/vignettes/sechm/inst/doc/sechm.html#row-ordering) package does actually have the functionality +#' for plotting a heatmap using this radial theta angle ordering, though only by using an +#' MDS ordination. +#' #' That being said, the \code{getNeatOrder} function is more modular and separate to the plotting, and #' can be applied to any kind of ordinated data which can be valuable depending on the use case. #' -#' The below paper outlines the NeatMap method in more detail: -#' -#' "Rajaram & Oono (2010)" - https://doi.org/10.1186/1471-2105-11-45. +#' [Rajaram & Oono (2010) NeatMap - non-clustering heat map alternatives in R](https://doi.org/10.1186/1471-2105-11-45) outlines this in more detail. #' #' @name getNeatOrder #' @@ -49,11 +45,8 @@ #' ## Group data by taxonomic order #' tse <- agglomerateByRank(peerj13075, rank = "order", onRankOnly = TRUE) #' -#' ## Add a pseudocount to the counts data -#' assay(tse, "counts") <- assay(tse, "counts") + 1 -#' #' ## Transform the samples into relative abundances using CLR -#' tse <- transformAssay(tse, assay.type = "counts", method="clr", MARGIN = "samples", name="clr") +#' tse <- transformAssay(tse, assay.type = "counts", method="clr", MARGIN = "samples", name="clr", pseudocount = TRUE) #' #' ## Transform the features (taxa) into zero mean, unit variance (standardize transformation) #' tse <- transformAssay(tse, assay.type="clr", method="standardize", MARGIN = "features", name="standardized") @@ -151,16 +144,14 @@ setMethod("getNeatOrder", signature = c("matrix"), # Apply the centering if there's a method present if (!is.null(center_fun)) { center_vals <- apply(data, 2, center_fun) - centered_data <- scale(data, center = center_vals, scale = FALSE) - } else if (centering == "none") { - centered_data <- data - } + data <- scale(data, center = center_vals, scale = FALSE) + } # Compute the radial theta values using the centered data - theta <- atan2(centered_data[, 2], centered_data[, 1]) + theta <- atan2(data[, 2], data[, 1]) # Set the names of theta values to the row names of the centered data and return the theta values - names(theta) <- rownames(centered_data) + names(theta) <- rownames(data) return(theta) } diff --git a/man/getNeatOrder.Rd b/man/getNeatOrder.Rd index 6418b3ff..d2041757 100644 --- a/man/getNeatOrder.Rd +++ b/man/getNeatOrder.Rd @@ -14,7 +14,7 @@ getNeatOrder(x, ...) \item{...}{Additional arguments passed to other methods.} -\item{centering}{A single \code{character} value specifying the method to center the data. Options are \code{mean}, \code{median}, or \code{none} if your data is already centered. (default: method = \code{mean})} +\item{centering}{A single \code{character} value specifying the method to center the data. Options are \code{mean}, \code{median}, or \code{none} if your data is already centered. (default: method = \code{"mean"})} } \value{ A \code{character} vector of row names in the sorted order. @@ -38,18 +38,14 @@ allows for a more faithful representation of the data's intrinsic structure as c by the ordination process. } \details{ -It's important to note that the \pkg{sechm} package does actually have the functionality for plotting a heatmap using this radial theta angle ordering, though only by using an MDS ordination. - -This functionality can be found at: - -\url{https://bioconductor.org/packages/3.18/bioc/vignettes/sechm/inst/doc/sechm.html#row-ordering}. +It's important to note that the \href{https://bioconductor.org/packages/3.18/bioc/vignettes/sechm/inst/doc/sechm.html#row-ordering}{\pkg{sechm}} package does actually have the functionality +for plotting a heatmap using this radial theta angle ordering, though only by using an +MDS ordination. That being said, the \code{getNeatOrder} function is more modular and separate to the plotting, and can be applied to any kind of ordinated data which can be valuable depending on the use case. -The below paper outlines the NeatMap method in more detail: - -"Rajaram & Oono (2010)" - https://doi.org/10.1186/1471-2105-11-45. +\href{https://doi.org/10.1186/1471-2105-11-45}{Rajaram & Oono (2010) NeatMap - non-clustering heat map alternatives in R} outlines this in more detail. } \examples{ ## Load the required libraries and dataset @@ -62,11 +58,8 @@ data(peerj13075) ## Group data by taxonomic order tse <- agglomerateByRank(peerj13075, rank = "order", onRankOnly = TRUE) -## Add a pseudocount to the counts data -assay(tse, "counts") <- assay(tse, "counts") + 1 - ## Transform the samples into relative abundances using CLR -tse <- transformAssay(tse, assay.type = "counts", method="clr", MARGIN = "samples", name="clr") +tse <- transformAssay(tse, assay.type = "counts", method="clr", MARGIN = "samples", name="clr", pseudocount = TRUE) ## Transform the features (taxa) into zero mean, unit variance (standardize transformation) tse <- transformAssay(tse, assay.type="clr", method="standardize", MARGIN = "features", name="standardized") diff --git a/tests/testthat/test-getNeatOrder.R b/tests/testthat/test-getNeatOrder.R index e5b38ec7..c993e4e7 100644 --- a/tests/testthat/test-getNeatOrder.R +++ b/tests/testthat/test-getNeatOrder.R @@ -1,6 +1,3 @@ -testthat::set_max_fails(Inf) - - context("neatsort") neatsort_matrix <- matrix(c(10, 8, 2, 8, 3, 4, 4, 4, 6, 5, From 0a25d36e380ddad629c1306aa22af86cf9fb671e Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Thu, 27 Jun 2024 12:18:50 +0300 Subject: [PATCH 12/21] added to description file --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0ccb87d9..1c190515 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -60,7 +60,9 @@ Suggests: patchwork, vegan, microbiomeDataSets, - bluster + bluster, + ComplexHeatmap, + circlize Remotes: github::microbiome/miaTime Roxygen: list(markdown = TRUE) From add21037f58a7dd33dfab38206790e91c6ebdc79 Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Fri, 28 Jun 2024 10:01:24 +0300 Subject: [PATCH 13/21] Polished the code --- DESCRIPTION | 2 +- NEWS | 1 + R/getNeatOrder.R | 121 ++++++++++++++++++++++---------------------- man/getNeatOrder.Rd | 76 ++++++++++++++++------------ 4 files changed, 107 insertions(+), 93 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1c190515..7dfbd29b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: miaViz Title: Microbiome Analysis Plotting and Visualization -Version: 1.13.2 +Version: 1.13.3 Authors@R: c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"), email = "tuomas.v.borman@utu.fi", diff --git a/NEWS b/NEWS index 5931bea2..035cd132 100644 --- a/NEWS +++ b/NEWS @@ -28,3 +28,4 @@ Changes in version 1.11.x Changes in version 1.13.x + plot*Tree: bugfix, ununique nodes + Added confidence.level parameter to plotCCA ++ Added getNeatOrder function diff --git a/R/getNeatOrder.R b/R/getNeatOrder.R index 5fa6eb4e..42cce103 100644 --- a/R/getNeatOrder.R +++ b/R/getNeatOrder.R @@ -1,67 +1,81 @@ #' Sorting by radial theta angle #' -#' @description \code{getNeatOrder} sorts already ordinated data by the radial theta angle. -#' This method is useful for organizing data points based on their angular -#' position in a 2D space, typically after an ordination technique such as PCA or NMDS -#' has been applied. +#' @description \code{getNeatOrder} sorts already ordinated data by the radial +#' theta angle. This method is useful for organizing data points based on their +#' angular position in a 2D space, typically after an ordination technique such +#' as PCA or NMDS has been applied. #' #' The function takes in a matrix of ordinated data, optionally -#' centers the data using specified methods (\code{mean}, \code{median}, or \code{none}), and then calculates -#' the angle (theta) for each point relative to the centroid. The data points are then -#' sorted based on these theta values in ascending order. +#' centers the data using specified methods (\code{mean}, \code{median}, or +#' \code{none}), and then calculates +#' the angle (theta) for each point relative to the centroid. The data points +#' are then sorted based on these theta values in ascending order. #' #' One significant application of this sorting method is in plotting heatmaps. -#' By using radial theta sorting, the relationships between data points can be preserved -#' according to the ordination method's spatial configuration, rather than relying on -#' hierarchical clustering, which may distort these relationships. This approach -#' allows for a more faithful representation of the data's intrinsic structure as captured -#' by the ordination process. -#' -#' @param x A matrix containing the ordinated data to be sorted. Columns should represent the principal components (PCs) and rows should represent the entities being analyzed (e.g. features or samples). There should be 2 columns only representing 2 PCs. -#' @param centering A single \code{character} value specifying the method to center the data. Options are \code{mean}, \code{median}, or \code{none} if your data is already centered. (default: method = \code{"mean"}) +#' By using radial theta sorting, the relationships between data points can be +#' preserved according to the ordination method's spatial configuration, rather +#' than relying on hierarchical clustering, which may distort these +#' relationships. This approach allows for a more faithful representation of the +#' data's intrinsic structure as captured by the ordination process. +#' +#' @param x A matrix containing the ordinated data to be sorted. Columns should +#' represent the principal components (PCs) and rows should represent the +#' entities being analyzed (e.g. features or samples). There should be 2 columns +#' only representing 2 PCs. +#' +#' @param centering A single \code{character} value specifying the method to +#' center the data. Options are \code{mean}, \code{median}, or \code{none} if +#' your data is already centered. (default: method = \code{"mean"}) +#' #' @param ... Additional arguments passed to other methods. #' @return A \code{character} vector of row names in the sorted order. #' #' @details -#' It's important to note that the [\pkg{sechm}](https://bioconductor.org/packages/3.18/bioc/vignettes/sechm/inst/doc/sechm.html#row-ordering) package does actually have the functionality -#' for plotting a heatmap using this radial theta angle ordering, though only by using an -#' MDS ordination. +#' It's important to note that the +#' [\pkg{sechm}](https://bioconductor.org/packages/3.18/bioc/vignettes/sechm/inst/doc/sechm.html#row-ordering) +#' package does actually have the functionality for plotting a heatmap using +#' this radial theta angle ordering, though only by using an MDS ordination. #' -#' That being said, the \code{getNeatOrder} function is more modular and separate to the plotting, and -#' can be applied to any kind of ordinated data which can be valuable depending on the use case. +#' That being said, the \code{getNeatOrder} function is more modular and +#' separate to the plotting, and can be applied to any kind of ordinated data +#' which can be valuable depending on the use case. #' #' [Rajaram & Oono (2010) NeatMap - non-clustering heat map alternatives in R](https://doi.org/10.1186/1471-2105-11-45) outlines this in more detail. #' #' @name getNeatOrder #' #' @examples -#' ## Load the required libraries and dataset +#' # Load the required libraries and dataset #' library(mia) #' library(scater) #' library(ComplexHeatmap) #' library(circlize) #' data(peerj13075) #' -#' ## Group data by taxonomic order +#' # Group data by taxonomic order #' tse <- agglomerateByRank(peerj13075, rank = "order", onRankOnly = TRUE) #' -#' ## Transform the samples into relative abundances using CLR -#' tse <- transformAssay(tse, assay.type = "counts", method="clr", MARGIN = "samples", name="clr", pseudocount = TRUE) +#' # Transform the samples into relative abundances using CLR +#' tse <- transformAssay( +#' tse, assay.type = "counts", method="clr", MARGIN = "samples", +#' name="clr", pseudocount = TRUE) #' -#' ## Transform the features (taxa) into zero mean, unit variance (standardize transformation) -#' tse <- transformAssay(tse, assay.type="clr", method="standardize", MARGIN = "features", name="standardized") +#' # Transform the features (taxa) into zero mean, unit variance +#' # (standardize transformation) +#' tse <- transformAssay( +#' tse, assay.type="clr", method="standardize", MARGIN = "features") #' -#' ## Perform PCA using calculatePCA -#' res <- calculatePCA(tse, assay.type = "standardized", ncomponents = 10) +#' # Perform PCA using calculatePCA +#' res <- calculatePCA(tse, assay.type = "standardize", ncomponents = 10) #' -#' ## Sort by radial theta and sort the original assay data +#' # Sort by radial theta and sort the original assay data #' sorted_order <- getNeatOrder(res[, c(1,2)], centering = "mean") #' tse <- tse[, sorted_order] #' -#' ## Define the color function and cap the colors at [-5, 5] +#' # Define the color function and cap the colors at [-5, 5] #' col_fun <- colorRamp2(c(-5, 0, 5), c("blue", "white", "red")) #' -#' ## Create the heatmap +#' # Create the heatmap #' heatmap <- Heatmap(assay(tse, "standardized"), #' name = "NeatMap", #' col = col_fun, @@ -78,29 +92,23 @@ NULL #' @rdname getNeatOrder setGeneric("getNeatOrder", signature = c("x"), - function(x, ...) - standardGeneric("getNeatOrder")) + function(x, centering = "mean", ...) + standardGeneric("getNeatOrder")) # Implementation for taking in a raw matrix. #' @rdname getNeatOrder #' @export setMethod("getNeatOrder", signature = c("matrix"), - function(x, - centering = "mean", - ...){ - - # Check args - .check_args(x, centering) - - # Get the theta values and order them - theta_values <- .radial_theta(x, centering) - ordering <- .get_sorted_rownames(theta_values) - - return(ordering) - } - ) - + function(x, centering = "mean", ...){ + # Check args + .check_args(x, centering) + # Get the theta values and order them + theta_values <- .radial_theta(x, centering) + ordering <- .get_sorted_rownames(theta_values) + return(ordering) + } +) # Checks the method arguments. .check_args <- function(x, centering) { @@ -108,49 +116,42 @@ setMethod("getNeatOrder", signature = c("matrix"), if (!is.matrix(x)) { stop("Input data must be a matrix.", call. = FALSE) } - # Check there is sufficient data if (nrow(x) == 0 || ncol(x) == 0) { - stop("No data to plot. Matrix must have at least one row and one column.", call. = FALSE) + stop( + "No data to plot. Matrix must have at least one row and one ", + "column.", call. = FALSE) } - # Check there is sufficient data if (ncol(x) != 2) { stop("Matrix must have only 2 columns.", call. = FALSE) } - # Check centering argument centering <- match.arg(centering, c("mean", "median", "none")) - # Check for unique row names if (any(duplicated(rownames(x)))) { stop("Row names of the matrix must be unique.", call. = FALSE) } - # Check for unique column names if (any(duplicated(colnames(x)))) { stop("Column names of the matrix must be unique.", call. = FALSE) } - return(NULL) } - # Computes the radial theta values for each row in the data matrix. .radial_theta <- function(data, centering) { # Choose the correct centering function based on the method center_fun <- switch(centering, "median" = median, "mean" = mean) - # Apply the centering if there's a method present if (!is.null(center_fun)) { center_vals <- apply(data, 2, center_fun) data <- scale(data, center = center_vals, scale = FALSE) } - # Compute the radial theta values using the centered data theta <- atan2(data[, 2], data[, 1]) - - # Set the names of theta values to the row names of the centered data and return the theta values + # Set the names of theta values to the row names of the centered data and + # return the theta values names(theta) <- rownames(data) return(theta) } diff --git a/man/getNeatOrder.Rd b/man/getNeatOrder.Rd index d2041757..a9ea11f6 100644 --- a/man/getNeatOrder.Rd +++ b/man/getNeatOrder.Rd @@ -5,76 +5,88 @@ \alias{getNeatOrder,matrix-method} \title{Sorting by radial theta angle} \usage{ -getNeatOrder(x, ...) +getNeatOrder(x, centering = "mean", ...) \S4method{getNeatOrder}{matrix}(x, centering = "mean", ...) } \arguments{ -\item{x}{A matrix containing the ordinated data to be sorted. Columns should represent the principal components (PCs) and rows should represent the entities being analyzed (e.g. features or samples). There should be 2 columns only representing 2 PCs.} +\item{x}{A matrix containing the ordinated data to be sorted. Columns should +represent the principal components (PCs) and rows should represent the +entities being analyzed (e.g. features or samples). There should be 2 columns +only representing 2 PCs.} -\item{...}{Additional arguments passed to other methods.} +\item{centering}{A single \code{character} value specifying the method to +center the data. Options are \code{mean}, \code{median}, or \code{none} if +your data is already centered. (default: method = \code{"mean"})} -\item{centering}{A single \code{character} value specifying the method to center the data. Options are \code{mean}, \code{median}, or \code{none} if your data is already centered. (default: method = \code{"mean"})} +\item{...}{Additional arguments passed to other methods.} } \value{ A \code{character} vector of row names in the sorted order. } \description{ -\code{getNeatOrder} sorts already ordinated data by the radial theta angle. -This method is useful for organizing data points based on their angular -position in a 2D space, typically after an ordination technique such as PCA or NMDS -has been applied. +\code{getNeatOrder} sorts already ordinated data by the radial +theta angle. This method is useful for organizing data points based on their +angular position in a 2D space, typically after an ordination technique such +as PCA or NMDS has been applied. The function takes in a matrix of ordinated data, optionally -centers the data using specified methods (\code{mean}, \code{median}, or \code{none}), and then calculates -the angle (theta) for each point relative to the centroid. The data points are then -sorted based on these theta values in ascending order. +centers the data using specified methods (\code{mean}, \code{median}, or +\code{none}), and then calculates +the angle (theta) for each point relative to the centroid. The data points +are then sorted based on these theta values in ascending order. One significant application of this sorting method is in plotting heatmaps. -By using radial theta sorting, the relationships between data points can be preserved -according to the ordination method's spatial configuration, rather than relying on -hierarchical clustering, which may distort these relationships. This approach -allows for a more faithful representation of the data's intrinsic structure as captured -by the ordination process. +By using radial theta sorting, the relationships between data points can be +preserved according to the ordination method's spatial configuration, rather +than relying on hierarchical clustering, which may distort these +relationships. This approach allows for a more faithful representation of the +data's intrinsic structure as captured by the ordination process. } \details{ -It's important to note that the \href{https://bioconductor.org/packages/3.18/bioc/vignettes/sechm/inst/doc/sechm.html#row-ordering}{\pkg{sechm}} package does actually have the functionality -for plotting a heatmap using this radial theta angle ordering, though only by using an -MDS ordination. +It's important to note that the +\href{https://bioconductor.org/packages/3.18/bioc/vignettes/sechm/inst/doc/sechm.html#row-ordering}{\pkg{sechm}} +package does actually have the functionality for plotting a heatmap using +this radial theta angle ordering, though only by using an MDS ordination. -That being said, the \code{getNeatOrder} function is more modular and separate to the plotting, and -can be applied to any kind of ordinated data which can be valuable depending on the use case. +That being said, the \code{getNeatOrder} function is more modular and +separate to the plotting, and can be applied to any kind of ordinated data +which can be valuable depending on the use case. \href{https://doi.org/10.1186/1471-2105-11-45}{Rajaram & Oono (2010) NeatMap - non-clustering heat map alternatives in R} outlines this in more detail. } \examples{ -## Load the required libraries and dataset +# Load the required libraries and dataset library(mia) library(scater) library(ComplexHeatmap) library(circlize) data(peerj13075) -## Group data by taxonomic order +# Group data by taxonomic order tse <- agglomerateByRank(peerj13075, rank = "order", onRankOnly = TRUE) -## Transform the samples into relative abundances using CLR -tse <- transformAssay(tse, assay.type = "counts", method="clr", MARGIN = "samples", name="clr", pseudocount = TRUE) +# Transform the samples into relative abundances using CLR +tse <- transformAssay( + tse, assay.type = "counts", method="clr", MARGIN = "samples", + name="clr", pseudocount = TRUE) -## Transform the features (taxa) into zero mean, unit variance (standardize transformation) -tse <- transformAssay(tse, assay.type="clr", method="standardize", MARGIN = "features", name="standardized") +# Transform the features (taxa) into zero mean, unit variance +# (standardize transformation) +tse <- transformAssay( + tse, assay.type="clr", method="standardize", MARGIN = "features") -## Perform PCA using calculatePCA -res <- calculatePCA(tse, assay.type = "standardized", ncomponents = 10) +# Perform PCA using calculatePCA +res <- calculatePCA(tse, assay.type = "standardize", ncomponents = 10) -## Sort by radial theta and sort the original assay data +# Sort by radial theta and sort the original assay data sorted_order <- getNeatOrder(res[, c(1,2)], centering = "mean") tse <- tse[, sorted_order] -## Define the color function and cap the colors at [-5, 5] +# Define the color function and cap the colors at [-5, 5] col_fun <- colorRamp2(c(-5, 0, 5), c("blue", "white", "red")) -## Create the heatmap +# Create the heatmap heatmap <- Heatmap(assay(tse, "standardized"), name = "NeatMap", col = col_fun, From 861634892ba7da27479da7333ab0fe23df63330f Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Fri, 28 Jun 2024 14:17:06 +0300 Subject: [PATCH 14/21] made testing more robust --- tests/testthat/test-getNeatOrder.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-getNeatOrder.R b/tests/testthat/test-getNeatOrder.R index c993e4e7..9f2c3648 100644 --- a/tests/testthat/test-getNeatOrder.R +++ b/tests/testthat/test-getNeatOrder.R @@ -45,8 +45,8 @@ test_that("Test check_args method", { "Matrix must have only 2 columns.") # Test for invalid method - expect_error(.check_args(check_args_matrix[, c(1,2)], "invalid_method"), - "'arg' should be one of “mean”, “median”, “none”") + error_message <- expect_error(.check_args(check_args_matrix[, c(1,2)], "invalid_method")) + expect_true(grepl("'arg' should be one of", error_message)) # Test for non-unique row names non_unique_row_matrix <- matrix(1:20, nrow = 4, ncol = 5, From 20da458b422a067695487739913c20a7a7fc3191 Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Fri, 28 Jun 2024 14:40:03 +0300 Subject: [PATCH 15/21] updated example --- R/getNeatOrder.R | 2 +- man/getNeatOrder.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/getNeatOrder.R b/R/getNeatOrder.R index 42cce103..ce4224db 100644 --- a/R/getNeatOrder.R +++ b/R/getNeatOrder.R @@ -76,7 +76,7 @@ #' col_fun <- colorRamp2(c(-5, 0, 5), c("blue", "white", "red")) #' #' # Create the heatmap -#' heatmap <- Heatmap(assay(tse, "standardized"), +#' heatmap <- Heatmap(assay(tse, "standardize"), #' name = "NeatMap", #' col = col_fun, #' cluster_rows = FALSE, # Do not cluster rows diff --git a/man/getNeatOrder.Rd b/man/getNeatOrder.Rd index a9ea11f6..865a69e1 100644 --- a/man/getNeatOrder.Rd +++ b/man/getNeatOrder.Rd @@ -87,7 +87,7 @@ tse <- tse[, sorted_order] col_fun <- colorRamp2(c(-5, 0, 5), c("blue", "white", "red")) # Create the heatmap -heatmap <- Heatmap(assay(tse, "standardized"), +heatmap <- Heatmap(assay(tse, "standardize"), name = "NeatMap", col = col_fun, cluster_rows = FALSE, # Do not cluster rows From 60737794540c8458345b78ea5672643f08cf1c4e Mon Sep 17 00:00:00 2001 From: Tuomas Borman <60338854+TuomasBorman@users.noreply.github.com> Date: Fri, 28 Jun 2024 15:21:25 +0300 Subject: [PATCH 16/21] Update getNeatOrder.R --- R/getNeatOrder.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/getNeatOrder.R b/R/getNeatOrder.R index ce4224db..e58405cc 100644 --- a/R/getNeatOrder.R +++ b/R/getNeatOrder.R @@ -28,6 +28,7 @@ #' your data is already centered. (default: method = \code{"mean"}) #' #' @param ... Additional arguments passed to other methods. +#' #' @return A \code{character} vector of row names in the sorted order. #' #' @details From c1593a9924eb835abd79989778336502830e0390 Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Fri, 28 Jun 2024 16:20:54 +0300 Subject: [PATCH 17/21] updated method --- R/getNeatOrder.R | 16 +------- man/getNeatOrder.Rd | 2 +- tests/testthat/test-getNeatOrder.R | 64 +++++++++++------------------- 3 files changed, 27 insertions(+), 55 deletions(-) diff --git a/R/getNeatOrder.R b/R/getNeatOrder.R index e58405cc..6cca4f55 100644 --- a/R/getNeatOrder.R +++ b/R/getNeatOrder.R @@ -29,7 +29,7 @@ #' #' @param ... Additional arguments passed to other methods. #' -#' @return A \code{character} vector of row names in the sorted order. +#' @return A \code{character} vector of row indices in the sorted order. #' #' @details #' It's important to note that the @@ -106,7 +106,7 @@ setMethod("getNeatOrder", signature = c("matrix"), .check_args(x, centering) # Get the theta values and order them theta_values <- .radial_theta(x, centering) - ordering <- .get_sorted_rownames(theta_values) + ordering <- order(theta_values) return(ordering) } ) @@ -133,10 +133,6 @@ setMethod("getNeatOrder", signature = c("matrix"), if (any(duplicated(rownames(x)))) { stop("Row names of the matrix must be unique.", call. = FALSE) } - # Check for unique column names - if (any(duplicated(colnames(x)))) { - stop("Column names of the matrix must be unique.", call. = FALSE) - } return(NULL) } @@ -156,11 +152,3 @@ setMethod("getNeatOrder", signature = c("matrix"), names(theta) <- rownames(data) return(theta) } - - -# Sorts the theta values and returns the ordered row names. -.get_sorted_rownames <- function(theta_values) { - sorted_indices <- order(theta_values) - rownames <- names(theta_values)[sorted_indices] - return(rownames) -} diff --git a/man/getNeatOrder.Rd b/man/getNeatOrder.Rd index 865a69e1..efb97dfe 100644 --- a/man/getNeatOrder.Rd +++ b/man/getNeatOrder.Rd @@ -22,7 +22,7 @@ your data is already centered. (default: method = \code{"mean"})} \item{...}{Additional arguments passed to other methods.} } \value{ -A \code{character} vector of row names in the sorted order. +A \code{character} vector of row indices in the sorted order. } \description{ \code{getNeatOrder} sorts already ordinated data by the radial diff --git a/tests/testthat/test-getNeatOrder.R b/tests/testthat/test-getNeatOrder.R index 9f2c3648..2f2a539e 100644 --- a/tests/testthat/test-getNeatOrder.R +++ b/tests/testthat/test-getNeatOrder.R @@ -6,16 +6,26 @@ neatsort_matrix <- matrix(c(10, 8, 2, 8, 3, dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), c("PC1", "PC2", "PC3", "PC4", "PC5"))) +neatsort_matrix_indices <- matrix(c(10, 8, 2, 8, 3, + 4, 4, 4, 6, 5, + 5, 4, 4, 1, 2, + 2, 3, 5, 7, 8), nrow = 4, ncol = 5, byrow = TRUE) + test_that("Test getNeatOrder function", { # Test with valid inputs result <- getNeatOrder(neatsort_matrix[, c(1,2)], centering = "mean") - expected <- c("Sample4", "Sample2", "Sample3", "Sample1") + expected <- c(4, 2, 3, 1) expect_equal(result, expected) # Test with no method input result <- getNeatOrder(neatsort_matrix[, c(1,2)]) - expected <- c("Sample4", "Sample2", "Sample3", "Sample1") + expected <- c(4, 2, 3, 1) expect_equal(result, expected) + + # Test with indice matrix + result <- getNeatOrder(neatsort_matrix_indices[, c(1,2)], centering = "mean") + expected <- c(4, 2, 3, 1) + expect_equal(result, expected_indices) }) @@ -55,12 +65,9 @@ test_that("Test check_args method", { expect_error(.check_args(non_unique_row_matrix[, c(1,2)], centering = "mean"), "Row names of the matrix must be unique.") - # Test for non-unique column names - non_unique_col_matrix <- matrix(1:20, nrow = 4, ncol = 5, - dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), - c("PC1", "PC1", "PC3", "PC4", "PC5"))) - expect_error(.check_args(non_unique_col_matrix[, c(1,2)], centering = "mean"), - "Column names of the matrix must be unique.") + # Test for non-unique row names in an indice matrix - should pass as row names are indices + non_unique_row_matrix_indices <- matrix(1:20, nrow = 4, ncol = 5) + expect_error(.check_args(non_unique_row_matrix_indices[, c(1,2)], centering = "mean"), NA) }) @@ -69,6 +76,8 @@ radial_theta_matrix <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8), ncol = 2, byrow = TRUE, dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), c("PC1", "PC2"))) +radial_theta_matrix_indices <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8), ncol = 2, byrow = TRUE) + test_that("Test radial_theta method", { # Argument errors expect_error(.radial_theta(), @@ -77,8 +86,7 @@ test_that("Test radial_theta method", { "argument \"centering\" is missing, with no default") # Centering by mean - centering_method <- "mean" - result <- .radial_theta(radial_theta_matrix, centering_method) + result <- .radial_theta(radial_theta_matrix, "mean") centered_data <- matrix(c(-3, -3, -1, -1, 1, 1, 3, 3), ncol = 2, byrow = TRUE, dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), c("PC1", "PC2"))) @@ -87,8 +95,7 @@ test_that("Test radial_theta method", { expect_equal(result, expected) # Centering by median - centering_method <- "median" - result <- .radial_theta(radial_theta_matrix, centering_method) + result <- .radial_theta(radial_theta_matrix, "median") centered_data <- matrix(c(-3, -3, -1, -1, 1, 1, 3, 3), ncol = 2, byrow = TRUE, dimnames = list(c("Sample1", "Sample2", "Sample3", "Sample4"), c("PC1", "PC2"))) @@ -97,38 +104,15 @@ test_that("Test radial_theta method", { expect_equal(result, expected) # No centering - centering_method <- "none" - result <- .radial_theta(radial_theta_matrix, centering_method) + result <- .radial_theta(radial_theta_matrix, "none") centered_data <- radial_theta_matrix expected <- atan2(centered_data[, 2], centered_data[, 1]) names(expected) <- rownames(centered_data) expect_equal(result, expected) -}) - - -context("get_sorted_rownames") -theta_values <- c(Sample1 = 0.5, Sample2 = -1.2, Sample3 = 1.5, Sample4 = -0.8) - -test_that("Test get_sorted_rownames method", { - # Argument errors - expect_error(.get_sorted_rownames(), - 'argument "theta_values" is missing') - - # Valid sorting - expected <- c("Sample2", "Sample4", "Sample1", "Sample3") - result <- .get_sorted_rownames(theta_values) - expect_equal(result, expected) - # Edge case: all theta values are the same - theta_values <- c(Sample1 = 0.5, Sample2 = 0.5, Sample3 = 0.5, Sample4 = 0.5) - result <- .get_sorted_rownames(theta_values) - expected <- c("Sample1", "Sample2", "Sample3", "Sample4") # Order should be maintained - expect_equal(result, expected) - - # Edge case: theta values contain NULL - theta_values <- c(Sample1 = 0.5, Sample2 = NULL, Sample3 = 1.5, Sample4 = -0.8) - result <- .get_sorted_rownames(theta_values) - expected <- c("Sample4", "Sample1", "Sample3") + # Test with indice matrix + result <- .radial_theta(radial_theta_matrix_indices, "mean") + centered_data <- matrix(c(-3, -3, -1, -1, 1, 1, 3, 3), ncol = 2, byrow = TRUE) + expected <- atan2(centered_data[, 2], centered_data[, 1]) expect_equal(result, expected) }) - From 6f13ad7aa347eb664f1ef80ccd1eafb6af0e9cb3 Mon Sep 17 00:00:00 2001 From: Sam Hillman Date: Fri, 28 Jun 2024 20:48:08 +0300 Subject: [PATCH 18/21] updated changes --- R/getNeatOrder.R | 4 ---- tests/testthat/test-getNeatOrder.R | 11 ----------- 2 files changed, 15 deletions(-) diff --git a/R/getNeatOrder.R b/R/getNeatOrder.R index 6cca4f55..6dc879ee 100644 --- a/R/getNeatOrder.R +++ b/R/getNeatOrder.R @@ -129,10 +129,6 @@ setMethod("getNeatOrder", signature = c("matrix"), } # Check centering argument centering <- match.arg(centering, c("mean", "median", "none")) - # Check for unique row names - if (any(duplicated(rownames(x)))) { - stop("Row names of the matrix must be unique.", call. = FALSE) - } return(NULL) } diff --git a/tests/testthat/test-getNeatOrder.R b/tests/testthat/test-getNeatOrder.R index 2f2a539e..814c426c 100644 --- a/tests/testthat/test-getNeatOrder.R +++ b/tests/testthat/test-getNeatOrder.R @@ -57,17 +57,6 @@ test_that("Test check_args method", { # Test for invalid method error_message <- expect_error(.check_args(check_args_matrix[, c(1,2)], "invalid_method")) expect_true(grepl("'arg' should be one of", error_message)) - - # Test for non-unique row names - non_unique_row_matrix <- matrix(1:20, nrow = 4, ncol = 5, - dimnames = list(c("Sample1", "Sample2", "Sample1", "Sample4"), - c("PC1", "PC2", "PC3", "PC4", "PC5"))) - expect_error(.check_args(non_unique_row_matrix[, c(1,2)], centering = "mean"), - "Row names of the matrix must be unique.") - - # Test for non-unique row names in an indice matrix - should pass as row names are indices - non_unique_row_matrix_indices <- matrix(1:20, nrow = 4, ncol = 5) - expect_error(.check_args(non_unique_row_matrix_indices[, c(1,2)], centering = "mean"), NA) }) From 70a98f4a526ff83d40acb8c6ab4f1dd643e8bed4 Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Fri, 28 Jun 2024 21:01:55 +0300 Subject: [PATCH 19/21] up --- R/getNeatOrder.R | 3 ++- man/getNeatOrder.Rd | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/getNeatOrder.R b/R/getNeatOrder.R index 6dc879ee..cbd286b6 100644 --- a/R/getNeatOrder.R +++ b/R/getNeatOrder.R @@ -25,7 +25,7 @@ #' #' @param centering A single \code{character} value specifying the method to #' center the data. Options are \code{mean}, \code{median}, or \code{none} if -#' your data is already centered. (default: method = \code{"mean"}) +#' your data is already centered. (default: \code{"mean"}) #' #' @param ... Additional arguments passed to other methods. #' @@ -89,6 +89,7 @@ #' heatmap_width = unit(20, "cm"), #' heatmap_height = unit(15, "cm") #' ) +#' NULL #' @rdname getNeatOrder diff --git a/man/getNeatOrder.Rd b/man/getNeatOrder.Rd index efb97dfe..0ae9b29a 100644 --- a/man/getNeatOrder.Rd +++ b/man/getNeatOrder.Rd @@ -17,7 +17,7 @@ only representing 2 PCs.} \item{centering}{A single \code{character} value specifying the method to center the data. Options are \code{mean}, \code{median}, or \code{none} if -your data is already centered. (default: method = \code{"mean"})} +your data is already centered. (default: \code{"mean"})} \item{...}{Additional arguments passed to other methods.} } @@ -99,4 +99,5 @@ heatmap <- Heatmap(assay(tse, "standardize"), heatmap_width = unit(20, "cm"), heatmap_height = unit(15, "cm") ) + } From 296a0ba5041d2cc2889a1c6ba8c1841872a72aa8 Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Sat, 29 Jun 2024 10:42:26 +0300 Subject: [PATCH 20/21] up --- R/getNeatOrder.R | 25 +++++++++++++++---------- tests/testthat/test-getNeatOrder.R | 6 +++--- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/R/getNeatOrder.R b/R/getNeatOrder.R index cbd286b6..5675bf47 100644 --- a/R/getNeatOrder.R +++ b/R/getNeatOrder.R @@ -7,9 +7,9 @@ #' #' The function takes in a matrix of ordinated data, optionally #' centers the data using specified methods (\code{mean}, \code{median}, or -#' \code{none}), and then calculates -#' the angle (theta) for each point relative to the centroid. The data points -#' are then sorted based on these theta values in ascending order. +#' \code{NULL}), and then calculates the angle (theta) for each point relative +#' to the centroid. The data points are then sorted based on these theta values +#' in ascending order. #' #' One significant application of this sorting method is in plotting heatmaps. #' By using radial theta sorting, the relationships between data points can be @@ -24,8 +24,8 @@ #' only representing 2 PCs. #' #' @param centering A single \code{character} value specifying the method to -#' center the data. Options are \code{mean}, \code{median}, or \code{none} if -#' your data is already centered. (default: \code{"mean"}) +#' center the data. Options are \code{"mean"}, \code{"median"}, or \code{NULL} +#' if your data is already centered. (default: \code{"mean"}) #' #' @param ... Additional arguments passed to other methods. #' @@ -129,16 +129,21 @@ setMethod("getNeatOrder", signature = c("matrix"), stop("Matrix must have only 2 columns.", call. = FALSE) } # Check centering argument - centering <- match.arg(centering, c("mean", "median", "none")) + if ( !(is.null(centering) || (.is_a_string(centering) && + centering %in% c("mean", "median", NULL))) ){ + stop( + "'centering' must be a single character value or NULL.", + call. = FALSE) + } return(NULL) } # Computes the radial theta values for each row in the data matrix. .radial_theta <- function(data, centering) { - # Choose the correct centering function based on the method - center_fun <- switch(centering, "median" = median, "mean" = mean) - # Apply the centering if there's a method present - if (!is.null(center_fun)) { + # Apply the centering if centering is specified + if (!is.null(centering)) { + # Choose the correct centering function based on the method + center_fun <- switch(centering, "median" = median, "mean" = mean) center_vals <- apply(data, 2, center_fun) data <- scale(data, center = center_vals, scale = FALSE) } diff --git a/tests/testthat/test-getNeatOrder.R b/tests/testthat/test-getNeatOrder.R index 814c426c..33dc5cbf 100644 --- a/tests/testthat/test-getNeatOrder.R +++ b/tests/testthat/test-getNeatOrder.R @@ -25,7 +25,7 @@ test_that("Test getNeatOrder function", { # Test with indice matrix result <- getNeatOrder(neatsort_matrix_indices[, c(1,2)], centering = "mean") expected <- c(4, 2, 3, 1) - expect_equal(result, expected_indices) + expect_equal(result, expected) }) @@ -56,7 +56,7 @@ test_that("Test check_args method", { # Test for invalid method error_message <- expect_error(.check_args(check_args_matrix[, c(1,2)], "invalid_method")) - expect_true(grepl("'arg' should be one of", error_message)) + expect_true(grepl("'centering' must be a single character value or NULL.", error_message)) }) @@ -93,7 +93,7 @@ test_that("Test radial_theta method", { expect_equal(result, expected) # No centering - result <- .radial_theta(radial_theta_matrix, "none") + result <- .radial_theta(radial_theta_matrix, NULL) centered_data <- radial_theta_matrix expected <- atan2(centered_data[, 2], centered_data[, 1]) names(expected) <- rownames(centered_data) From 404b0a8c674c3dc55abe244b148b1227fac2b3e4 Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Sat, 29 Jun 2024 10:52:17 +0300 Subject: [PATCH 21/21] update docs --- man/getNeatOrder.Rd | 10 +++++----- man/plotCCA.Rd | 8 ++++---- man/plotNMDS.Rd | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/man/getNeatOrder.Rd b/man/getNeatOrder.Rd index 0ae9b29a..b5c3d707 100644 --- a/man/getNeatOrder.Rd +++ b/man/getNeatOrder.Rd @@ -16,8 +16,8 @@ entities being analyzed (e.g. features or samples). There should be 2 columns only representing 2 PCs.} \item{centering}{A single \code{character} value specifying the method to -center the data. Options are \code{mean}, \code{median}, or \code{none} if -your data is already centered. (default: \code{"mean"})} +center the data. Options are \code{"mean"}, \code{"median"}, or \code{NULL} +if your data is already centered. (default: \code{"mean"})} \item{...}{Additional arguments passed to other methods.} } @@ -32,9 +32,9 @@ as PCA or NMDS has been applied. The function takes in a matrix of ordinated data, optionally centers the data using specified methods (\code{mean}, \code{median}, or -\code{none}), and then calculates -the angle (theta) for each point relative to the centroid. The data points -are then sorted based on these theta values in ascending order. +\code{NULL}), and then calculates the angle (theta) for each point relative +to the centroid. The data points are then sorted based on these theta values +in ascending order. One significant application of this sorting method is in plotting heatmaps. By using radial theta sorting, the relationships between data points can be diff --git a/man/plotCCA.Rd b/man/plotCCA.Rd index 3820850b..0b0dd151 100644 --- a/man/plotCCA.Rd +++ b/man/plotCCA.Rd @@ -182,10 +182,10 @@ plotRDA(tse, "RDA", # Calculate RDA as a separate object rda_mat <- getRDA(tse, - formula = assay ~ ClinicalStatus + Gender + Age, - FUN = vegan::vegdist, - distance = "bray", - na.action = na.exclude) + formula = assay ~ ClinicalStatus + Gender + Age, + FUN = vegan::vegdist, + distance = "bray", + na.action = na.exclude) # Create RDA plot from RDA matrix plotRDA(rda_mat) diff --git a/man/plotNMDS.Rd b/man/plotNMDS.Rd index 2bb2e693..578660da 100644 --- a/man/plotNMDS.Rd +++ b/man/plotNMDS.Rd @@ -11,7 +11,7 @@ plotNMDS(x, ..., ncomponents = 2) \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} object.} -\item{...}{additional arguments passed to scater::plotReducedDim()} +\item{...}{additional arguments passed to scater::plotReducedDim().} \item{ncomponents}{A numeric scalar indicating the number of dimensions to plot, starting from the first dimension. Alternatively, a numeric vector specifying the