From 69ec08ceee21f358f0e5b4d9892a78d7e105cbc3 Mon Sep 17 00:00:00 2001 From: "Win Cowger, PhD" Date: Thu, 22 Feb 2024 13:37:59 -0800 Subject: [PATCH 1/5] add entropy --- R/sig_noise.R | 23 +++++++++++++++++++---- man/sig_noise.Rd | 10 +++++++++- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/R/sig_noise.R b/R/sig_noise.R index c454697c..92b48932 100644 --- a/R/sig_noise.R +++ b/R/sig_noise.R @@ -14,8 +14,10 @@ #' noise where signal is estimated as the max intensity and noise is #' estimated as the height of a low intensity region.), #' \code{"log_tot_sig"} (sum of the inverse log intensities, useful for spectra in log units), -#' or \code{"tot_sig"} (sum of intensities). +#' \code{"tot_sig"} (sum of intensities), or \code{"entropy"} (Shannon entropy of intensities).. #' @param step numeric; the step size of the region to look for the run_sig_over_noise option. +#' @param prob numeric; the probability to retrieve for the quantile where the noise will be interpreted with the run_sig_over_noise option. +#' @param breaks the number or positions of the breaks for entropy calculation. Defaults to infer a decent value from the data. #' @param sig_min numeric; the minimum wavenumber value for the signal region. #' @param sig_max numeric; the maximum wavenumber value for the signal region. #' @param noise_min numeric; the minimum wavenumber value for the noise region. @@ -65,7 +67,11 @@ sig_noise.default <- function(x, ...) { #' #' @export sig_noise.OpenSpecy <- function(x, metric = "run_sig_over_noise", - na.rm = TRUE, step = 20, + na.rm = TRUE, prob = 0.5, step = 20, + breaks = seq(min(unlist(x$spectra)), + max(unlist(x$spectra)), + length = ((nrow(x$spectra)^(1/3))*(max(unlist(x$spectra)) - min(unlist(x$spectra))))/ + (2*IQR(unlist(x$spectra)))), sig_min = NULL, sig_max = NULL, noise_min = NULL, noise_max = NULL, abs = T, spatial_smooth = F, sigma = c(1,1), threshold = NULL, ...) { @@ -79,8 +85,8 @@ sig_noise.OpenSpecy <- function(x, metric = "run_sig_over_noise", } max <- frollapply(y[!is.na(y)], step, max) max[(length(max) - (step-1)):length(max)] <- NA - signal <- max(max, na.rm = T) - noise <- median(max[max != 0], na.rm = T) + signal <- max(max, na.rm = na.rm) + noise <- quantile(max[max != 0], probs = prob, na.rm = na.rm) } else { if(!is.null(sig_min) & !is.null(sig_max)){ sig_intens <- y[x$wavenumber >= sig_min & x$wavenumber <= sig_max] @@ -92,6 +98,13 @@ sig_noise.OpenSpecy <- function(x, metric = "run_sig_over_noise", } else { noise_intens <- y } + if(metric == "entropy"){ + binarize <- cut(sig_intens, breaks) + freq <- table(binarize)/length(binarize) + vec <- as.vector(freq) + vec <- vec[vec>0] + return(-sum(vec * log2(vec))) + } signal <- mean(sig_intens, na.rm = na.rm) noise <- sd(noise_intens, na.rm = na.rm) } @@ -105,6 +118,7 @@ sig_noise.OpenSpecy <- function(x, metric = "run_sig_over_noise", if(metric == "tot_sig") return(sum(y)) if(metric == "log_tot_sig") return(sum(exp(y))) }, FUN.VALUE = numeric(1)) + if(spatial_smooth){ values <- matrix(values, ncol = max(x$metadata$x) + 1, byrow = T) |> gaussianSmooth(sigma = sigma) |> @@ -114,6 +128,7 @@ sig_noise.OpenSpecy <- function(x, metric = "run_sig_over_noise", if(abs) { values <- abs(values) } + if(!is.null(threshold)){ return(values >= threshold) } diff --git a/man/sig_noise.Rd b/man/sig_noise.Rd index 0cec1955..8c823cf4 100644 --- a/man/sig_noise.Rd +++ b/man/sig_noise.Rd @@ -14,7 +14,11 @@ sig_noise(x, ...) x, metric = "run_sig_over_noise", na.rm = TRUE, + prob = 0.5, step = 20, + breaks = seq(min(unlist(x$spectra)), max(unlist(x$spectra)), length = + ((nrow(x$spectra)^(1/3)) * (max(unlist(x$spectra)) - min(unlist(x$spectra))))/(2 * + IQR(unlist(x$spectra)))), sig_min = NULL, sig_max = NULL, noise_min = NULL, @@ -37,13 +41,17 @@ noise), \code{"run_sig_over_noise"} (absolute value of signal / noise where signal is estimated as the max intensity and noise is estimated as the height of a low intensity region.), \code{"log_tot_sig"} (sum of the inverse log intensities, useful for spectra in log units), -or \code{"tot_sig"} (sum of intensities).} +\code{"tot_sig"} (sum of intensities), or \code{"entropy"} (Shannon entropy of intensities)..} \item{na.rm}{logical; indicating whether missing values should be removed when calculating signal and noise. Default is \code{TRUE}.} +\item{prob}{numeric; the probability to retrieve for the quantile where the noise will be interpreted with the run_sig_over_noise option.} + \item{step}{numeric; the step size of the region to look for the run_sig_over_noise option.} +\item{breaks}{the number or positions of the breaks for entropy calculation. Defaults to infer a decent value from the data.} + \item{sig_min}{numeric; the minimum wavenumber value for the signal region.} \item{sig_max}{numeric; the maximum wavenumber value for the signal region.} From 94bd67c80ede8b2115520fd6bbd8aadbadb1e550 Mon Sep 17 00:00:00 2001 From: "Win Cowger, PhD" Date: Thu, 22 Feb 2024 13:48:11 -0800 Subject: [PATCH 2/5] add tests for entropy --- R/sig_noise.R | 4 ++-- man/sig_noise.Rd | 4 ++-- tests/testthat/test-sig_noise.R | 24 ++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/R/sig_noise.R b/R/sig_noise.R index 92b48932..1946df17 100644 --- a/R/sig_noise.R +++ b/R/sig_noise.R @@ -16,8 +16,8 @@ #' \code{"log_tot_sig"} (sum of the inverse log intensities, useful for spectra in log units), #' \code{"tot_sig"} (sum of intensities), or \code{"entropy"} (Shannon entropy of intensities).. #' @param step numeric; the step size of the region to look for the run_sig_over_noise option. -#' @param prob numeric; the probability to retrieve for the quantile where the noise will be interpreted with the run_sig_over_noise option. -#' @param breaks the number or positions of the breaks for entropy calculation. Defaults to infer a decent value from the data. +#' @param prob numeric single value; the probability to retrieve for the quantile where the noise will be interpreted with the run_sig_over_noise option. +#' @param breaks numeric; the number or positions of the breaks for entropy calculation. Defaults to infer a decent value from the data. #' @param sig_min numeric; the minimum wavenumber value for the signal region. #' @param sig_max numeric; the maximum wavenumber value for the signal region. #' @param noise_min numeric; the minimum wavenumber value for the noise region. diff --git a/man/sig_noise.Rd b/man/sig_noise.Rd index 8c823cf4..5844b11c 100644 --- a/man/sig_noise.Rd +++ b/man/sig_noise.Rd @@ -46,11 +46,11 @@ estimated as the height of a low intensity region.), \item{na.rm}{logical; indicating whether missing values should be removed when calculating signal and noise. Default is \code{TRUE}.} -\item{prob}{numeric; the probability to retrieve for the quantile where the noise will be interpreted with the run_sig_over_noise option.} +\item{prob}{numeric single value; the probability to retrieve for the quantile where the noise will be interpreted with the run_sig_over_noise option.} \item{step}{numeric; the step size of the region to look for the run_sig_over_noise option.} -\item{breaks}{the number or positions of the breaks for entropy calculation. Defaults to infer a decent value from the data.} +\item{breaks}{numeric; the number or positions of the breaks for entropy calculation. Defaults to infer a decent value from the data.} \item{sig_min}{numeric; the minimum wavenumber value for the signal region.} diff --git a/tests/testthat/test-sig_noise.R b/tests/testthat/test-sig_noise.R index ed536021..61ff66d1 100644 --- a/tests/testthat/test-sig_noise.R +++ b/tests/testthat/test-sig_noise.R @@ -35,3 +35,27 @@ test_that("sig_noise() returns correct values", { expect_equal(97527) }) +test_that("entropy results in accurate info", { + sig_noise(raman_hdpe, + metric = "entropy", + breaks = 10) |> + round(2) |> + unname() |> + expect_equal(1.17) + + noise <- raman_hdpe + noise$spectra[[1]] <- runif(n = length(noise$spectra[[1]])) + + sig_noise(noise, metric = "entropy", + breaks = 10) |> + round(2) |> + unname() |> + expect_gt(3.32) + + expect_true(sig_noise(raman_hdpe, + metric = "entropy", + breaks = 10) |> + unname() < + sig_noise(noise, metric = "entropy", + breaks = 10) |> unname()) +}) From 471d43ff0d5c476882a81d5977df35ee9b730aa6 Mon Sep 17 00:00:00 2001 From: "Win Cowger, PhD" Date: Wed, 28 Feb 2024 16:25:35 -0800 Subject: [PATCH 3/5] add na handling to ignore or remove. --- NAMESPACE | 3 + R/manage_na.R | 102 ++++++++++++++++++++++++++++++++ R/sig_noise.R | 2 +- man/manage_na.Rd | 53 +++++++++++++++++ man/sig_noise.Rd | 2 +- tests/testthat/test-sig_noise.R | 3 +- 6 files changed, 162 insertions(+), 3 deletions(-) create mode 100644 R/manage_na.R create mode 100644 man/manage_na.Rd diff --git a/NAMESPACE b/NAMESPACE index c0b1fa79..2474ef42 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,8 @@ S3method(interactive_plot,default) S3method(lines,OpenSpecy) S3method(make_rel,OpenSpecy) S3method(make_rel,default) +S3method(manage_na,OpenSpecy) +S3method(manage_na,default) S3method(match_spec,OpenSpecy) S3method(match_spec,default) S3method(os_similarity,OpenSpecy) @@ -97,6 +99,7 @@ export(is_OpenSpecy) export(is_empty_vector) export(load_lib) export(make_rel) +export(manage_na) export(match_spec) export(max_cor_named) export(mean_replace) diff --git a/R/manage_na.R b/R/manage_na.R new file mode 100644 index 00000000..4f8a5b95 --- /dev/null +++ b/R/manage_na.R @@ -0,0 +1,102 @@ +#' @rdname manage_na +#' @title Ignore or Remove NA intensities +#' +#' @description +#' Sometimes you want to keep or remove NA values in intensities to allow for spectra with +#' varying shapes to be analyzed together or maintained in a single Open Specy object. +#' +#' @param x a numeric vector or an \R OpenSpecy object +#' @param lead_tail_only logical whether to only look at leading adn tailing values. +#' @param ig_zero logical, whether to ignore both zeros and NAs +#' @param fun the name of the function you want run. +#' @param type character of either "ignore" or "remove". +#' @param \ldots further arguments passed to \code{fun}. +#' +#' @return +#' \code{manage_na()} return logical vectors of NA locations (if vector provided) or an +#' \code{OpenSpecy} object with ignored or removed NA values. +#' +#' @examples +#' manage_na(c(NA, -1, NA, 1, 10)) +#' manage_na(c(NA, -1, NA, 1, 10), lead_tail_only = F) +#' manage_na(c(NA, 0, NA, 1, 10), lead_tail_only = F, ig_zero = T) +#' data(raman_hdpe) +#' raman_hdpe$spectra[[1]][1:10] <- NA +#' manage_na(raman_hdpe, fun = make_rel) #would normally return all NA without na.rm = T but doesn't here. +#' manage_na(raman_hdpe, type = "remove") #will remove the first 10 values we set to NA +#' +#' @author +#' Win Cowger, Zacharias Steinmetz +#' +#' @seealso +#' \code{OpenSpecy} object to be matched with a reference library +#' \code{fill_spec()} can be used to fill NA values in Open Specy objects. +#' \code{restrict_range()} can be used to restrict spectral ranges in other ways than removing NAs. +#' +#' @export +manage_na <- function(x, ...) { + UseMethod("manage_na") +} + +#' @rdname manage_na +#' @export +manage_na.default <- function(x, lead_tail_only = T, ig_zero = F, ...) { + + if(lead_tail_only){ + na_positions <- logical(length(x)) + if(is.na(x[1])){ + criteria = T + y = 1 + while(criteria){ + if(is.na(x[y])|(ig_zero & x[y] == 0)) na_positions[y] <- T + y = y + 1 + criteria = is.na(x[y])|(ig_zero & x[y] == 0) + } + } + if(is.na(x[length(x)])){ + criteria = T + y = length(x) + while(criteria){ + if(is.na(x[y])|(ig_zero & x[y] == 0)) na_positions[y] <- T + y = y - 1 + criteria = is.na(x[y])|(ig_zero & x[y] == 0) + } + } + } + else{ + na_positions <- is.na(x)|(ig_zero & x == 0) + } + + return(na_positions) +} + +#' @rdname manage_na +#' @export +manage_na.OpenSpecy <- function(x, lead_tail_only = T, ig_zero = F, fun, type = "ignore", ...) { + + consistent <- x$spectra[, lapply(.SD, manage_na, + lead_tail_only = lead_tail_only, + ig_zero = ig_zero)] |> + rowSums() == 0 + + if(sum(consistent) < 3 & type == "ignore") stop("Not enough non NA values, need atleast 3") + + if(type == "ignore"){ + reduced <- as_OpenSpecy(x$wavenumber[consistent], x$spectra[consistent,], x$metadata) |> + fun(...) + + x$spectra <- x$spectra[, lapply(.SD, as.numeric)] + + x$spectra[consistent,] <- reduced$spectra + } + + if(sum(!consistent) < 3 & type == "remove") stop("Not enough non NA values left, need atleast 3") + + if(type == "remove"){ + x <- as_OpenSpecy(x$wavenumber[consistent], x$spectra[consistent,], x$metadata) + } + + return(x) + +} + diff --git a/R/sig_noise.R b/R/sig_noise.R index 1946df17..6e5bc41c 100644 --- a/R/sig_noise.R +++ b/R/sig_noise.R @@ -26,7 +26,7 @@ #' @param spatial_smooth logical; whether to spatially smooth the sig/noise using the xy #' coordinates and a gaussian smoother. #' @param sigma numeric; two value vector describing standard deviation for smoother in -#' each xy dimension, should be the same for each in most cases. +#' each dimension, y is specified first followed by x, should be the same for each in most cases. #' @param threshold numeric; if NULL, no threshold is set, otherwise use a numeric value #' to set the target threshold which true signal or noise should be above. The #' function will return a logical value instead of numeric if a threshold is set. diff --git a/man/manage_na.Rd b/man/manage_na.Rd new file mode 100644 index 00000000..b31437ea --- /dev/null +++ b/man/manage_na.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/manage_na.R +\name{manage_na} +\alias{manage_na} +\alias{manage_na.default} +\alias{manage_na.OpenSpecy} +\title{Ignore or Remove NA intensities} +\usage{ +manage_na(x, ...) + +\method{manage_na}{default}(x, lead_tail_only = T, ig_zero = F, ...) + +\method{manage_na}{OpenSpecy}(x, lead_tail_only = T, ig_zero = F, fun, type = "ignore", ...) +} +\arguments{ +\item{x}{a numeric vector or an \R OpenSpecy object} + +\item{lead_tail_only}{logical whether to only look at leading adn tailing values.} + +\item{ig_zero}{logical, whether to ignore both zeros and NAs} + +\item{fun}{the name of the function you want run.} + +\item{type}{character of either "ignore" or "remove".} + +\item{\ldots}{further arguments passed to \code{fun}.} +} +\value{ +\code{manage_na()} return logical vectors of NA locations (if vector provided) or an +\code{OpenSpecy} object with ignored or removed NA values. +} +\description{ +Sometimes you want to keep or remove NA values in intensities to allow for spectra with +varying shapes to be analyzed together or maintained in a single Open Specy object. +} +\examples{ +manage_na(c(NA, -1, NA, 1, 10)) +manage_na(c(NA, -1, NA, 1, 10), lead_tail_only = F) +manage_na(c(NA, 0, NA, 1, 10), lead_tail_only = F, ig_zero = T) +data(raman_hdpe) +raman_hdpe$spectra[[1]][1:10] <- NA +manage_na(raman_hdpe, fun = make_rel) #would normally return all NA without na.rm = T but doesn't here. +manage_na(raman_hdpe, type = "remove") #will remove the first 10 values we set to NA + +} +\seealso{ +\code{OpenSpecy} object to be matched with a reference library +\code{fill_spec()} can be used to fill NA values in Open Specy objects. +\code{restrict_range()} can be used to restrict spectral ranges in other ways than removing NAs. +} +\author{ +Win Cowger, Zacharias Steinmetz +} diff --git a/man/sig_noise.Rd b/man/sig_noise.Rd index 5844b11c..dcd2831f 100644 --- a/man/sig_noise.Rd +++ b/man/sig_noise.Rd @@ -66,7 +66,7 @@ when calculating signal and noise. Default is \code{TRUE}.} coordinates and a gaussian smoother.} \item{sigma}{numeric; two value vector describing standard deviation for smoother in -each xy dimension, should be the same for each in most cases.} +each dimension, y is specified first followed by x, should be the same for each in most cases.} \item{threshold}{numeric; if NULL, no threshold is set, otherwise use a numeric value to set the target threshold which true signal or noise should be above. The diff --git a/tests/testthat/test-sig_noise.R b/tests/testthat/test-sig_noise.R index 61ff66d1..8b3eb79e 100644 --- a/tests/testthat/test-sig_noise.R +++ b/tests/testthat/test-sig_noise.R @@ -44,13 +44,14 @@ test_that("entropy results in accurate info", { expect_equal(1.17) noise <- raman_hdpe + set.seed(10) noise$spectra[[1]] <- runif(n = length(noise$spectra[[1]])) sig_noise(noise, metric = "entropy", breaks = 10) |> round(2) |> unname() |> - expect_gt(3.32) + expect_equal(3.32) expect_true(sig_noise(raman_hdpe, metric = "entropy", From 6dde823b86c30c6e189458d2c6428960c4fe52d6 Mon Sep 17 00:00:00 2001 From: "Win Cowger, PhD" Date: Wed, 28 Feb 2024 16:49:51 -0800 Subject: [PATCH 4/5] add tests --- R/manage_na.R | 6 ++---- tests/testthat/test-manage_na.R | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/test-manage_na.R diff --git a/R/manage_na.R b/R/manage_na.R index 4f8a5b95..f1a1de3b 100644 --- a/R/manage_na.R +++ b/R/manage_na.R @@ -42,6 +42,8 @@ manage_na <- function(x, ...) { #' @export manage_na.default <- function(x, lead_tail_only = T, ig_zero = F, ...) { + if(all(is.na(x))) stop("All intensity values are NA, cannot remove or ignore with manage na.") + if(lead_tail_only){ na_positions <- logical(length(x)) if(is.na(x[1])){ @@ -79,8 +81,6 @@ manage_na.OpenSpecy <- function(x, lead_tail_only = T, ig_zero = F, fun, type = ig_zero = ig_zero)] |> rowSums() == 0 - if(sum(consistent) < 3 & type == "ignore") stop("Not enough non NA values, need atleast 3") - if(type == "ignore"){ reduced <- as_OpenSpecy(x$wavenumber[consistent], x$spectra[consistent,], x$metadata) |> fun(...) @@ -90,8 +90,6 @@ manage_na.OpenSpecy <- function(x, lead_tail_only = T, ig_zero = F, fun, type = x$spectra[consistent,] <- reduced$spectra } - if(sum(!consistent) < 3 & type == "remove") stop("Not enough non NA values left, need atleast 3") - if(type == "remove"){ x <- as_OpenSpecy(x$wavenumber[consistent], x$spectra[consistent,], x$metadata) } diff --git a/tests/testthat/test-manage_na.R b/tests/testthat/test-manage_na.R new file mode 100644 index 00000000..7c6c4a55 --- /dev/null +++ b/tests/testthat/test-manage_na.R @@ -0,0 +1,32 @@ + + +test_that("manage_na works as expected", { + manage_na(c(NA, -1, NA, 1, 10)) |> + expect_identical(c(TRUE, FALSE, FALSE, FALSE, FALSE)) + manage_na(c(NA, -1, NA, 1, 10), lead_tail_only = F) |> + expect_identical(c(TRUE, FALSE, TRUE, FALSE, FALSE)) + manage_na(c(NA, 0, NA, 1, 10), lead_tail_only = F, ig_zero = T) |> + expect_identical(c(TRUE, TRUE, TRUE, FALSE, FALSE)) + + data(raman_hdpe) + raman_hdpe$spectra[[1]][1:10] <- NA + ignore <- manage_na(raman_hdpe, fun = make_rel) + ignore$spectra[[1]] |> + is.na() |> + sum() |> + expect_equal(10) + remove <- manage_na(raman_hdpe, type = "remove") + remove$spectra[[1]] |> + is.na() |> + sum() |> + expect_equal(0) +}) + + +test_that("manage_na works as expected", { + data(raman_hdpe) + raman_hdpe$spectra[[1]][3:nrow(raman_hdpe$spectra)] <- NA + manage_na(raman_hdpe, fun = make_rel) |> expect_silent() + raman_hdpe$spectra[[1]][1:nrow(raman_hdpe$spectra)] <- NA + manage_na(raman_hdpe, fun = make_rel) |> expect_error() +}) From 91da0c4b4fdfcf8ce22880cc8d4d545b6ac879ea Mon Sep 17 00:00:00 2001 From: "Win Cowger, PhD" Date: Wed, 28 Feb 2024 17:01:45 -0800 Subject: [PATCH 5/5] add desc --- R/manage_na.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/manage_na.R b/R/manage_na.R index f1a1de3b..4d471405 100644 --- a/R/manage_na.R +++ b/R/manage_na.R @@ -8,7 +8,7 @@ #' @param x a numeric vector or an \R OpenSpecy object #' @param lead_tail_only logical whether to only look at leading adn tailing values. #' @param ig_zero logical, whether to ignore both zeros and NAs -#' @param fun the name of the function you want run. +#' @param fun the name of the function you want run, this is only used if the "ignore" type is chosen. #' @param type character of either "ignore" or "remove". #' @param \ldots further arguments passed to \code{fun}. #'