Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add entropy #168

Merged
merged 5 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ S3method(interactive_plot,default)
S3method(lines,OpenSpecy)
S3method(make_rel,OpenSpecy)
S3method(make_rel,default)
S3method(manage_na,OpenSpecy)
S3method(manage_na,default)
S3method(match_spec,OpenSpecy)
S3method(match_spec,default)
S3method(os_similarity,OpenSpecy)
Expand Down Expand Up @@ -97,6 +99,7 @@ export(is_OpenSpecy)
export(is_empty_vector)
export(load_lib)
export(make_rel)
export(manage_na)
export(match_spec)
export(max_cor_named)
export(mean_replace)
Expand Down
100 changes: 100 additions & 0 deletions R/manage_na.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#' @rdname manage_na
#' @title Ignore or Remove NA intensities
#'
#' @description
#' Sometimes you want to keep or remove NA values in intensities to allow for spectra with
#' varying shapes to be analyzed together or maintained in a single Open Specy object.
#'
#' @param x a numeric vector or an \R OpenSpecy object
#' @param lead_tail_only logical whether to only look at leading adn tailing values.
#' @param ig_zero logical, whether to ignore both zeros and NAs
#' @param fun the name of the function you want run, this is only used if the "ignore" type is chosen.
#' @param type character of either "ignore" or "remove".
#' @param \ldots further arguments passed to \code{fun}.
#'
#' @return
#' \code{manage_na()} return logical vectors of NA locations (if vector provided) or an
#' \code{OpenSpecy} object with ignored or removed NA values.
#'
#' @examples
#' manage_na(c(NA, -1, NA, 1, 10))
#' manage_na(c(NA, -1, NA, 1, 10), lead_tail_only = F)
#' manage_na(c(NA, 0, NA, 1, 10), lead_tail_only = F, ig_zero = T)
#' data(raman_hdpe)
#' raman_hdpe$spectra[[1]][1:10] <- NA
#' manage_na(raman_hdpe, fun = make_rel) #would normally return all NA without na.rm = T but doesn't here.
#' manage_na(raman_hdpe, type = "remove") #will remove the first 10 values we set to NA
#'
#' @author
#' Win Cowger, Zacharias Steinmetz
#'
#' @seealso
#' \code{OpenSpecy} object to be matched with a reference library
#' \code{fill_spec()} can be used to fill NA values in Open Specy objects.
#' \code{restrict_range()} can be used to restrict spectral ranges in other ways than removing NAs.
#'
#' @export
manage_na <- function(x, ...) {
UseMethod("manage_na")
}

#' @rdname manage_na
#' @export
manage_na.default <- function(x, lead_tail_only = T, ig_zero = F, ...) {

if(all(is.na(x))) stop("All intensity values are NA, cannot remove or ignore with manage na.")

if(lead_tail_only){
na_positions <- logical(length(x))
if(is.na(x[1])){
criteria = T
y = 1
while(criteria){
if(is.na(x[y])|(ig_zero & x[y] == 0)) na_positions[y] <- T
y = y + 1
criteria = is.na(x[y])|(ig_zero & x[y] == 0)
}
}
if(is.na(x[length(x)])){
criteria = T
y = length(x)
while(criteria){
if(is.na(x[y])|(ig_zero & x[y] == 0)) na_positions[y] <- T
y = y - 1
criteria = is.na(x[y])|(ig_zero & x[y] == 0)
}
}
}
else{
na_positions <- is.na(x)|(ig_zero & x == 0)
}

return(na_positions)
}

#' @rdname manage_na
#' @export
manage_na.OpenSpecy <- function(x, lead_tail_only = T, ig_zero = F, fun, type = "ignore", ...) {

consistent <- x$spectra[, lapply(.SD, manage_na,
lead_tail_only = lead_tail_only,
ig_zero = ig_zero)] |>
rowSums() == 0

if(type == "ignore"){
reduced <- as_OpenSpecy(x$wavenumber[consistent], x$spectra[consistent,], x$metadata) |>
fun(...)

x$spectra <- x$spectra[, lapply(.SD, as.numeric)]

x$spectra[consistent,] <- reduced$spectra
}

if(type == "remove"){
x <- as_OpenSpecy(x$wavenumber[consistent], x$spectra[consistent,], x$metadata)
}

return(x)

}

25 changes: 20 additions & 5 deletions R/sig_noise.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
#' noise where signal is estimated as the max intensity and noise is
#' estimated as the height of a low intensity region.),
#' \code{"log_tot_sig"} (sum of the inverse log intensities, useful for spectra in log units),
#' or \code{"tot_sig"} (sum of intensities).
#' \code{"tot_sig"} (sum of intensities), or \code{"entropy"} (Shannon entropy of intensities)..
#' @param step numeric; the step size of the region to look for the run_sig_over_noise option.
#' @param prob numeric single value; the probability to retrieve for the quantile where the noise will be interpreted with the run_sig_over_noise option.
#' @param breaks numeric; the number or positions of the breaks for entropy calculation. Defaults to infer a decent value from the data.
#' @param sig_min numeric; the minimum wavenumber value for the signal region.
#' @param sig_max numeric; the maximum wavenumber value for the signal region.
#' @param noise_min numeric; the minimum wavenumber value for the noise region.
Expand All @@ -24,7 +26,7 @@
#' @param spatial_smooth logical; whether to spatially smooth the sig/noise using the xy
#' coordinates and a gaussian smoother.
#' @param sigma numeric; two value vector describing standard deviation for smoother in
#' each xy dimension, should be the same for each in most cases.
#' each dimension, y is specified first followed by x, should be the same for each in most cases.
#' @param threshold numeric; if NULL, no threshold is set, otherwise use a numeric value
#' to set the target threshold which true signal or noise should be above. The
#' function will return a logical value instead of numeric if a threshold is set.
Expand Down Expand Up @@ -65,7 +67,11 @@ sig_noise.default <- function(x, ...) {
#'
#' @export
sig_noise.OpenSpecy <- function(x, metric = "run_sig_over_noise",
na.rm = TRUE, step = 20,
na.rm = TRUE, prob = 0.5, step = 20,
breaks = seq(min(unlist(x$spectra)),
max(unlist(x$spectra)),
length = ((nrow(x$spectra)^(1/3))*(max(unlist(x$spectra)) - min(unlist(x$spectra))))/
(2*IQR(unlist(x$spectra)))),
sig_min = NULL, sig_max = NULL,
noise_min = NULL, noise_max = NULL, abs = T,
spatial_smooth = F, sigma = c(1,1), threshold = NULL, ...) {
Expand All @@ -79,8 +85,8 @@ sig_noise.OpenSpecy <- function(x, metric = "run_sig_over_noise",
}
max <- frollapply(y[!is.na(y)], step, max)
max[(length(max) - (step-1)):length(max)] <- NA
signal <- max(max, na.rm = T)
noise <- median(max[max != 0], na.rm = T)
signal <- max(max, na.rm = na.rm)
noise <- quantile(max[max != 0], probs = prob, na.rm = na.rm)
} else {
if(!is.null(sig_min) & !is.null(sig_max)){
sig_intens <- y[x$wavenumber >= sig_min & x$wavenumber <= sig_max]
Expand All @@ -92,6 +98,13 @@ sig_noise.OpenSpecy <- function(x, metric = "run_sig_over_noise",
} else {
noise_intens <- y
}
if(metric == "entropy"){
binarize <- cut(sig_intens, breaks)
freq <- table(binarize)/length(binarize)
vec <- as.vector(freq)
vec <- vec[vec>0]
return(-sum(vec * log2(vec)))
}
signal <- mean(sig_intens, na.rm = na.rm)
noise <- sd(noise_intens, na.rm = na.rm)
}
Expand All @@ -105,6 +118,7 @@ sig_noise.OpenSpecy <- function(x, metric = "run_sig_over_noise",
if(metric == "tot_sig") return(sum(y))
if(metric == "log_tot_sig") return(sum(exp(y)))
}, FUN.VALUE = numeric(1))

if(spatial_smooth){
values <- matrix(values, ncol = max(x$metadata$x) + 1, byrow = T) |>
gaussianSmooth(sigma = sigma) |>
Expand All @@ -114,6 +128,7 @@ sig_noise.OpenSpecy <- function(x, metric = "run_sig_over_noise",
if(abs) {
values <- abs(values)
}

if(!is.null(threshold)){
return(values >= threshold)
}
Expand Down
53 changes: 53 additions & 0 deletions man/manage_na.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 10 additions & 2 deletions man/sig_noise.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 32 additions & 0 deletions tests/testthat/test-manage_na.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@


test_that("manage_na works as expected", {
manage_na(c(NA, -1, NA, 1, 10)) |>
expect_identical(c(TRUE, FALSE, FALSE, FALSE, FALSE))
manage_na(c(NA, -1, NA, 1, 10), lead_tail_only = F) |>
expect_identical(c(TRUE, FALSE, TRUE, FALSE, FALSE))
manage_na(c(NA, 0, NA, 1, 10), lead_tail_only = F, ig_zero = T) |>
expect_identical(c(TRUE, TRUE, TRUE, FALSE, FALSE))

data(raman_hdpe)
raman_hdpe$spectra[[1]][1:10] <- NA
ignore <- manage_na(raman_hdpe, fun = make_rel)
ignore$spectra[[1]] |>
is.na() |>
sum() |>
expect_equal(10)
remove <- manage_na(raman_hdpe, type = "remove")
remove$spectra[[1]] |>
is.na() |>
sum() |>
expect_equal(0)
})


test_that("manage_na works as expected", {
data(raman_hdpe)
raman_hdpe$spectra[[1]][3:nrow(raman_hdpe$spectra)] <- NA
manage_na(raman_hdpe, fun = make_rel) |> expect_silent()
raman_hdpe$spectra[[1]][1:nrow(raman_hdpe$spectra)] <- NA
manage_na(raman_hdpe, fun = make_rel) |> expect_error()
})
25 changes: 25 additions & 0 deletions tests/testthat/test-sig_noise.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,28 @@ test_that("sig_noise() returns correct values", {
expect_equal(97527)
})

test_that("entropy results in accurate info", {
sig_noise(raman_hdpe,
metric = "entropy",
breaks = 10) |>
round(2) |>
unname() |>
expect_equal(1.17)

noise <- raman_hdpe
set.seed(10)
noise$spectra[[1]] <- runif(n = length(noise$spectra[[1]]))

sig_noise(noise, metric = "entropy",
breaks = 10) |>
round(2) |>
unname() |>
expect_equal(3.32)

expect_true(sig_noise(raman_hdpe,
metric = "entropy",
breaks = 10) |>
unname() <
sig_noise(noise, metric = "entropy",
breaks = 10) |> unname())
})
Loading